1716.1.203
by Christian Reis
Add a utility that parses the Launchpad server logs and returns a report of the top errors.
1 |
#!/usr/bin/python2.4
|
2 |
# parselogs.py
|
|
3 |
# Christian Reis <kiko@async.com.br>
|
|
4 |
#
|
|
5 |
# Parses Launchpad error logs and returns a list of most frequent errors
|
|
6 |
||
7 |
import re |
|
8 |
import pprint |
|
9 |
import sys |
|
10 |
import time |
|
11 |
import datetime |
|
12 |
||
13 |
# Maximum number of entries printed in each section of the report.
COUNT = 10
# Log entries older than this many days are left out of the report.
LAST_DAYS = 7
|
15 |
||
16 |
def init_or_set(d, v):
    """Count one more occurrence of ``v`` in the dictionary ``d``.

    ``d`` maps keys to integer counters and is modified in place: a key
    seen for the first time is stored with a count of 1, otherwise its
    existing count is incremented by one.  Nothing is returned.
    """
    # dict.get replaces the deprecated dict.has_key() test and needs only
    # one branch; it behaves the same on Python 2.4 and later versions.
    d[v] = d.get(v, 0) + 1
|
21 |
||
22 |
def init_list_or_set(d, v, e):
    """Record ``e`` in the set of values associated with key ``v`` in ``d``.

    ``d`` maps each key to an inner dictionary that is used as a set: its
    keys are the recorded values and every stored value is the constant 1.
    The inner dictionary is created the first time ``v`` is seen.  ``d``
    is modified in place and nothing is returned.
    """
    # dict.setdefault replaces the deprecated dict.has_key() test.
    d.setdefault(v, {})[e] = 1
|
27 |
||
28 |
# Log files to analyse: every command-line argument is taken as a log file
# name; when none is given the two default file names are used.
if len(sys.argv) == 1:
    lognames = ["launchpad1.log", "launchpad2.log"]
else:
    lognames = sys.argv[1:]

# Format of the timestamp that starts the first line of every log entry.
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S"
# Matches "0x..." hexadecimal tokens; they are masked so that errors that
# differ only in such a token are counted as the same error.
_INSTANCE_ID_RE = re.compile("0x[abcdef0-9]+")


def _read_log(logname):
    """Return the whole contents of the file ``logname`` and close it."""
    logfile = open(logname)
    try:
        return logfile.read()
    finally:
        logfile.close()


def _parse_timestamp(date):
    """Turn a ``YYYY-MM-DDTHH:MM:SS`` string into a naive datetime.

    Raises ValueError when ``date`` does not match that format.
    """
    # Building the datetime straight from the parsed fields avoids the
    # non-portable "%s" strftime directive (a glibc extension).
    return datetime.datetime(*time.strptime(date, _TIMESTAMP_FORMAT)[:6])


exceptions = {}  # error text -> number of occurrences
expired = {}     # "RequestExpired: <request info>" -> number of occurrences
url_table = {}   # error text -> {request info: 1} (inner dict used as a set)

now = datetime.datetime.fromtimestamp(time.time())
max_age = datetime.timedelta(days=LAST_DAYS)
for logname in lognames:
    # Log entries are separated by a run of dashes.
    for entry in _read_log(logname).split("------"):
        entry = entry.strip()
        if not entry:
            continue

        entry_lines = entry.split("\n")
        first_line = entry_lines[0]
        # The last line of an entry carries the error message itself.
        error = entry_lines[-1].strip()

        date = first_line.split(" ")[0]
        # XXX: handle timezone properly; there is no way of knowing what
        # timezone the log originates from.  The timestamp is taken at
        # face value and compared with the local-time ``now``, which is
        # only accurate when the log and this machine share a timezone
        # (the original author assumed UTC).
        try:
            then = _parse_timestamp(date)
        except ValueError:
            # A chunk that does not start with a timestamp (for example a
            # truncated entry) is reported and skipped instead of aborting
            # the whole run.
            sys.stderr.write(
                "%s: skipping entry with unparsable timestamp %r\n"
                % (logname, date))
            continue
        if now - then > max_age:
            continue

        if " WARNING " in error:
            continue
        # Everything after the third whitespace-separated field of the
        # first line -- presumably the request URL; verify against the
        # actual log format.
        extra = " ".join(first_line.split()[3:])
        if "RequestExpired:" in error:
            # Timeouts are tallied per request rather than per message.
            init_or_set(expired, "RequestExpired: %s" % extra)
            continue
        error = _INSTANCE_ID_RE.sub("INSTANCE-ID", error)
        init_or_set(exceptions, error)
        init_list_or_set(url_table, error, extra)

# Most frequent first.
values = sorted(exceptions.items(), key=lambda item: item[1], reverse=True)

# Each print below takes one pre-formatted string so that the output is
# byte-for-byte what the Python 2 print statement produced (which writes
# no separating space after an item ending in a tab) while also being
# valid on Python 3.
print("")
print("=== Top %d exceptions in the past %d days ===" % (COUNT, LAST_DAYS))
print("")
for exc, count in values[:COUNT]:
    print("%s \t%s" % (count, exc))
    # Show at most ten of the requests that triggered this error.
    print("\t\t" + "\n\t\t".join(list(url_table[exc])[:10]))

values = sorted(expired.items(), key=lambda item: item[1], reverse=True)

print("")
print("")
print("=== Top %d timed out pages in the past %d days ===" % (COUNT, LAST_DAYS))
print("")
for url, count in values[:COUNT]:
    print("%s \t%s" % (count, url))
|
90 |
||
91 |