1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
#!/usr/bin/python2.4
# parselogs.py
# Christian Reis <kiko@async.com.br>
#
# Parses Launchpad error logs and prints the most frequent errors and
# timed out pages seen in the last LAST_DAYS days.
import re
import pprint
import sys
import time
import datetime
COUNT = 10
LAST_DAYS = 7
def init_or_set(d, v):
    """Count one more occurrence of key v in the dict d.

    d maps keys to integer counts and is modified in place: a key seen for
    the first time starts at 1, otherwise its count is incremented.
    Nothing is returned.
    """
    # dict.get() avoids the deprecated has_key() and a second lookup.
    d[v] = d.get(v, 0) + 1
def init_list_or_set(d, v, e):
    """Record that element e was seen for key v in the dict d.

    d maps each key to an inner dict whose keys are the distinct elements
    recorded for it; the inner values are always 1, so the inner dict acts
    as a set.  d is modified in place and nothing is returned.
    """
    # setdefault() creates the inner dict on first use, replacing the
    # deprecated has_key() check.
    d.setdefault(v, {})[e] = 1
if len(sys.argv) == 1:
lognames = ["launchpad1.log", "launchpad2.log"]
else:
lognames = sys.argv[1:]
exceptions = {}
expired = {}
url_table = {}
now = datetime.datetime.fromtimestamp(time.time())
for logname in lognames:
text = open(logname).read()
errors = text.split("------")
for error in errors:
error = error.strip()
if not error:
continue
fullerror = error
error = error.split("\n")[-1].strip()
first_line = fullerror.split("\n")[0]
date = first_line.split(" ")[0]
# XXX kiko 2005-10-17: handle timezone properly; it kinda sucks that
# we have no way of knowing what timezone the log originates from.
# For now I hack around this by assuming timezone is UTC.
ts = time.strftime("%s", time.strptime(date, "%Y-%m-%dT%H:%M:%S"))
then = datetime.datetime.fromtimestamp(float(ts))
if now - then > datetime.timedelta(days=LAST_DAYS):
continue
if " WARNING " in error:
continue
extra = " ".join(first_line.split()[3:])
if "RequestExpired:" in error:
error = "RequestExpired: %s" % extra
init_or_set(expired, error)
continue
if re.search("0x[abcdef0-9]+", error):
error = re.sub("0x[abcdef0-9]+", "INSTANCE-ID", error)
init_or_set(exceptions, error)
init_list_or_set(url_table, error, extra)
values = exceptions.items()
values.sort(key=lambda x: x[1], reverse=True)
print
print "=== Top %d exceptions in the past %d days ===" % (COUNT, LAST_DAYS)
print
for exc, count in values[:COUNT]:
print count, "\t", exc
print "\t\t", "\n\t\t".join(url_table[exc].keys()[:10])
values = expired.items()
values.sort(key=lambda x: x[1], reverse=True)
print
print
print "=== Top %d timed out pages in the past %d days ===" % (COUNT, LAST_DAYS)
print
for url, count in values[:COUNT]:
print count, "\t", url
|