~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/python
#
# Copyright 2009 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
#
# parselogs.py
# Christian Reis <kiko@async.com.br>
#
# Parses Launchpad error logs and returns a list of most frequent errors

import re
import pprint
import sys
import time
import datetime

# Number of entries shown in each "Top N" section of the report.
COUNT = 10
# Log entries older than this many days are skipped.
LAST_DAYS = 7

def init_or_set(d, v):
    """Increment the counter stored under key `v` in dict `d`.

    A key that is not present yet starts at 1.  The dict is modified in
    place; nothing is returned.
    """
    # dict.get() replaces the deprecated dict.has_key(), which no longer
    # exists in Python 3.
    d[v] = d.get(v, 0) + 1

def init_list_or_set(d, v, e):
    """Record element `e` under key `v` in dict `d`.

    `d[v]` is itself a dict used as a set: each distinct `e` is stored as a
    key whose value is always 1, so recording the same element twice has no
    further effect.  The inner dict is created on first use.  `d` is
    modified in place; nothing is returned.
    """
    # setdefault() replaces the deprecated dict.has_key() test, which no
    # longer exists in Python 3.
    d.setdefault(v, {})[e] = 1

# Log files to parse: the command-line arguments when any are given,
# otherwise the two default log file names.
lognames = (
    ["launchpad1.log", "launchpad2.log"]
    if len(sys.argv) == 1
    else sys.argv[1:]
)

# Aggregated results, filled in while the logs are scanned:
#   exceptions - normalized error line -> number of occurrences
#   expired    - "RequestExpired: <details>" -> number of occurrences
#   url_table  - normalized error line -> {details: 1}, i.e. the distinct
#                first-line details seen together with that error
exceptions = {}
expired = {}
url_table = {}

# Hex literals differ between otherwise identical errors (presumably they
# are object addresses, given the "INSTANCE-ID" replacement text), so they
# are masked before the error is counted.
hex_id = re.compile(r"0x[abcdef0-9]+")

now = datetime.datetime.now()
# Entries stamped before this moment are too old to be counted.
oldest_allowed = now - datetime.timedelta(days=LAST_DAYS)
for logname in lognames:
    # Close the file as soon as it has been read instead of leaking the
    # handle until garbage collection.
    with open(logname) as logfile:
        text = logfile.read()
    errors = text.split("------")
    for error in errors:
        error = error.strip()
        if not error:
            continue

        # The first line of an entry carries the timestamp and the request
        # details; the last line carries the error message itself.
        fullerror = error
        lines = fullerror.split("\n")
        first_line = lines[0]
        error = lines[-1].strip()

        date = first_line.split(" ")[0]
        # XXX kiko 2005-10-17: handle timezone properly; it kinda sucks that
        # we have no way of knowing what timezone the log originates from.
        # The timestamp is parsed as a naive datetime and compared with the
        # naive local "now", so the cut-off is only exact when the log was
        # written in this machine's timezone.
        # datetime.strptime() is used directly rather than going through
        # time.strftime("%s"), which is a platform-specific extension.
        # A first line that does not start with a timestamp in this format
        # raises ValueError.
        then = datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")
        if then < oldest_allowed:
            continue

        if " WARNING " in error:
            continue
        # Everything after the third whitespace-separated field of the
        # first line.
        extra = " ".join(first_line.split()[3:])
        if "RequestExpired:" in error:
            # Timeouts are counted per request details, separately from the
            # other exceptions.
            error = "RequestExpired: %s" % extra
            init_or_set(expired, error)
            continue
        # sub() returns the string unchanged when nothing matches, so no
        # separate search is needed.
        error = hex_id.sub("INSTANCE-ID", error)
        init_or_set(exceptions, error)
        init_list_or_set(url_table, error, extra)

# Print the report.  Every print() call is given one pre-formatted string,
# which is valid in both Python 2 and Python 3 and produces the same bytes
# as the former print statements (Python 2 emits no separating space after
# an item ending in "\t", hence the "%s \t%s" format).

# Most frequent exceptions first; sorted() also works on the dict views
# that items() returns in Python 3.
values = sorted(exceptions.items(), key=lambda x: x[1], reverse=True)

print("")
print("=== Top %d exceptions in the past %d days ===" % (COUNT, LAST_DAYS))
print("")
for exc, count in values[:COUNT]:
    print("%s \t%s" % (count, exc))
    # Show at most ten of the request details recorded for this exception.
    print("\t\t" + "\n\t\t".join(list(url_table[exc])[:10]))

# Most frequently timed out requests first.
values = sorted(expired.items(), key=lambda x: x[1], reverse=True)

print("")
print("")
print("=== Top %d timed out pages in the past %d days ===" % (COUNT, LAST_DAYS))
print("")
for url, count in values[:COUNT]:
    print("%s \t%s" % (count, url))