~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python
# Copyright 2005 Canonical Ltd.  All rights reserved.

# Analyse Launchpad error reports and print a report of the most frequent errors

# Python 2 idiom: classes defined in this module without an explicit base
# become new-style classes.
__metaclass__ = type

import sys
import os
import re
import rfc822
import time
import datetime

# Maximum number of errors listed in each "Top N" table of the report.
COUNT = 15

def _parsedate(s):
    """Convert an ISO 8601 timestamp string into a naive datetime.

    Only the leading 'YYYY-MM-DDTHH:MM:SS' portion of the string is
    parsed, so fractional seconds and any timezone suffix are dropped.
    """
    # The date and time down to whole seconds occupy the first 19 characters.
    timestamp = s[:19]
    parsed = time.strptime(timestamp, '%Y-%m-%dT%H:%M:%S')
    year, month, day, hour, minute, second = parsed[:6]
    return datetime.datetime(year, month, day, hour, minute, second)


class ErrorData:
    """Occurrences of one particular exception, grouped by URL.

    Attributes:
      etype  -- the exception type given to the constructor
      evalue -- the exception value given to the constructor
      urls   -- dict mapping each URL to the set of OOPS ids recorded for it
    """

    def __init__(self, etype, evalue):
        self.etype, self.evalue = etype, evalue
        self.urls = {}

    def addUrl(self, url, oopsid):
        """Record that the OOPS oopsid was seen for url."""
        if url not in self.urls:
            self.urls[url] = set()
        self.urls[url].add(oopsid)

    def count(self):
        """Return the number of OOPS ids recorded, summed over all URLs."""
        total = 0
        for oopsids in self.urls.itervalues():
            total += len(oopsids)
        return total


class ErrorSummary:
    def __init__(self):
        self.expired = {}
        self.notfound = {}
        self.exceptions = {}
        self.exc_count = 0
        self.start = None
        self.end = None

    def addOops(self, errordict, etype, evalue, url, oopsid):
        data = errordict.setdefault((etype, evalue),
                                    ErrorData(etype, evalue))
        data.addUrl(url, oopsid)

    def processOops(self, fname):
        msg = rfc822.Message(open(fname, 'r'))

        # if there is no OOPS ID, then it is not an OOPS
        oopsid = msg.getheader('oops-id')
        if oopsid is None:
            return

        self.exc_count += 1

        # add the date to oopsid to make it unique
        datestr = msg.getheader('date')
        if datestr is not None:
            date = _parsedate(datestr)
            if self.start is None or self.start > date:
                self.start = date
            if self.end is None or self.end < date:
                self.end = date

        url = msg.getheader('url')
        etype = msg.getheader('exception-type')
        evalue = msg.getheader('exception-value')

        # replace pointer values in exception values with a constant
        # string.
        evalue = re.sub("0x[abcdef0-9]+", "INSTANCE-ID", evalue)

        if etype in ('RequestExpired', 'RequestQueryTimedOut'):
            self.addOops(self.expired, etype, evalue, url, oopsid)
        elif etype == 'NotFound':
            self.addOops(self.notfound, etype, evalue, url, oopsid)
        else:
            self.addOops(self.exceptions, etype, evalue, url, oopsid)

    def processDir(self, directory):
        for filename in os.listdir(directory):
            path = os.path.join(directory, filename)
            if os.path.isfile(path):
                self.processOops(path)

    def printTable(self, source, title):
        print '=== Top %d %s ===' % (COUNT, title)
        print

        errors = sorted(source.itervalues(),
                        key=lambda data: data.count(),
                        reverse=True)

        for data in errors[:COUNT]:
            print '%4d %s: %s' % (data.count(), data.etype, data.evalue)
            urls = sorted(((len(oopsids), url) for (url, oopsids)
                                                   in data.urls.iteritems()),
                          reverse=True)
            # print the first three URLs
            for (count, url) in urls[:3]:
                print '    %4d %s' % (count, url)
                print '        %s' % ', '.join(sorted(data.urls[url])[:5])
            if len(urls) > 3:
                print '    [%s other URLs]' % (len(urls) - 3)
            print
        print
            
    def printReport(self):
        self.printTable(self.expired, 'Time Out Pages')
        self.printTable(self.notfound, '404 Pages')
        self.printTable(self.exceptions, 'Exceptions')

        period = self.end - self.start
        days = period.days + period.seconds / 86400.0

        print "=== Statistics ==="
        print
        print " * Log starts: %s" % self.start
        print " * Analyzed period: %.2f days" % days
        print " * Total exceptions: %d" % self.exc_count
        print " * Average exceptions per day: %.2f" % (self.exc_count / days)
        print


if __name__ == '__main__':
    # Every command line argument names a directory of OOPS reports; at
    # least one is required.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s directory ...\n' % sys.argv[0])
        sys.exit(1)
    summary = ErrorSummary()
    for directory in sys.argv[1:]:
        summary.processDir(directory)
    summary.printReport()