1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
#!/usr/bin/env python
# Copyright 2005 Canonical Ltd. All rights reserved.
# Analyse Launchpad error reports and return a list of most frequent errors
# Python 2 idiom: make every class defined in this module a new-style class.
__metaclass__ = type
import sys
import os
import re
import rfc822
import time
import datetime
# Maximum number of entries printed in each "Top N" table of the report.
COUNT = 15
def _parsedate(s):
    """Convert an ISO 8601 timestamp string into a naive datetime.

    Only the first 19 characters ('YYYY-MM-DDTHH:MM:SS') are parsed, so
    any fractional seconds and any timezone suffix are discarded.
    """
    # Everything past the seconds field is deliberately cut off.
    timestamp = s[:19]
    fields = time.strptime(timestamp, '%Y-%m-%dT%H:%M:%S')
    year, month, day, hour, minute, second = fields[:6]
    return datetime.datetime(year, month, day, hour, minute, second)
class ErrorData:
    """Data about a particular exception.

    Attributes:
        etype: the exception type, as passed to the constructor.
        evalue: the exception value, as passed to the constructor.
        urls: dict mapping each URL to the set of OOPS IDs recorded for it.
    """

    def __init__(self, etype, evalue):
        self.etype = etype
        self.evalue = evalue
        self.urls = {}

    def addUrl(self, url, oopsid):
        """Record that the OOPS `oopsid` was seen for `url`.

        OOPS IDs are kept in a set per URL, so adding the same ID twice
        for the same URL has no further effect.
        """
        self.urls.setdefault(url, set()).add(oopsid)

    def count(self):
        """Return the number of OOPS IDs recorded, summed over all URLs."""
        # values() rather than the Python-2-only itervalues(): same result,
        # and it also works on Python 3.
        return sum(len(oopsids) for oopsids in self.urls.values())
class ErrorSummary:
    """Collect OOPS reports and print a summary of the most frequent ones.

    Reports are sorted into three dictionaries, each mapping an
    (exception type, exception value) pair to an ErrorData instance:

        expired:    'RequestExpired' and 'RequestQueryTimedOut' errors
        notfound:   'NotFound' errors
        exceptions: everything else
    """

    def __init__(self):
        self.expired = {}
        self.notfound = {}
        self.exceptions = {}
        # Number of reports processed (files lacking an OOPS ID don't count).
        self.exc_count = 0
        # Earliest and latest report dates seen; None until a report that
        # carries a date header has been processed.
        self.start = None
        self.end = None

    def addOops(self, errordict, etype, evalue, url, oopsid):
        """Record one OOPS in `errordict` under the key (etype, evalue)."""
        key = (etype, evalue)
        data = errordict.get(key)
        if data is None:
            # Only build an ErrorData the first time this error is seen.
            data = errordict[key] = ErrorData(etype, evalue)
        data.addUrl(url, oopsid)

    def processOops(self, fname):
        """Read the report stored in the file `fname` and add it.

        The file is parsed as a set of RFC 822 style headers.  A file
        without an 'oops-id' header is not an OOPS and is ignored.
        """
        # rfc822.Message reads all headers in its constructor, so the file
        # can be closed as soon as the message object exists.
        fp = open(fname, 'r')
        try:
            msg = rfc822.Message(fp)
        finally:
            fp.close()

        # if there is no OOPS ID, then it is not an OOPS
        oopsid = msg.getheader('oops-id')
        if oopsid is None:
            return
        self.exc_count += 1

        # keep track of the time span covered by the dated reports
        datestr = msg.getheader('date')
        if datestr is not None:
            date = _parsedate(datestr)
            if self.start is None or self.start > date:
                self.start = date
            if self.end is None or self.end < date:
                self.end = date

        url = msg.getheader('url')
        etype = msg.getheader('exception-type')
        # A report without an exception value is treated as having an
        # empty one; passing None to re.sub() would raise TypeError.
        evalue = msg.getheader('exception-value', '')

        # replace pointer values in exception values with a constant
        # string, so errors differing only in an address are grouped.
        evalue = re.sub("0x[abcdef0-9]+", "INSTANCE-ID", evalue)

        if etype in ('RequestExpired', 'RequestQueryTimedOut'):
            self.addOops(self.expired, etype, evalue, url, oopsid)
        elif etype == 'NotFound':
            self.addOops(self.notfound, etype, evalue, url, oopsid)
        else:
            self.addOops(self.exceptions, etype, evalue, url, oopsid)

    def processDir(self, directory):
        """Process every regular file directly inside `directory`."""
        for filename in os.listdir(directory):
            path = os.path.join(directory, filename)
            if os.path.isfile(path):
                self.processOops(path)

    def printTable(self, source, title):
        """Print the COUNT most frequent errors of `source` under `title`.

        `source` is one of the (etype, evalue) -> ErrorData dictionaries.
        For each error, up to three URLs (most frequent first) are shown,
        each with up to five of its OOPS IDs.
        """
        # Single-argument print(...) behaves the same as the print
        # statement on Python 2 and is also valid Python 3.
        print('=== Top %d %s ===' % (COUNT, title))
        print('')
        errors = sorted(source.values(),
                        key=lambda data: data.count(),
                        reverse=True)
        for data in errors[:COUNT]:
            print('%4d %s: %s' % (data.count(), data.etype, data.evalue))
            urls = sorted(((len(oopsids), url) for (url, oopsids)
                           in data.urls.items()),
                          reverse=True)
            # print the first three URLs
            for (count, url) in urls[:3]:
                print(' %4d %s' % (count, url))
                print(' %s' % ', '.join(sorted(data.urls[url])[:5]))
            if len(urls) > 3:
                print(' [%s other URLs]' % (len(urls) - 3))
            print('')
        print('')

    def printReport(self):
        """Print the three error tables followed by overall statistics."""
        self.printTable(self.expired, 'Time Out Pages')
        self.printTable(self.notfound, '404 Pages')
        self.printTable(self.exceptions, 'Exceptions')

        if self.start is None or self.end is None:
            # No dated report was processed, so there is no period.
            days = 0.0
        else:
            period = self.end - self.start
            days = period.days + period.seconds / 86400.0

        print("=== Statistics ===")
        print('')
        print(" * Log starts: %s" % self.start)
        print(" * Analyzed period: %.2f days" % days)
        print(" * Total exceptions: %d" % self.exc_count)
        if days > 0:
            print(" * Average exceptions per day: %.2f"
                  % (self.exc_count / days))
        else:
            # A zero-length period (no reports, or all at the same
            # second) has no meaningful daily average.
            print(" * Average exceptions per day: n/a")
        print('')
if __name__ == '__main__':
    # Every command line argument names a directory of OOPS reports.
    directories = sys.argv[1:]
    if len(directories) == 0:
        sys.stderr.write('usage: %s directory ...\n' % sys.argv[0])
        sys.exit(1)

    # Feed all directories into one summary, then print a single report.
    summary = ErrorSummary()
    for directory in directories:
        summary.processDir(directory)
    summary.printReport()
|