1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
1 |
#!/usr/bin/env python
|
2 |
# Copyright 2004-2005 Canonical Ltd. All rights reserved.
|
|
3 |
"""
|
|
4 |
Process LinkChecker .csv results for the staging server, stuff them into
|
|
5 |
a database and generate a report suitable for spamming developers with.
|
|
6 |
"""
|
|
7 |
||
8 |
__metaclass__ = type |
|
9 |
||
10 |
import _pythonpath |
|
11 |
||
12 |
import csv, re, sys |
|
13 |
from StringIO import StringIO |
|
14 |
from optparse import OptionParser |
|
15 |
from sqlobject import StringCol, IntCol, BoolCol, FloatCol, DatabaseIndex |
|
16 |
from canonical.database.datetimecol import UtcDateTimeCol |
|
17 |
from canonical.database.constants import UTC_NOW |
|
18 |
from canonical.launchpad.scripts import db_options, logger_options, logger |
|
19 |
from canonical.lp import initZopeless |
|
20 |
from canonical.database.sqlbase import SQLBase |
|
21 |
from canonical.config import config |
|
22 |
from canonical.launchpad.mail import simple_sendmail |
|
23 |
||
24 |
||
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
25 |
class CheckedLink(SQLBase):
    """One row of LinkChecker output, stored in the CheckedLink table.

    Most columns hold the value of the identically named field of a
    LinkChecker CSV row, after main() has converted it to the right type.
    The exceptions are noted next to the column concerned.
    """

    _table = 'CheckedLink'

    # -- Fields copied from the CSV row ------------------------------------
    urlname = StringCol(notNull=True)
    recursionlevel = IntCol(notNull=True)
    parentname = StringCol(notNull=True)
    baseref = StringCol(notNull=True)
    # main() rewrites `result` to '200 Ok' for valid links and prefixes it
    # with a synthetic 601/602 code when the checker gave no numeric status.
    result = StringCol(notNull=True)
    # Not a CSV field: the integer status main() derives from `result`.
    resultcode = IntCol(notNull=True)
    warningstring = StringCol(notNull=True)
    infostring = StringCol(notNull=True)
    valid = BoolCol(notNull=True)
    # Unique key for a link; alternateID gives us CheckedLink.byUrl().
    url = StringCol(alternateID=True, unique=True, notNull=True)
    line = IntCol(notNull=True)
    # Holds the CSV 'column' field, renamed because column is a SQL keyword.
    col = IntCol(notNull=True)
    name = StringCol(notNull=True)
    dltime = FloatCol()
    dlsize = IntCol()
    checktime = FloatCol(notNull=True)

    # -- Bookkeeping -------------------------------------------------------
    # Defaults to the insertion time; main() stores NULL instead when the
    # link's resultcode is below 400.
    brokensince = UtcDateTimeCol(default=UTC_NOW, notNull=False)
    # The CSV 'cached' field is intentionally not stored; main() drops it
    # from each row before writing to the database.

    resultcode_index = DatabaseIndex('resultcode')
    recursionlevel_index = DatabaseIndex('recursionlevel')
|
48 |
||
49 |
||
50 |
def main(csvfile, log): |
|
51 |
||
52 |
# Where we store broken links
|
|
53 |
broken = set() |
|
54 |
||
55 |
# Suck in the csv file, updating the database and adding to the broken set
|
|
56 |
reader = csv.DictReader( |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
57 |
(line.replace('\0','') for line in csvfile |
58 |
if not line.startswith('#')) |
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
59 |
)
|
60 |
for row in reader: |
|
61 |
# Get the result code
|
|
2041
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
62 |
if row['valid']: |
63 |
row['resultcode'] = 200 |
|
64 |
row['result'] = '200 Ok' |
|
65 |
else: |
|
66 |
m = re.search('^(\d+)', row['result'] or '') |
|
67 |
if m is None: |
|
68 |
if row['result'] == 'URL is empty': |
|
69 |
continue
|
|
70 |
elif 'The read operation timed out' in row['result']: |
|
71 |
row['result'] = '601 %s' % row['result'] |
|
72 |
row['resultcode'] = 601 |
|
73 |
else: |
|
74 |
row['result'] = '602 %s' % row['result'] |
|
75 |
row['resultcode'] = 602 |
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
76 |
else: |
2041
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
77 |
row['resultcode'] = int(m.group(1)) |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
78 |
|
79 |
# Cast input and nuke crap (to avoid confusing SQLObject)
|
|
80 |
row['recursionlevel'] = int(row['recursionlevel']) |
|
81 |
row['valid'] = row['valid'] in ('True', 'true') |
|
82 |
row['line'] = int(row['line']) |
|
83 |
row['col'] = int(row['column']) # Renamed - column is a SQL keyword |
|
84 |
del row['column'] |
|
85 |
row['dltime'] = float(row['dltime']) |
|
86 |
row['dlsize'] = int(row['dlsize']) |
|
87 |
row['checktime'] = float(row['checktime']) |
|
88 |
del row['cached'] |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
89 |
if row['resultcode'] < 400: |
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
90 |
row['brokensince'] = None |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
91 |
|
92 |
try: |
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
93 |
link = CheckedLink.byUrl(row['url']) |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
94 |
link.set(**row) |
95 |
except LookupError: |
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
96 |
link = CheckedLink(**row) |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
97 |
broken.add(link) |
98 |
||
99 |
total = len(broken) |
|
100 |
||
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
101 |
# Delete any entries that were not spidered
|
102 |
# TODO: Only if older than a threshold -- StuartBishop 20050704
|
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
103 |
for link in CheckedLink.select(): |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
104 |
if link in broken: |
105 |
continue
|
|
106 |
link.destroySelf() |
|
107 |
||
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
108 |
new_broken_links = CheckedLink.select(""" |
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
109 |
resultcode in (404, 500, 601)
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
110 |
AND brokensince > CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
|
111 |
- '1 day 12 hours'::interval
|
|
112 |
""", orderBy=["recursionlevel", "parentname", "url"]) |
|
113 |
||
114 |
rep = report("New Arrivals", new_broken_links, total, brokensince=False) |
|
115 |
||
116 |
old_broken_links = CheckedLink.select(""" |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
117 |
resultcode in (404, 500, 601)
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
118 |
AND brokensince <= CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
|
119 |
- '1 day 12 hours'::interval
|
|
120 |
AND brokensince >
|
|
121 |
CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
|
|
122 |
""", orderBy=["recursionlevel", "parentname", "url"]) |
|
123 |
||
124 |
rep += report("Old Favorites", old_broken_links, total, brokensince=True) |
|
125 |
||
126 |
antique_broken_links = CheckedLink.select(""" |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
127 |
resultcode in (404, 500, 601)
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
128 |
AND brokensince <=
|
129 |
CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
|
|
130 |
""", orderBy=["brokensince", "recursionlevel", "parentname", "url"]) |
|
131 |
||
132 |
rep += report( |
|
133 |
"Hall of Shame", antique_broken_links, total, brokensince=True |
|
134 |
)
|
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
135 |
|
136 |
if not options.email: |
|
137 |
# Print to stdout in system encoding - might raise UnicodeError on
|
|
138 |
# some systems. Tough.
|
|
139 |
print rep |
|
140 |
else: |
|
141 |
# Override this setting - we are only here if email explicitly
|
|
142 |
# requested on the command line.
|
|
143 |
config.zopeless.send_email = True |
|
144 |
simple_sendmail( |
|
145 |
"noreply@canonical.com", [options.email], options.subject, |
|
146 |
rep, {'Keywords': 'LinkChecker', 'X-Fnord': 'Fnord'} |
|
147 |
)
|
|
148 |
||
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
149 |
def report(title, links, total, brokensince=True):
    """Return one section of the broken-link report as a block of text.

    title       -- section heading text.
    links       -- the links to list; must support count() and iteration,
                   yielding objects with url, parentname, result,
                   warningstring and brokensince attributes.
    total       -- overall number of links, shown in the heading as
                   "(<links in this section>/<total>)".
    brokensince -- when true, add a "Since" row with the date each link was
                   first recorded as broken.

    The section is an underlined heading followed by one paragraph per
    link, and ends with an extra blank line.
    """
    heading = "%s (%d/%d)" % (title, links.count(), total)
    chunks = [heading, "\n", "=" * len(heading), "\n"]

    def add_row(label, value):
        # Labels are padded to the width of the longest one ("Warning").
        chunks.append("%-7s: %s\n" % (label, str(value)))

    for link in links:
        add_row("Link", link.url)
        add_row("Parent", link.parentname)
        add_row("Result", link.result)
        if link.warningstring:
            add_row("Warning", link.warningstring)
        # The timestamp attribute is `brokensince`; there is no `since`
        # attribute on the link.  It may be unset, in which case there is
        # nothing to show.
        if brokensince and link.brokensince is not None:
            add_row("Since", link.brokensince.strftime('%A %d %B %Y'))
        chunks.append("\n")
    chunks.append("\n")

    return "".join(chunks)
|
172 |
||
173 |
||
174 |
if __name__ == '__main__':
    # Command line: standard database and logging options plus our own.
    parser = OptionParser("Usage: %prog [OPTIONS] [input.csv]")
    db_options(parser)
    logger_options(parser)

    parser.add_option(
            "-c", "--create", dest="create", action="store_true",
            default=False, help="Create the database tables"
            )
    parser.add_option(
            "-s", "--subject", dest="subject", metavar="SUBJECT",
            default="LinkChecker report", help="Email using SUBJECT"
            )
    parser.add_option(
            "-t", "--to", dest="email", metavar="ADDRESS",
            default=None, help="Email to ADDRESS"
            )

    # `options` stays a module-level name on purpose: main() reads
    # options.email and options.subject from it.
    options, args = parser.parse_args()

    log = logger(options)

    # Input is the named file, or stdin when no file (or '-') is given.
    if args and args[0] != '-':
        csvfile = open(args[0], 'rb')
    else:
        log.debug("Reading from stdin")
        csvfile = sys.stdin

    ztm = initZopeless(implicitBegin=True)

    # Creating the table automatically when it is missing is unfortunately
    # broken, so it only happens when asked for on the command line.
    if options.create:
        CheckedLink.createTable(ifNotExists=True)

    main(csvfile, log)
    ztm.commit()
|
213 |