10637.3.1
by Guilherme Salgado
Use the default python version instead of a hard-coded version |
1 |
#!/usr/bin/python -S
|
8687.15.22
by Karl Fogel
Add the copyright header block to the remaining .py files. |
2 |
#
|
3 |
# Copyright 2009 Canonical Ltd. This software is licensed under the
|
|
4 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
|
5 |
||
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
6 |
"""
|
7 |
Process LinkChecker .csv results for the staging server, stuff them into
|
|
8 |
a database and generate a report suitable for spamming developers with.
|
|
9 |
"""
|
|
10 |
||
11 |
__metaclass__ = type |
|
12 |
||
7178.3.1
by Curtis Hovey
Remove that last of the callsites that mutate the config. |
13 |
# pylint: disable-msg=W0403
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
14 |
import _pythonpath |
15 |
||
16 |
import csv, re, sys |
|
17 |
from StringIO import StringIO |
|
18 |
from optparse import OptionParser |
|
19 |
from sqlobject import StringCol, IntCol, BoolCol, FloatCol, DatabaseIndex |
|
20 |
from canonical.database.datetimecol import UtcDateTimeCol |
|
21 |
from canonical.database.constants import UTC_NOW |
|
22 |
from canonical.launchpad.scripts import db_options, logger_options, logger |
|
23 |
from canonical.lp import initZopeless |
|
24 |
from canonical.database.sqlbase import SQLBase |
|
25 |
from canonical.config import config |
|
26 |
from canonical.launchpad.mail import simple_sendmail |
|
27 |
||
28 |
||
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
29 |
class CheckedLink(SQLBase):
    """A single URL checked by LinkChecker, stored in the CheckedLink table.

    main() builds and updates rows with CheckedLink(**row) / link.set(**row)
    from the parsed CSV, so the column names here mirror the CSV field names
    (with the CSV 'column' field stored as `col`).
    """

    _table = 'CheckedLink'

    # Where the link was found.
    urlname = StringCol(notNull=True)
    recursionlevel = IntCol(notNull=True)
    parentname = StringCol(notNull=True)
    baseref = StringCol(notNull=True)

    # Outcome of the check; `resultcode` is the numeric code main() derives
    # from the leading digits of `result`.
    result = StringCol(notNull=True)
    resultcode = IntCol(notNull=True)
    warningstring = StringCol(notNull=True)
    infostring = StringCol(notNull=True)
    valid = BoolCol(notNull=True)

    # `url` is unique and an alternate ID, which is what provides the
    # CheckedLink.byUrl() lookup used by main().
    url = StringCol(notNull=True, unique=True, alternateID=True)
    line = IntCol(notNull=True)
    col = IntCol(notNull=True)
    name = StringCol(notNull=True)

    # Download / check measurements (dltime and dlsize may be NULL).
    dltime = FloatCol()
    dlsize = IntCol()
    checktime = FloatCol(notNull=True)

    # Defaults to "now" when a row is created; main() sets it to None for
    # rows whose resultcode is below 400.
    brokensince = UtcDateTimeCol(notNull=False, default=UTC_NOW)

    # The CSV 'cached' field is deliberately not stored:
    #cached = BoolCol(notNull=True)

    resultcode_index = DatabaseIndex('resultcode')
    recursionlevel_index = DatabaseIndex('recursionlevel')
|
52 |
||
53 |
||
54 |
def main(csvfile, log): |
|
55 |
||
56 |
# Where we store broken links
|
|
57 |
broken = set() |
|
58 |
||
59 |
# Suck in the csv file, updating the database and adding to the broken set
|
|
60 |
reader = csv.DictReader( |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
61 |
(line.replace('\0','') for line in csvfile |
62 |
if not line.startswith('#')) |
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
63 |
)
|
64 |
for row in reader: |
|
65 |
# Get the result code
|
|
2041
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
66 |
if row['valid']: |
67 |
row['resultcode'] = 200 |
|
68 |
row['result'] = '200 Ok' |
|
69 |
else: |
|
70 |
m = re.search('^(\d+)', row['result'] or '') |
|
71 |
if m is None: |
|
72 |
if row['result'] == 'URL is empty': |
|
73 |
continue
|
|
74 |
elif 'The read operation timed out' in row['result']: |
|
75 |
row['result'] = '601 %s' % row['result'] |
|
76 |
row['resultcode'] = 601 |
|
77 |
else: |
|
78 |
row['result'] = '602 %s' % row['result'] |
|
79 |
row['resultcode'] = 602 |
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
80 |
else: |
2041
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
81 |
row['resultcode'] = int(m.group(1)) |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
82 |
|
83 |
# Cast input and nuke crap (to avoid confusing SQLObject)
|
|
84 |
row['recursionlevel'] = int(row['recursionlevel']) |
|
85 |
row['valid'] = row['valid'] in ('True', 'true') |
|
86 |
row['line'] = int(row['line']) |
|
87 |
row['col'] = int(row['column']) # Renamed - column is a SQL keyword |
|
88 |
del row['column'] |
|
89 |
row['dltime'] = float(row['dltime']) |
|
90 |
row['dlsize'] = int(row['dlsize']) |
|
91 |
row['checktime'] = float(row['checktime']) |
|
92 |
del row['cached'] |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
93 |
if row['resultcode'] < 400: |
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
94 |
row['brokensince'] = None |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
95 |
|
96 |
try: |
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
97 |
link = CheckedLink.byUrl(row['url']) |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
98 |
link.set(**row) |
99 |
except LookupError: |
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
100 |
link = CheckedLink(**row) |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
101 |
broken.add(link) |
102 |
||
103 |
total = len(broken) |
|
104 |
||
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
105 |
# Delete any entries that were not spidered
|
4664.1.1
by Curtis Hovey
Normalized comments for bug 3732. |
106 |
# XXX StuartBishop 2005-07-04: Only if older than a threshold.
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
107 |
for link in CheckedLink.select(): |
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
108 |
if link in broken: |
109 |
continue
|
|
110 |
link.destroySelf() |
|
111 |
||
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
112 |
new_broken_links = CheckedLink.select(""" |
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
113 |
resultcode in (404, 500, 601)
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
114 |
AND brokensince > CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
|
115 |
- '1 day 12 hours'::interval
|
|
116 |
""", orderBy=["recursionlevel", "parentname", "url"]) |
|
117 |
||
118 |
rep = report("New Arrivals", new_broken_links, total, brokensince=False) |
|
119 |
||
120 |
old_broken_links = CheckedLink.select(""" |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
121 |
resultcode in (404, 500, 601)
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
122 |
AND brokensince <= CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
|
123 |
- '1 day 12 hours'::interval
|
|
124 |
AND brokensince >
|
|
125 |
CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
|
|
126 |
""", orderBy=["recursionlevel", "parentname", "url"]) |
|
127 |
||
128 |
rep += report("Old Favorites", old_broken_links, total, brokensince=True) |
|
129 |
||
130 |
antique_broken_links = CheckedLink.select(""" |
|
2034
by Canonical.com Patch Queue Manager
[trivial] linkreport.py tweaks |
131 |
resultcode in (404, 500, 601)
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
132 |
AND brokensince <=
|
133 |
CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
|
|
134 |
""", orderBy=["brokensince", "recursionlevel", "parentname", "url"]) |
|
135 |
||
136 |
rep += report( |
|
137 |
"Hall of Shame", antique_broken_links, total, brokensince=True |
|
138 |
)
|
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
139 |
|
140 |
if not options.email: |
|
141 |
# Print to stdout in system encoding - might raise UnicodeError on
|
|
142 |
# some systems. Tough.
|
|
143 |
print rep |
|
144 |
else: |
|
145 |
# Override this setting - we are only here if email explicitly
|
|
146 |
# requested on the command line.
|
|
7178.3.1
by Curtis Hovey
Remove that last of the callsites that mutate the config. |
147 |
send_email_data = """ |
148 |
[zopeless]
|
|
149 |
send_email: True
|
|
150 |
"""
|
|
151 |
config.push('send_email_data', send_email_data) |
|
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
152 |
simple_sendmail( |
153 |
"noreply@canonical.com", [options.email], options.subject, |
|
154 |
rep, {'Keywords': 'LinkChecker', 'X-Fnord': 'Fnord'} |
|
155 |
)
|
|
7178.3.1
by Curtis Hovey
Remove that last of the callsites that mutate the config. |
156 |
config.pop('send_email_data') |
157 |
||
1990
by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker |
158 |
|
2027
by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks |
159 |
def report(title, links, total, brokensince=True):
    """Render one titled section of the link report as plain text.

    :param title: Heading text for the section.
    :param links: The links to list. Must support `count()` with no
        arguments and iteration; each item needs `url`, `parentname`,
        `result`, `warningstring` and (when `brokensince` is true)
        `brokensince` attributes.
    :param total: Total number of links, shown in the heading next to the
        number of links in this section.
    :param brokensince: If true, add a "Since" row with the formatted
        `brokensince` date of each link.
    :return: The section as a string: an underlined heading, one group of
        rows per link followed by a blank line, then a final blank line.
    """
    out = StringIO()

    heading = "%s (%d/%d)" % (title, links.count(), total)
    out.write("%s\n" % heading)
    out.write("%s\n" % ("=" * len(heading)))

    def print_row(label, value):
        # Labels are padded to the width of the longest one ("Warning").
        out.write("%-7s: %s\n" % (label, str(value)))

    for link in links:
        print_row("Link", link.url)
        print_row("Parent", link.parentname)
        print_row("Result", link.result)
        if link.warningstring:
            print_row("Warning", link.warningstring)
        if brokensince:
            # The date lives in the `brokensince` attribute; the link has
            # no `since` attribute.
            print_row("Since", link.brokensince.strftime('%A %d %B %Y'))
        out.write("\n")
    out.write("\n")

    return out.getvalue()
|
182 |
||
183 |
||
184 |
if __name__ == '__main__':
    # Command line entry point: parse options, pick the CSV input, then
    # run main() inside a single database transaction.
    parser = OptionParser("Usage: %prog [OPTIONS] [input.csv]")
    db_options(parser)
    logger_options(parser)

    parser.add_option(
        "-c", "--create", action="store_true", dest="create",
        default=False, help="Create the database tables"
        )

    parser.add_option(
        "-s", "--subject", dest="subject", help="Email using SUBJECT",
        metavar="SUBJECT", default="LinkChecker report"
        )

    parser.add_option(
        "-t", "--to", dest="email", help="Email to ADDRESS",
        metavar="ADDRESS", default=None
        )

    # `options` must stay a module-level name: main() reads it.
    options, args = parser.parse_args()

    log = logger(options)

    # No argument, or '-', means read the CSV from stdin.
    if not args or args[0] == '-':
        log.debug("Reading from stdin")
        csvfile = sys.stdin
    else:
        csvfile = open(args[0], 'rb')

    try:
        ztm = initZopeless()

        if options.create:
            # Create the table if it doesn't exist. Unfortunately, this is
            # broken so we only create the table if requested on the
            # command line
            CheckedLink.createTable(ifNotExists=True)

        main(csvfile, log)
        ztm.commit()
    finally:
        # Close the input file we opened ourselves; leave stdin alone.
        if csvfile is not sys.stdin:
            csvfile.close()
|
223 |