~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python -S
8687.15.22 by Karl Fogel
Add the copyright header block to the remaining .py files.
2
#
3
# Copyright 2009 Canonical Ltd.  This software is licensed under the
4
# GNU Affero General Public License version 3 (see the file LICENSE).
5
1990 by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker
6
"""
7
Process LinkChecker .csv results for the staging server, stuff them into
8
a database and generate a report suitable for spamming developers with.
9
"""
10
11
__metaclass__ = type
12
7178.3.1 by Curtis Hovey
Remove that last of the callsites that mutate the config.
13
# pylint: disable-msg=W0403
1990 by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker
14
import _pythonpath
15
16
import csv, re, sys
17
from StringIO import StringIO
18
from optparse import OptionParser
19
from sqlobject import StringCol, IntCol, BoolCol, FloatCol, DatabaseIndex
20
from canonical.database.datetimecol import UtcDateTimeCol
21
from canonical.database.constants import UTC_NOW
22
from canonical.launchpad.scripts import db_options, logger_options, logger
23
from canonical.lp import initZopeless
24
from canonical.database.sqlbase import SQLBase
25
from canonical.config import config
26
from canonical.launchpad.mail import simple_sendmail
27
28
2027 by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks
29
class CheckedLink(SQLBase):
    """One checked URL, stored in the CheckedLink table.

    Rows are created and updated by main() from the fields of a
    LinkChecker CSV row, keyed on the unique `url` column.
    """

    _table = 'CheckedLink'

    # Fields taken from the CSV row.
    urlname = StringCol(notNull=True)
    recursionlevel = IntCol(notNull=True)
    parentname = StringCol(notNull=True)
    baseref = StringCol(notNull=True)
    # `result` is the textual outcome; `resultcode` is the integer that
    # main() derives from it (or 200 for valid links).
    result = StringCol(notNull=True)
    resultcode = IntCol(notNull=True)
    warningstring = StringCol(notNull=True)
    infostring = StringCol(notNull=True)
    valid = BoolCol(notNull=True)
    # alternateID provides the CheckedLink.byUrl() lookup used by main().
    url = StringCol(notNull=True, unique=True, alternateID=True)
    line = IntCol(notNull=True)
    # Holds the CSV 'column' field, renamed because column is a SQL keyword.
    col = IntCol(notNull=True)
    name = StringCol(notNull=True)
    dltime = FloatCol()
    dlsize = IntCol()
    checktime = FloatCol(notNull=True)

    # Defaults to the time the row is created; main() sets it to None
    # for links whose resultcode is below 400.
    brokensince = UtcDateTimeCol(notNull=False, default=UTC_NOW)

    # The CSV 'cached' field is not stored; main() drops it from each row.

    resultcode_index = DatabaseIndex('resultcode')
    recursionlevel_index = DatabaseIndex('recursionlevel')
52
53
54
def _normalize_row(row):
    """Convert one raw CSV row into values suitable for CheckedLink.

    The dict is modified in place and returned. None is returned for rows
    that must be ignored (an invalid link whose result is 'URL is empty').
    """
    # Every CSV field is a string, so the flag has to be parsed before it
    # is tested: the string 'False' is truthy.
    is_valid = row['valid'] in ('True', 'true')

    if is_valid:
        row['resultcode'] = 200
        row['result'] = '200 Ok'
    else:
        result = row['result'] or ''
        match = re.search(r'^(\d+)', result)
        if match is not None:
            # The result already starts with a numeric status code.
            row['resultcode'] = int(match.group(1))
        elif result == 'URL is empty':
            return None
        elif 'The read operation timed out' in result:
            # 601 is this script's own code for timeouts.
            row['result'] = '601 %s' % result
            row['resultcode'] = 601
        else:
            # 602 is this script's own code for any other failure.
            row['result'] = '602 %s' % result
            row['resultcode'] = 602

    # Cast input and drop unwanted fields (to avoid confusing SQLObject).
    row['recursionlevel'] = int(row['recursionlevel'])
    row['valid'] = is_valid
    row['line'] = int(row['line'])
    # Renamed - column is a SQL keyword.
    row['col'] = int(row.pop('column'))
    row['dltime'] = float(row['dltime'])
    row['dlsize'] = int(row['dlsize'])
    row['checktime'] = float(row['checktime'])
    row.pop('cached', None)
    if row['resultcode'] < 400:
        # Not broken: clear the timestamp instead of taking the default.
        row['brokensince'] = None
    return row


def main(csvfile, log):
    """Load LinkChecker CSV results into the database and report on them.

    :param csvfile: iterable of CSV lines. NUL characters are stripped and
        lines starting with '#' are skipped before parsing.
    :param log: logger supplied by the caller; currently unused.

    The report is printed to stdout, or mailed when the module-level
    `options.email` (set by the command line handling) is given.
    """
    # Every link seen in this run, broken or not.
    seen = set()

    # Read the csv file, updating the database as we go.
    reader = csv.DictReader(
        line.replace('\0', '') for line in csvfile
        if not line.startswith('#'))
    for raw_row in reader:
        row = _normalize_row(raw_row)
        if row is None:
            continue
        try:
            link = CheckedLink.byUrl(row['url'])
            link.set(**row)
        except LookupError:
            link = CheckedLink(**row)
        seen.add(link)

    total = len(seen)

    # Delete any entries that were not spidered
    # XXX StuartBishop 2005-07-04: Only if older than a threshold.
    for link in CheckedLink.select():
        if link not in seen:
            link.destroySelf()

    new_broken_links = CheckedLink.select("""
        resultcode in (404, 500, 601)
        AND brokensince > CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            - '1 day 12 hours'::interval
        """, orderBy=["recursionlevel", "parentname", "url"])

    rep = report("New Arrivals", new_broken_links, total, brokensince=False)

    old_broken_links = CheckedLink.select("""
        resultcode in (404, 500, 601)
        AND brokensince <= CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            - '1 day 12 hours'::interval
        AND brokensince >
            CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
        """, orderBy=["recursionlevel", "parentname", "url"])

    rep += report("Old Favorites", old_broken_links, total, brokensince=True)

    antique_broken_links = CheckedLink.select("""
        resultcode in (404, 500, 601)
        AND brokensince <=
            CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
        """, orderBy=["brokensince", "recursionlevel", "parentname", "url"])

    rep += report(
        "Hall of Shame", antique_broken_links, total, brokensince=True)

    if not options.email:
        # Print to stdout in system encoding - might raise UnicodeError on
        # some systems. Tough.
        # With the print statement, print(rep) is the same as `print rep`.
        print(rep)
    else:
        # Override this setting - we are only here if email explicitly
        # requested on the command line.
        send_email_data = """
            [zopeless]
            send_email: True
            """
        config.push('send_email_data', send_email_data)
        try:
            simple_sendmail(
                "noreply@canonical.com", [options.email], options.subject,
                rep, {'Keywords': 'LinkChecker', 'X-Fnord': 'Fnord'})
        finally:
            # Always undo the override, even if sending fails.
            config.pop('send_email_data')
157
1990 by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker
158
2027 by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks
159
def report(title, links, total, brokensince=True):
    """Render one section of the link report as plain text.

    :param title: heading text for the section.
    :param links: the links to list; must support a no-argument count()
        and iteration, yielding objects with url, parentname, result,
        warningstring and brokensince attributes.
    :param total: overall number of links, shown in the heading.
    :param brokensince: when true, add a "Since" row with the date held
        in each link's brokensince attribute.
    :return: the section text, ending with a blank line.
    """
    heading = "%s (%d/%d)" % (title, links.count(), total)
    lines = [heading, "=" * len(heading)]

    def add_row(label, value):
        lines.append("%-7s: %s" % (label, str(value)))

    for link in links:
        add_row("Link", link.url)
        add_row("Parent", link.parentname)
        add_row("Result", link.result)
        if link.warningstring:
            add_row("Warning", link.warningstring)
        # The attribute is brokensince (there is no `since`); skip the row
        # when no timestamp has been recorded.
        if brokensince and link.brokensince is not None:
            add_row("Since", link.brokensince.strftime('%A %d %B %Y'))
        # Blank line between entries.
        lines.append("")
    # Blank line closing the section.
    lines.append("")

    return "\n".join(lines) + "\n"
182
183
184
if __name__ == '__main__':
    # Command line entry point: parse options, pick the CSV input, then
    # run main() and commit.
    parser = OptionParser("Usage: %prog [OPTIONS] [input.csv]")
    db_options(parser)
    logger_options(parser)

    parser.add_option(
        "-c", "--create", action="store_true", dest="create",
        default=False, help="Create the database tables")

    parser.add_option(
        "-s", "--subject", dest="subject", help="Email using SUBJECT",
        metavar="SUBJECT", default="LinkChecker report")

    parser.add_option(
        "-t", "--to", dest="email", help="Email to ADDRESS",
        metavar="ADDRESS", default=None)

    # `options` must stay a module-level name: main() reads it.
    options, args = parser.parse_args()

    log = logger(options)

    # No argument, or '-', means read the CSV from stdin.
    if not args or args[0] == '-':
        log.debug("Reading from stdin")
        csvfile = sys.stdin
    else:
        csvfile = open(args[0], 'rb')

    try:
        ztm = initZopeless()

        if options.create:
            # Create the table if it doesn't exist. Unfortunately, this is
            # broken so we only create the table if requested on the
            # command line
            CheckedLink.createTable(ifNotExists=True)

        main(csvfile, log)
        ztm.commit()
    finally:
        # Close a file we opened ourselves; leave stdin alone.
        if csvfile is not sys.stdin:
            csvfile.close()
223