~launchpad-pqm/launchpad/devel

3944.1.1 by Francis J. Lacoste
Use system version python2.4 for scripts.
1
#!/usr/bin/python2.4
1990 by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker
2
# Copyright 2004-2005 Canonical Ltd.  All rights reserved.
3
"""
4
Process LinkChecker .csv results for the staging server, stuff them into
5
a database and generate a report suitable for spamming developers with.
6
"""
7
8
__metaclass__ = type
9
7178.3.1 by Curtis Hovey
Remove that last of the callsites that mutate the config.
10
# pylint: disable-msg=W0403
1990 by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker
11
import _pythonpath
12
13
import csv, re, sys
14
from StringIO import StringIO
15
from optparse import OptionParser
16
from sqlobject import StringCol, IntCol, BoolCol, FloatCol, DatabaseIndex
17
from canonical.database.datetimecol import UtcDateTimeCol
18
from canonical.database.constants import UTC_NOW
19
from canonical.launchpad.scripts import db_options, logger_options, logger
20
from canonical.lp import initZopeless
21
from canonical.database.sqlbase import SQLBase
22
from canonical.config import config
23
from canonical.launchpad.mail import simple_sendmail
24
25
2027 by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks
26
class CheckedLink(SQLBase):
    """One link-check result, persisted in the ``CheckedLink`` table.

    main() creates or updates one row per URL from the parsed CSV input;
    the attribute names below are the keyword arguments it passes in.
    """

    _table = 'CheckedLink'

    # Values copied from the CSV row (main() converts the numeric ones).
    urlname = StringCol(notNull=True)
    recursionlevel = IntCol(notNull=True)
    parentname = StringCol(notNull=True)
    baseref = StringCol(notNull=True)
    # Status text; main() makes sure it starts with the numeric code that
    # is also stored in resultcode.
    result = StringCol(notNull=True)
    resultcode = IntCol(notNull=True)
    warningstring = StringCol(notNull=True)
    infostring = StringCol(notNull=True)
    valid = BoolCol(notNull=True)
    # Unique key for a row; alternateID=True is what makes the
    # CheckedLink.byUrl() lookup used by main() available.
    url = StringCol(notNull=True, unique=True, alternateID=True)
    line = IntCol(notNull=True)
    # Holds the CSV 'column' field, renamed because column is a SQL keyword.
    col = IntCol(notNull=True)
    name = StringCol(notNull=True)
    # The two download figures are the only nullable measurements.
    dltime = FloatCol()
    dlsize = IntCol()
    checktime = FloatCol(notNull=True)
    # Defaults to the current UTC time on creation; main() stores NULL here
    # for links whose result code is below 400.
    brokensince = UtcDateTimeCol(notNull=False, default=UTC_NOW)
    # The CSV 'cached' field is deliberately not stored:
    #cached = BoolCol(notNull=True)

    resultcode_index = DatabaseIndex('resultcode')
    recursionlevel_index = DatabaseIndex('recursionlevel')
49
50
51
def main(csvfile, log):
    """Load LinkChecker CSV results into the database and report on them.

    Every usable row of `csvfile` is stored as a CheckedLink (created, or
    updated when the URL is already known), rows for URLs missing from this
    run are deleted, and a three-section report of broken links (result
    codes 404, 500 and 601) is built with report().

    The report is printed to stdout, or mailed when the module-level
    `options.email` is set; `options` is assigned by the ``__main__`` block
    of this script, so main() relies on being run from there.

    :param csvfile: iterable of CSV text lines (an open file or stdin).
    :param log: logger supplied by the caller; not used at present.
    """
    # Every link seen in this run, valid or not.  Used both for the total
    # shown in the report headings and to find stale rows to delete.
    seen = set()

    # Drop NUL characters and lines starting with '#' before the csv module
    # sees the input.
    reader = csv.DictReader(
        line.replace('\0', '') for line in csvfile
        if not line.startswith('#'))

    for row in reader:
        # The CSV carries the flag as text.  Convert it before testing:
        # the string 'False' is truthy, which would mark every link as OK.
        is_valid = row['valid'] in ('True', 'true')

        if is_valid:
            row['resultcode'] = 200
            row['result'] = '200 Ok'
        else:
            result = row['result'] or ''
            m = re.search(r'^(\d+)', result)
            if m is not None:
                # The result text already starts with a numeric code.
                row['resultcode'] = int(m.group(1))
            elif result == 'URL is empty':
                continue
            elif 'The read operation timed out' in result:
                # 601: private code for timeouts (included in the report).
                row['result'] = '601 %s' % result
                row['resultcode'] = 601
            else:
                # 602: private code for any other failure without a
                # numeric code (not included in the report).
                row['result'] = '602 %s' % result
                row['resultcode'] = 602

        # Cast input and drop unwanted fields (to avoid confusing SQLObject)
        row['recursionlevel'] = int(row['recursionlevel'])
        row['valid'] = is_valid
        row['line'] = int(row['line'])
        row['col'] = int(row['column']) # Renamed - column is a SQL keyword
        del row['column']
        row['dltime'] = float(row['dltime'])
        row['dlsize'] = int(row['dlsize'])
        row['checktime'] = float(row['checktime'])
        del row['cached']

        is_broken = row['resultcode'] >= 400
        if not is_broken:
            row['brokensince'] = None

        try:
            link = CheckedLink.byUrl(row['url'])
        except LookupError:
            # New URL.  A broken one gets brokensince from the column
            # default because the key is absent from row.
            link = CheckedLink(**row)
        else:
            if is_broken and link.brokensince is None:
                # Known link that has just become broken: start the clock,
                # otherwise the brokensince comparisons in the report
                # queries below can never match it.
                row['brokensince'] = UTC_NOW
            link.set(**row)
        seen.add(link)

    total = len(seen)

    # Delete any entries that were not spidered
    # XXX StuartBishop 2005-07-04: Only if older than a threshold.
    for link in CheckedLink.select():
        if link not in seen:
            link.destroySelf()

    # Broken for less than a day and a half.
    new_broken_links = CheckedLink.select("""
        resultcode in (404, 500, 601)
        AND brokensince > CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            - '1 day 12 hours'::interval
        """, orderBy=["recursionlevel", "parentname", "url"])

    rep = report("New Arrivals", new_broken_links, total, brokensince=False)

    # Broken for between a day and a half and two weeks.
    old_broken_links = CheckedLink.select("""
        resultcode in (404, 500, 601)
        AND brokensince <= CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            - '1 day 12 hours'::interval
        AND brokensince >
            CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
        """, orderBy=["recursionlevel", "parentname", "url"])

    rep += report("Old Favorites", old_broken_links, total, brokensince=True)

    # Broken for two weeks or more, oldest first.
    antique_broken_links = CheckedLink.select("""
        resultcode in (404, 500, 601)
        AND brokensince <=
            CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - '14 days'::interval
        """, orderBy=["brokensince", "recursionlevel", "parentname", "url"])

    rep += report(
        "Hall of Shame", antique_broken_links, total, brokensince=True)

    if not options.email:
        # Print to stdout in system encoding - might raise UnicodeError on
        # some systems. Tough.
        print(rep)
    else:
        # Override this setting - we are only here if email explicitly
        # requested on the command line.
        send_email_data = """
            [zopeless]
            send_email: True
            """
        config.push('send_email_data', send_email_data)
        try:
            simple_sendmail(
                "noreply@canonical.com", [options.email], options.subject,
                rep, {'Keywords': 'LinkChecker', 'X-Fnord': 'Fnord'})
        finally:
            # Always undo the override, even when sending fails.
            config.pop('send_email_data')
154
1990 by Canonical.com Patch Queue Manager
[r=spiv] Spam launchpad developers with errors picked up by LinkChecker
155
2027 by Canonical.com Patch Queue Manager
[trivial] linkchecker report tweaks
156
def report(title, links, total, brokensince=True):
    """Return one section of the link report as text.

    The section is a heading of the form ``title (count/total)`` underlined
    with '=', followed by one paragraph per link and a trailing blank line.

    :param title: section title used in the heading.
    :param links: the links to list; must be iterable and provide a
        no-argument ``count()`` (main() passes ``CheckedLink.select()``
        results).
    :param total: overall number of links checked, shown in the heading.
    :param brokensince: when true, each paragraph also shows the date taken
        from the link's ``brokensince`` attribute.
    :return: the formatted section, ending in a newline.
    """
    heading = "%s (%d/%d)" % (title, links.count(), total)
    lines = [heading, "=" * len(heading)]

    def add_row(label, value):
        # Labels are padded to the width of the longest one ("Warning").
        lines.append("%-7s: %s" % (label, str(value)))

    for link in links:
        add_row("Link", link.url)
        add_row("Parent", link.parentname)
        add_row("Result", link.result)
        if link.warningstring:
            add_row("Warning", link.warningstring)
        if brokensince:
            # The attribute is 'brokensince'; the links have no 'since'.
            add_row("Since", link.brokensince.strftime('%A %d %B %Y'))
        lines.append("")  # blank line after each link
    lines.append("")  # blank line closing the section

    return "\n".join(lines) + "\n"
179
180
181
if __name__ == '__main__':
    # Build the command line: the shared helpers from
    # canonical.launchpad.scripts add their options first, then the
    # script-specific switches follow.
    parser = OptionParser("Usage: %prog [OPTIONS] [input.csv]")
    db_options(parser)
    logger_options(parser)
    parser.add_option(
        "-c", "--create", action="store_true", dest="create",
        default=False, help="Create the database tables")
    parser.add_option(
        "-s", "--subject", dest="subject", help="Email using SUBJECT",
        metavar="SUBJECT", default="LinkChecker report")
    parser.add_option(
        "-t", "--to", dest="email", help="Email to ADDRESS",
        metavar="ADDRESS", default=None)

    # NOTE: main() reads 'options' as a module-level name, so it must keep
    # this exact name.
    options, args = parser.parse_args()

    log = logger(options)

    # Input comes from the named file, or from stdin when no file is given
    # or the file name is '-'.
    if args and args[0] != '-':
        csvfile = open(args[0], 'rb')
    else:
        log.debug("Reading from stdin")
        csvfile = sys.stdin

    ztm = initZopeless(implicitBegin=True)

    # Create the table if it doesn't exist. Unfortunately, this is broken
    # so we only create the table if requested on the command line
    if options.create:
        CheckedLink.createTable(ifNotExists=True)

    main(csvfile, log)
    # Reached only when main() returned without raising.
    ztm.commit()
220