#!/usr/bin/env python2.5

"""
This script is here to help us discover what the text equivalent of a
Roundup numeric field is remotely, without access to the Roundup
database.

It does this by downloading all bugs from the remote bug tracker in
CSV format, which gives us numeric values for the fields we're
interested in (e.g. status and substatus).
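
For instance, the exported CSV might look like this (the values are
purely illustrative; real ones depend on the remote tracker):

    id,status,substatus
    12,1,2
    34,3,9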

It then discovers all distinct combinations of those fields, then
downloads an example bug page for each. It scrapes the bug page to
find the text that corresponds to the numeric value we already have.

There is a race condition: someone can edit the bug page between the
CSV download and the bug page download, so be sure to run this more
than once and compare the results.

To complicate matters, downloaded pages are cached. To redownload the
CSV or a bug page, the cache file must be deleted. It is a completely
non-HTTP-compliant cache! This is an aid during development, when
this script is run many times, and it also provides a measure of
robustness against errors; there's no need to start from the
beginning every time.

Perhaps the best way to make this work for a new Roundup instance is
to subclass RoundupSniffer, implement get_text_values(), and populate
the class-level "fields" variable. See MplayerStatusSniffer for an
example.
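
A hypothetical invocation (the base URL below is a guess at the
tracker's issue URL, to which the script appends query strings and
bug ids directly; adjust it for your instance):

    python2.5 path/to/this/script \
        --base-url=http://roundup.mplayerhq.hu/roundup/ffmpeg/issue \
        --delay=2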
"""

__metaclass__ = type

import csv
import optparse
import sys
import urllib2

from base64 import urlsafe_b64encode
from os import mkdir
from os.path import join, exists
from pprint import pprint
from time import sleep
from urllib import urlencode

from BeautifulSoup import BeautifulSoup


class RoundupSniffer:
    """Sniffs the meaning of numeric fields in remote Roundups."""

    fields = ('status',)

    def __init__(self, base_url, cache_dir):
        self.base_url = base_url
        self.cache_dir = cache_dir
        if not exists(self.cache_dir):
            mkdir(self.cache_dir)

    def fetch(self, url):
        """Fetch the URL, consulting the cache first.

        Cache files are named after the urlsafe base64 encoding of
        the URL and are never invalidated; delete a file to force a
        fresh download.
        """
        filename = join(self.cache_dir, urlsafe_b64encode(url))
        if not exists(filename):
            open(filename, 'wb').write(
                urllib2.urlopen(url).read())
        return open(filename, 'rb')

    def get_all_bugs(self):
        """Export bugs from the remote tracker as a list of dicts.

        Roundup's export_csv action returns the id column plus the
        fields of interest in CSV form.
        """
        all_fields = ['id']
        all_fields.extend(self.fields)
        query = [
            ('@action', 'export_csv'),
            ('@columns', ','.join(all_fields)),
            ('@sort', 'activity'),
            ('@group', 'priority'),
            ('@pagesize', '50'),
            ('@startwith', '0'),
            ]
        url = '%s?%s' % (self.base_url, urlencode(query))
        bugs = csv.DictReader(self.fetch(url))
        return list(bugs)

    def get_text_values(self, bug):
        """Return the human-readable values of self.fields for a bug.

        Subclasses must override this.
        """
        raise NotImplementedError(self.get_text_values.func_name)


class MplayerStatusSniffer(RoundupSniffer):
    """Sniffer for the MPlayer/FFmpeg Roundup.

    http://roundup.mplayerhq.hu/roundup/ffmpeg/

    This looks to be a mostly unmodified instance, so this sniffer may
    be useful in general.
    """

    fields = ('status', 'substatus')

    def get_text_values(self, bug):
        """Return the text of status and substatus for the given bug.

        This is done by downloading the HTML bug page and scraping it:
        the status and substatus appear as <span> elements in the
        table cell that follows the 'Status' header cell.
        """
        url = '%s%s' % (self.base_url, bug['id'])
        page = self.fetch(url).read()
        soup = BeautifulSoup(page)
        return tuple(
            node.string for node in
            soup.find('th', text='Status').findNext('td').findAll('span'))


def get_distinct(things, fields):
    """Identify every distinct combination of fields.

    For each combination also return one example thing.
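
    For example (values invented for illustration; when several
    things share the same combination, the last one seen wins):

        >>> combos = get_distinct(
        ...     [{'a': 1}, {'a': 1}, {'a': 2}], ('a',))
        >>> sorted(combos.keys())
        [(1,), (2,)]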
    """
    def key(thing):
        return tuple(thing[field] for field in fields)
    return dict((key(thing), thing) for thing in things)


def gen_mapping(sniffer):
    """Generate a mapping from raw field values to text values."""
    bugs = sniffer.get_all_bugs()
    distinct_bugs = get_distinct(bugs, sniffer.fields)
    for raw_values, bug in distinct_bugs.items():
        text_values = sniffer.get_text_values(bug)
        yield raw_values, text_values


def parse_args(args):
    parser = optparse.OptionParser()
    parser.add_option(
        "--base-url", dest="base_url",
        help="The base URL at the remote Roundup instance.",
        metavar="URL")
    parser.add_option(
        "--delay", dest="delay", type="int",
        help=("The number of seconds to wait between each page "
              "load [default: %default]."))
    parser.add_option(
        "--cache-dir", dest="cache_dir",
        help=("A directory in which to cache fetched resources "
              "[default: %default]."),
        metavar="DIR")
    parser.add_option(
        "--sniffer-class", dest="sniffer_class",
        help="The sniffer class to use [default: %default].",
        metavar="CLASSNAME")
    parser.set_defaults(
        delay=0, cache_dir="roundup_sniffer_cache",
        sniffer_class="MplayerStatusSniffer")

    options, args = parser.parse_args(args)

    if not options.base_url:
        parser.error("Please specify a base URL.")
    if len(args) > 0:
        parser.error("Positional arguments are not accepted: %s" %
                     ' '.join(args))

    return options


if __name__ == '__main__':
    options = parse_args(sys.argv[1:])
    # Look the sniffer class up by name in this module's namespace.
    sniffer_class = globals()[options.sniffer_class]
    sniffer = sniffer_class(options.base_url, options.cache_dir)
    mapping = {}
    for raw, text in gen_mapping(sniffer):
        mapping[raw] = text
        sleep(options.delay)
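    # The printed mapping pairs raw CSV field values with scraped
    # text values. A run against a hypothetical tracker might print
    # something like (values invented for illustration):
    #   {('1', '1'): (u'open', u'new'),
    #    ('3', '9'): (u'closed', u'fixed')}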
    pprint(mapping)