~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/env python
8452.3.3 by Karl Fogel
* utilities/: Add copyright header block to source files that were
2
#
8687.15.2 by Karl Fogel
In files modified by r8688, change "<YEARS>" to "2009", as per
3
# Copyright 2009 Canonical Ltd.  This software is licensed under the
8687.15.3 by Karl Fogel
Shorten the copyright header block to two lines.
4
# GNU Affero General Public License version 3 (see the file LICENSE).
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
5
6
"""
7
This script is here to help us discover what the text equivalent of a
8
Roundup numeric field is remotely, without access to the Roundup
9
database.
10
11
It does this by downloading all bugs from the remote bug tracker in
12
CSV format, which gives us numeric values for the fields we're
13
interested in (e.g. status and substatus).
14
15
It then discovers all distinct combinations of those fields then
16
downloads an example bug page for each. It scrapes the bug page to
17
find the text that corresponds to the numeric value we already have.
18
19
There is a race condition. Someone can edit the bug page between the
20
CSV download and the bug page download, so be sure to run this more
21
than once and compare the results.
22
23
To complicate matters, downloaded pages are cached. To redownload the
24
CSV or a bug page the cache file must be deleted. It is a completely
25
non-HTTP compliant cache! This is an aid during development when this
26
script is run many times, and also provides a measure of robustness
27
against errors; there's no need to start from the beginning every
28
time.
29
7403.5.17 by Gavin Panella
Typo.
30
Perhaps the best way to make this work for a new Roundup instance is
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
31
to subclass RoundupSniffer and implement get_text_values() and
32
populate the class-level "fields" variable. See MplayerStatusSniffer
33
for an example.
34
"""
35
36
__metaclass__ = type
37
38
import csv
39
import optparse
40
import sys
41
import urllib2
42
43
from base64 import urlsafe_b64encode
44
from os import mkdir
45
from os.path import join, exists
46
from pprint import pprint
47
from time import sleep
48
from urllib import urlencode
49
50
from BeautifulSoup import BeautifulSoup
51
52
53
class RoundupSniffer:
    """Sniffs the meaning of numeric fields in remote Roundups."""

    # Remote fields (in addition to 'id') to request in the CSV
    # export.  Subclasses override this; see MplayerStatusSniffer.
    fields = ('status',)

    def __init__(self, base_url, cache_dir):
        """Create a sniffer.

        :param base_url: The base URL of the remote Roundup instance.
        :param cache_dir: Directory in which downloaded resources are
            cached; created if it does not already exist.
        """
        self.base_url = base_url
        self.cache_dir = cache_dir
        if not exists(self.cache_dir):
            mkdir(self.cache_dir)

    def fetch(self, url):
        """Fetch the URL, consulting the cache first.

        :return: A file object, opened for reading, on the cached
            copy.  The caller is responsible for closing it.
        """
        # base64-encode the URL to get a filesystem-safe cache key.
        filename = join(self.cache_dir, urlsafe_b64encode(url))
        if not exists(filename):
            # Close the response and the cache file explicitly so a
            # failure part-way through the download does not leak
            # file descriptors.
            response = urllib2.urlopen(url)
            try:
                data = response.read()
            finally:
                response.close()
            cache_file = open(filename, 'wb')
            try:
                cache_file.write(data)
            finally:
                cache_file.close()
        return open(filename, 'rb')

    def get_all_bugs(self):
        """Download all bugs in CSV form and return a list of dicts.

        Each dict maps a field name ('id' plus self.fields) to the
        raw (numeric) value reported by the remote tracker.
        """
        all_fields = ['id']
        all_fields.extend(self.fields)
        query = [
            ('@action', 'export_csv'),
            ('@columns', ','.join(all_fields)),
            ('@sort', 'activity'),
            ('@group', 'priority'),
            ('@pagesize', '50'),
            ('@startwith', '0'),
            ]
        url = '%s?%s' % (self.base_url, urlencode(query))
        # Consume the reader eagerly so the cached file can be closed
        # instead of being left to the garbage collector.
        csv_file = self.fetch(url)
        try:
            return list(csv.DictReader(csv_file))
        finally:
            csv_file.close()

    def get_text_values(self, bug):
        """Return the human-readable values of self.fields for `bug`.

        Abstract; subclasses must override this.
        """
        # __name__ works on both Python 2 and 3; func_name was a
        # Python-2-only alias and produced the same message.
        raise NotImplementedError(self.get_text_values.__name__)
89
90
91
class MplayerStatusSniffer(RoundupSniffer):
    """Sniffer for the Mplayer/FFMpeg Roundup.

    http://roundup.mplayerhq.hu/roundup/ffmpeg/

    This looks to be a mostly unmodified instance, so this sniffer may
    be useful in general.
    """

    fields = ('status', 'substatus')

    def get_text_values(self, bug):
        """Return the status and substatus texts for the given bug.

        Works by downloading the bug's HTML page and scraping the
        values out of it.
        """
        # Bug pages live directly under the base URL, keyed by id.
        page = self.fetch('%s%s' % (self.base_url, bug['id'])).read()
        status_cell = BeautifulSoup(page).find(
            'th', text='Status').findNext('td')
        return tuple(
            span.string for span in status_cell.findAll('span'))
113
114
115
def get_distinct(things, fields):
    """Identify every distinct combination of fields.

    For each combination also return one example thing.
    """
    distinct = {}
    for thing in things:
        # Things sharing a combination overwrite each other, so the
        # last one seen becomes the example.
        combination = tuple(thing[field] for field in fields)
        distinct[combination] = thing
    return distinct
123
124
125
def gen_mapping(sniffer):
    """Generate a mapping from raw field values to text values.

    Yields one (raw values, text values) pair per distinct
    combination of the sniffer's fields, scraping a single example
    bug for each combination.
    """
    distinct_bugs = get_distinct(sniffer.get_all_bugs(), sniffer.fields)
    for raw_values, example_bug in distinct_bugs.items():
        yield raw_values, sniffer.get_text_values(example_bug)
132
133
134
def parse_args(args):
    """Parse command-line arguments into an options object.

    Calls parser.error() -- which exits -- when no base URL is given
    or when positional arguments are present.
    """
    parser = optparse.OptionParser()
    option_definitions = [
        (("--base-url",), dict(
            dest="base_url", metavar="URL",
            help="The base URL at the remote Roundup instance.")),
        (("--delay",), dict(
            dest="delay", type="int",
            help=("The number of seconds to wait between each page "
                  "load [default: %default]."))),
        (("--cache-dir",), dict(
            dest="cache_dir", metavar="DIR",
            help=("A directory in which to cache fetched resources "
                  "[default: %default]."))),
        (("--sniffer-class",), dict(
            dest="sniffer_class", metavar="CLASSNAME",
            help="The sniffer class to use [default: %default].")),
        ]
    for option_args, option_kwargs in option_definitions:
        parser.add_option(*option_args, **option_kwargs)
    parser.set_defaults(
        delay=0, cache_dir="roundup_sniffer_cache",
        sniffer_class="MplayerStatusSniffer")

    options, extra_args = parser.parse_args(args)

    if not options.base_url:
        parser.error("Please specify a base URL.")
    if extra_args:
        parser.error("Positional arguments are not accepted: %s" %
                     ' '.join(extra_args))

    return options
166
167
168
if __name__ == '__main__':
    options = parse_args(sys.argv[1:])
    # Look the sniffer class up by name in this module instead of
    # eval()ing the option value: eval would execute arbitrary
    # command-line input.
    try:
        sniffer_class = globals()[options.sniffer_class]
    except KeyError:
        sys.exit("Unknown sniffer class: %s" % options.sniffer_class)
    sniffer = sniffer_class(options.base_url, options.cache_dir)
    mapping = {}
    for raw, text in gen_mapping(sniffer):
        mapping[raw] = text
        # Be polite to the remote tracker between page loads.
        sleep(options.delay)
    pprint(mapping)