~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/env python
8452.3.3 by Karl Fogel
* utilities/: Add copyright header block to source files that were
2
#
8687.15.2 by Karl Fogel
In files modified by r8688, change "<YEARS>" to "2009", as per
3
# Copyright 2009 Canonical Ltd.  This software is licensed under the
8687.15.3 by Karl Fogel
Shorten the copyright header block to two lines.
4
# GNU Affero General Public License version 3 (see the file LICENSE).
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
5
6
"""
7
This script is here to help us discover what the text equivalent of a
8
Roundup numeric field is remotely, without access to the Roundup
9
database.
10
11
It does this by downloading all bugs from the remote bug tracker in
12
CSV format, which gives us numeric values for the fields we're
13
interested in (e.g. status and substatus).
14
15
It then discovers all distinct combinations of those fields then
16
downloads an example bug page for each. It scrapes the bug page to
17
find the text that corresponds to the numeric value we already have.
18
19
There is a race condition. Someone can edit the bug page between the
20
CSV download and the bug page download, so be sure to run this more
21
than once and compare the results.
22
23
To complicate matters, downloaded pages are cached. To redownload the
24
CSV or a bug page the cache file must be deleted. It is a completely
25
non-HTTP compliant cache! This is an aid during development when this
26
script is run many times, and also provides a measure of robustness
27
against errors; there's no need to start from the beginning every
28
time.
29
7403.5.17 by Gavin Panella
Typo.
30
Perhaps the best way to make this work for a new Roundup instance is
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
31
to subclass RoundupSniffer and implement get_text_values() and
32
populate the class-level "fields" variable. See MplayerStatusSniffer
33
for an example.
34
"""
35
36
__metaclass__ = type
37
14612.2.6 by William Grant
utilities
38
from base64 import urlsafe_b64encode
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
39
import csv
40
import optparse
41
from os import mkdir
14612.2.6 by William Grant
utilities
42
from os.path import (
43
    exists,
44
    join,
45
    )
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
46
from pprint import pprint
14612.2.6 by William Grant
utilities
47
import sys
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
48
from time import sleep
49
from urllib import urlencode
14612.2.6 by William Grant
utilities
50
import urllib2
7403.5.13 by Gavin Panella
Restore the sniffer script. There are better ways to get full information about what statuses a remote Roundup tracker *can support*, but this gives us information about what statuses are actually *used*. It will also work with only anonymous access to the remote tracker.
51
52
from BeautifulSoup import BeautifulSoup
53
54
55
class RoundupSniffer:
    """Sniffs the meaning of numeric fields in remote Roundups."""

    # Numeric CSV columns to resolve to text; subclasses extend this.
    fields = ('status',)

    def __init__(self, base_url, cache_dir):
        """Create a sniffer.

        :param base_url: The base URL of the remote Roundup instance.
        :param cache_dir: Directory in which fetched resources are
            cached; created if it does not already exist.
        """
        self.base_url = base_url
        self.cache_dir = cache_dir
        if not exists(self.cache_dir):
            mkdir(self.cache_dir)

    def fetch(self, url):
        """Fetch the URL, consulting the cache first.

        Returns an open binary file for the cached copy. The cache is
        keyed on the base64-encoded URL (filesystem-safe) and is never
        invalidated; delete the cache file to force a re-download.
        """
        filename = join(self.cache_dir, urlsafe_b64encode(url))
        if not exists(filename):
            # Close the network and file handles promptly instead of
            # leaking them until garbage collection.
            response = urllib2.urlopen(url)
            try:
                data = response.read()
            finally:
                response.close()
            with open(filename, 'wb') as cache_file:
                cache_file.write(data)
        return open(filename, 'rb')

    def get_all_bugs(self):
        """Download the remote bug list via CSV export.

        Requests the 'id' column plus every column named in the
        class-level ``fields``, and returns a list of dicts, one per
        bug row.
        """
        all_fields = ['id']
        all_fields.extend(self.fields)
        query = [
            ('@action', 'export_csv'),
            ('@columns', ','.join(all_fields)),
            ('@sort', 'activity'),
            ('@group', 'priority'),
            # NOTE(review): @pagesize of 50 looks like it could cap the
            # export at 50 rows -- confirm the remote returns all bugs.
            ('@pagesize', '50'),
            ('@startwith', '0'),
            ]
        url = '%s?%s' % (self.base_url, urlencode(query))
        bugs = csv.DictReader(self.fetch(url))
        return list(bugs)

    def get_text_values(self, bug):
        """Return the text equivalents of the bug's numeric fields.

        Subclasses must override this; see MplayerStatusSniffer for an
        example.
        """
        # __name__ is the portable spelling; func_name is a legacy
        # Python 2-only alias.
        raise NotImplementedError(self.get_text_values.__name__)
91
92
93
class MplayerStatusSniffer(RoundupSniffer):
    """Sniffer for the Mplayer/FFMpeg Roundup.

    http://roundup.mplayerhq.hu/roundup/ffmpeg/

    This looks to be a mostly unmodified instance, so this sniffer may
    be useful in general.
    """

    fields = ('status', 'substatus')

    def get_text_values(self, bug):
        """Returns the text of status and substatus for the given bug.

        This is done by downloading the HTML bug page and scraping it.
        """
        # The bug page lives directly under the base URL, keyed by id.
        bug_page_url = '%s%s' % (self.base_url, bug['id'])
        markup = self.fetch(bug_page_url).read()
        document = BeautifulSoup(markup)
        # The cell next to the "Status" header holds one <span> per
        # field value; collect their text in document order.
        status_header = document.find('th', text='Status')
        value_cell = status_header.findNext('td')
        return tuple(span.string for span in value_cell.findAll('span'))
115
116
117
def get_distinct(things, fields):
    """Identify every distinct combination of fields.

    For each combination also return one example thing.
    """
    examples_by_combination = {}
    for thing in things:
        combination = tuple(thing[field] for field in fields)
        # Later things with the same combination overwrite earlier
        # ones, so the surviving example is the last one seen.
        examples_by_combination[combination] = thing
    return examples_by_combination
125
126
127
def gen_mapping(sniffer):
    """Generate a mapping from raw field values to text values."""
    all_bugs = sniffer.get_all_bugs()
    # One example bug per distinct combination of raw field values.
    representatives = get_distinct(all_bugs, sniffer.fields)
    for raw_values, example_bug in representatives.items():
        yield raw_values, sniffer.get_text_values(example_bug)
134
135
136
def parse_args(args):
    """Parse command-line options, returning an optparse values object.

    Calls parser.error() (which exits) when --base-url is missing or
    when positional arguments are supplied.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        "--base-url", dest="base_url", metavar="URL",
        help="The base URL at the remote Roundup instance.")
    parser.add_option(
        "--delay", dest="delay", type="int", default=0,
        help=("The number of seconds to wait between each page "
              "load [default: %default]."))
    parser.add_option(
        "--cache-dir", dest="cache_dir", metavar="DIR",
        default="roundup_sniffer_cache",
        help=("A directory in which to cache fetched resources "
              "[default: %default]."))
    parser.add_option(
        "--sniffer-class", dest="sniffer_class", metavar="CLASSNAME",
        default="MplayerStatusSniffer",
        help="The sniffer class to use [default: %default].")

    options, args = parser.parse_args(args)

    if not options.base_url:
        parser.error("Please specify a base URL.")
    if args:
        parser.error("Positional arguments are not accepted: %s" %
                     ' '.join(args))

    return options
168
169
170
if __name__ == '__main__':
    options = parse_args(sys.argv[1:])
    # Look the sniffer class up by name instead of eval()ing the option
    # value, which would execute an arbitrary expression taken from the
    # command line.
    sniffer_class = globals().get(options.sniffer_class)
    if sniffer_class is None or not (
            isinstance(sniffer_class, type) and
            issubclass(sniffer_class, RoundupSniffer)):
        sys.exit("Unknown sniffer class: %s" % options.sniffer_class)
    sniffer = sniffer_class(options.base_url, options.cache_dir)
    mapping = {}
    for raw, text in gen_mapping(sniffer):
        mapping[raw] = text
        # Be polite to the remote tracker between page fetches.
        sleep(options.delay)
    pprint(mapping)