#!/usr/bin/env python
#
# Copyright 2009 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""
This script is here to help us discover what the text equivalent of a
Roundup numeric field is remotely, without access to the Roundup
database.

It does this by downloading all bugs from the remote bug tracker in
CSV format, which gives us numeric values for the fields we're
interested in (e.g. status and substatus).

It then discovers all distinct combinations of those fields then
downloads an example bug page for each. It scrapes the bug page to
find the text that corresponds to the numeric value we already have.

There is a race condition. Someone can edit the bug page between the
CSV download and the bug page download, so be sure to run this more
than once and compare the results.

To complicate matters, downloaded pages are cached. To redownload the
CSV or a bug page the cache file must be deleted. It is a completely
non-HTTP compliant cache! This is an aid during development when this
script is run many times, and also provides a measure of robustness
against errors; there's no need to start from the beginning every
time.

Perhaps the best way to make this work for a new Roundup instance is
to subclass RoundupSniffer and implement get_text_values() and
populate the class-level "fields" variable. See MplayerStatusSniffer
for an example.
"""

__metaclass__ = type

import csv
import optparse
import sys
import urllib2

from base64 import urlsafe_b64encode
from os import mkdir
from os.path import join, exists
from pprint import pprint
from time import sleep
from urllib import urlencode

from BeautifulSoup import BeautifulSoup

class RoundupSniffer:
    """Sniffs the meaning of numeric fields in remote Roundups."""

    # Names of the remote Roundup columns to sniff; subclasses override.
    fields = ('status',)

    def __init__(self, base_url, cache_dir):
        """Create a sniffer.

        :param base_url: URL of the remote Roundup issue index.
        :param cache_dir: directory for cached downloads; created if
            it does not already exist.
        """
        self.base_url = base_url
        self.cache_dir = cache_dir
        if not exists(self.cache_dir):
            mkdir(self.cache_dir)

    def fetch(self, url):
        """Fetch the URL, consulting the cache first.

        The resource is downloaded at most once and stored in
        `cache_dir` under a filename derived from the URL. The cache
        ignores HTTP expiry entirely; delete the cache file to force
        a re-download.

        :return: an open binary file object for the (cached) content.
        """
        filename = join(self.cache_dir, urlsafe_b64encode(url))
        if not exists(filename):
            # Explicitly close both the remote response and the cache
            # file; the original leaked them by relying on refcounting.
            response = urllib2.urlopen(url)
            try:
                data = response.read()
            finally:
                response.close()
            cache_file = open(filename, 'wb')
            try:
                cache_file.write(data)
            finally:
                cache_file.close()
        return open(filename, 'rb')

    def get_all_bugs(self):
        """Download bugs as CSV and return them as a list of dicts.

        Requests the 'id' column plus every column in `self.fields`.
        Note the query asks for a single page of 50 bugs starting at
        0; whether that covers all bugs depends on the remote tracker.
        """
        all_fields = ['id']
        all_fields.extend(self.fields)
        query = [
            ('@action', 'export_csv'),
            ('@columns', ','.join(all_fields)),
            ('@sort', 'activity'),
            ('@group', 'priority'),
            ('@pagesize', '50'),
            ('@startwith', '0'),
            ]
        url = '%s?%s' % (self.base_url, urlencode(query))
        bugs = csv.DictReader(self.fetch(url))
        return list(bugs)

    def get_text_values(self, bug):
        """Return the human-readable values of `self.fields` for `bug`.

        Subclasses must override this; see MplayerStatusSniffer.
        """
        # `func_name` was Python 2 only; `__name__` yields the same
        # string and also works on Python 3 method objects.
        raise NotImplementedError(self.get_text_values.__name__)
class MplayerStatusSniffer(RoundupSniffer):
    """Sniffer for the Mplayer/FFMpeg Roundup.

    http://roundup.mplayerhq.hu/roundup/ffmpeg/

    This looks to be a mostly unmodified instance, so this sniffer may
    be useful in general.
    """

    fields = ('status', 'substatus')

    def get_text_values(self, bug):
        """Returns the text of status and substatus for the given bug.

        This is done by downloading the HTML bug page and scraping it.
        """
        # The bug page lives directly under the base URL, keyed by id.
        page = self.fetch('%s%s' % (self.base_url, bug['id'])).read()
        # The cell following the "Status" header holds one <span> per
        # sniffed field; their text is what we are after.
        status_cell = BeautifulSoup(page).find(
            'th', text='Status').findNext('td')
        return tuple(span.string for span in status_cell.findAll('span'))
def get_distinct(things, fields):
    """Identify every distinct combination of fields.

    For each combination also return one example thing.
    """
    examples = {}
    for thing in things:
        # Later things overwrite earlier ones with the same key, so
        # the example kept is the last one seen.
        combination = tuple(thing[field] for field in fields)
        examples[combination] = thing
    return examples
def gen_mapping(sniffer):
    """Generate a mapping from raw field values to text values."""
    # Keep one example bug per distinct combination of the sniffed
    # fields (the last bug seen for each combination wins).
    examples = {}
    for bug in sniffer.get_all_bugs():
        raw_values = tuple(bug[field] for field in sniffer.fields)
        examples[raw_values] = bug
    # Scrape the example bug page for each combination to learn the
    # corresponding text values.
    for raw_values, bug in examples.items():
        yield raw_values, sniffer.get_text_values(bug)
def parse_args(args):
    """Parse command-line options.

    :param args: the argument list, typically sys.argv[1:].
    :return: the parsed options object. Exits (via parser.error) when
        no base URL is given or positional arguments are present.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        "--base-url", dest="base_url", metavar="URL",
        help="The base URL at the remote Roundup instance.")
    parser.add_option(
        "--delay", dest="delay", type="int",
        help=("The number of seconds to wait between each page "
              "load [default: %default]."))
    parser.add_option(
        "--cache-dir", dest="cache_dir", metavar="DIR",
        help=("A directory in which to cache fetched resources "
              "[default: %default]."))
    parser.add_option(
        "--sniffer-class", dest="sniffer_class", metavar="CLASSNAME",
        help="The sniffer class to use [default: %default].")
    parser.set_defaults(
        delay=0,
        cache_dir="roundup_sniffer_cache",
        sniffer_class="MplayerStatusSniffer")

    options, remainder = parser.parse_args(args)

    if not options.base_url:
        parser.error("Please specify a base URL.")
    if len(remainder) > 0:
        parser.error("Positional arguments are not accepted: %s" %
                     ' '.join(remainder))

    return options
if __name__ == '__main__':
    options = parse_args(sys.argv[1:])
    # NOTE(review): eval() of a command-line value; tolerable in a
    # developer-only tool, but a name lookup would be safer.
    sniffer_factory = eval(options.sniffer_class)
    sniffer = sniffer_factory(options.base_url, options.cache_dir)
    mapping = {}
    for raw_values, text_values in gen_mapping(sniffer):
        mapping[raw_values] = text_values
        # Be polite to the remote tracker between page loads.
        sleep(options.delay)
    pprint(mapping)