12398.2.1
by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests. |
1 |
# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
|
9719.2.1
by Michael Hudson
move iter_split somewhere generic |
2 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
3 |
||
10662.1.3
by Jonathan Lange
Move highly-general synchronization function to services |
4 |
"""Generic Python utilities.
|
9719.2.1
by Michael Hudson
move iter_split somewhere generic |
5 |
|
10662.1.3
by Jonathan Lange
Move highly-general synchronization function to services |
6 |
Functions, lists and so forth. Nothing here that does system calls or network
|
7 |
stuff.
|
|
8 |
"""
|
|
9719.2.1
by Michael Hudson
move iter_split somewhere generic |
9 |
|
10 |
__metaclass__ = type |
|
11 |
__all__ = [ |
|
12398.2.1
by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests. |
12 |
'AutoDecorate', |
12398.2.4
by Jonathan Lange
Move 'base' into lp.services |
13 |
'base', |
10652.2.1
by Tim Penhey
Add in a caching iterator. |
14 |
'CachingIterator', |
12398.2.7
by Jonathan Lange
Really remove autodecorate.txt doctest and actually export compress_hash. |
15 |
'compress_hash', |
11149.2.4
by Jonathan Lange
Add a decorate_with |
16 |
'decorate_with', |
12156.15.12
by Benji York
- move docstring_dedent into a (preexisting) utility module (and add tests) |
17 |
'docstring_dedent', |
7675.1090.6
by Jeroen Vermeulen
Testing. |
18 |
'file_exists', |
12685.5.1
by Jonathan Lange
Remove some old workarounds and move some code from one place to a better place. |
19 |
'iter_list_chunks', |
9719.2.1
by Michael Hudson
move iter_split somewhere generic |
20 |
'iter_split', |
14213.4.29
by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils. |
21 |
'load_bz2_pickle', |
13405.9.1
by Henning Eggers
Restored r13373. |
22 |
'obfuscate_email', |
14265.2.1
by Aaron Bentley
Implement obfuscation for structures of basic types. |
23 |
'obfuscate_structure', |
13405.9.1
by Henning Eggers
Restored r13373. |
24 |
're_email_address', |
12392.1.10
by Jonathan Lange
Add a helper. |
25 |
'run_capturing_output', |
14213.4.29
by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils. |
26 |
'save_bz2_pickle', |
10662.1.3
by Jonathan Lange
Move highly-general synchronization function to services |
27 |
'synchronize', |
10541.1.3
by Tim Penhey
Move the functions around again. |
28 |
'text_delta', |
12336.2.8
by Gavin Panella
Move traceback_info to lp.services.utils. |
29 |
'traceback_info', |
13333.5.2
by Jonathan Lange
Add utc_now |
30 |
'utc_now', |
10541.1.3
by Tim Penhey
Move the functions around again. |
31 |
'value_string', |
9719.2.1
by Michael Hudson
move iter_split somewhere generic |
32 |
]
|
33 |
||
14213.4.29
by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils. |
34 |
import bz2 |
13333.5.2
by Jonathan Lange
Add utc_now |
35 |
from datetime import datetime |
12336.1.2
by Gavin Panella
Format imports. |
36 |
from itertools import tee |
7675.1090.6
by Jeroen Vermeulen
Testing. |
37 |
import os |
13405.9.1
by Henning Eggers
Restored r13373. |
38 |
import re |
14213.4.29
by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils. |
39 |
import string |
12392.1.10
by Jonathan Lange
Add a helper. |
40 |
from StringIO import StringIO |
12336.2.15
by Gavin Panella
Merge devel, resolving 1 conflict. |
41 |
import sys |
12156.15.12
by Benji York
- move docstring_dedent into a (preexisting) utility module (and add tests) |
42 |
from textwrap import dedent |
12398.2.1
by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests. |
43 |
from types import FunctionType |
10541.1.4
by Tim Penhey
Fix some pedantry. |
44 |
|
14213.4.29
by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils. |
45 |
import cPickle as pickle |
12392.1.10
by Jonathan Lange
Add a helper. |
46 |
from fixtures import ( |
47 |
Fixture, |
|
48 |
MonkeyPatch, |
|
49 |
)
|
|
10541.1.4
by Tim Penhey
Fix some pedantry. |
50 |
from lazr.enum import BaseItem |
13333.5.2
by Jonathan Lange
Add utc_now |
51 |
import pytz |
11149.2.4
by Jonathan Lange
Add a decorate_with |
52 |
from twisted.python.util import mergeFunctionMetadata |
10541.1.3
by Tim Penhey
Move the functions around again. |
53 |
from zope.security.proxy import isinstance as zope_isinstance |
54 |
||
9719.2.1
by Michael Hudson
move iter_split somewhere generic |
55 |
|
12398.2.1
by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests. |
56 |
def AutoDecorate(*decorators):
    """Build a metaclass that decorates every plain function of a class.

    Each function found in the body of a class created with the returned
    metaclass is passed through all of `decorators`, in the order given.
    Class attributes that are not plain functions are copied unchanged.

    :param decorators: Callables that take a function and return a callable.
    :return: A new metaclass (a subclass of `type`).
    """

    def apply_decorators(function):
        # Thread the function through each decorator in turn, checking after
        # every step that the result can still be called.
        wrapped = function
        for decorator in decorators:
            wrapped = decorator(wrapped)
            assert callable(wrapped), (
                "Decorator %s didn't return a callable."
                % repr(decorator))
        return wrapped

    class AutoDecorateMetaClass(type):

        def __new__(cls, class_name, bases, class_dict):
            decorated_dict = {}
            for name, value in class_dict.items():
                # Exact type comparison: only plain functions are wrapped,
                # every other kind of attribute passes through as it is.
                if type(value) == FunctionType:
                    value = apply_decorators(value)
                decorated_dict[name] = value
            return type.__new__(cls, class_name, bases, decorated_dict)

    return AutoDecorateMetaClass
|
78 |
||
79 |
||
12398.2.4
by Jonathan Lange
Move 'base' into lp.services |
80 |
def base(number, radix):
    """Render the non-negative integer 'number' in base 'radix'.

    Digits are drawn from 0-9, then a-z, then A-Z, so radixes from 2 up to
    62 are supported.  For radixes that int() understands this is the
    inverse of int(str, radix).

    This function is based on work from the Python Cookbook and is under the
    Python license.

    :param number: The non-negative number to convert.
    :param radix: The target base, between 2 and 62 inclusive.
    :return: The string representation of 'number' in base 'radix'.
    :raise ValueError: If 'radix' is out of range or 'number' is negative.
    """
    if not 2 <= radix <= 62:
        raise ValueError("radix must be between 2 and 62: %s" % (radix,))
    if number < 0:
        raise ValueError("number must be non-negative: %s" % (number,))

    # Zero has no non-zero digits to peel off, so answer it directly.
    if number == 0:
        return '0'

    alphabet = string.digits + string.ascii_letters
    digits = []
    remaining = number
    while remaining:
        # Least significant digit first; the order is flipped on the way out.
        remaining, index = divmod(remaining, radix)
        digits.append(alphabet[index])
    return ''.join(reversed(digits))
|
106 |
||
107 |
||
108 |
def compress_hash(hash_obj):
    """Return a compact, URL-safe rendering of a hash object's digest.

    The hexadecimal digest of `hash_obj` is read as an integer and written
    out again in base 62 using `base`.  For an ``md5`` digest the result is
    at most 22 characters long, for a ``sha1`` digest at most 27.

    :param hash_obj: An object with a ``hexdigest()`` method.
    :return: The digest as a base 62 string.
    """
    hex_digest = hash_obj.hexdigest()
    digest_value = int(hex_digest, 16)
    return base(digest_value, 62)
|
116 |
||
117 |
||
9719.2.1
by Michael Hudson
move iter_split somewhere generic |
118 |
def iter_split(string, splitter):
    """Yield each way of cutting 'string' in two at a 'splitter'.

    For the string 'a/b/c' and the splitter '/', this yields
    ('a/b/c', ''), then ('a/b', 'c'), then ('a', 'b/c').

    The first pair holds the whole string in its first element.  Every later
    pair moves one more token from the first element to the second.
    Iteration ends before the first element would become empty, so
    ('', 'a/b/c') is never produced.  An empty 'string' yields nothing.
    """
    if string == '':
        return
    parts = string.split(splitter)
    # Walk the cut point from the far right back towards the first token.
    for cut in range(len(parts), 0, -1):
        head = splitter.join(parts[:cut])
        tail = splitter.join(parts[cut:])
        yield head, tail
|
10541.1.3
by Tim Penhey
Move the functions around again. |
135 |
|
136 |
||
12685.5.1
by Jonathan Lange
Remove some old workarounds and move some code from one place to a better place. |
137 |
def iter_list_chunks(a_list, size):
    """Yield consecutive slices of `a_list`, each at most `size` items long.

    Every chunk except possibly the last holds exactly `size` items; the
    last one holds whatever remains.  An empty list yields nothing.
    """
    for start in range(0, len(a_list), size):
        stop = start + size
        yield a_list[start:stop]
12685.5.1
by Jonathan Lange
Remove some old workarounds and move some code from one place to a better place. |
144 |
|
145 |
||
10662.1.3
by Jonathan Lange
Move highly-general synchronization function to services |
146 |
def synchronize(source, target, add, remove):
    """Bring the container 'source' in line with 'target'.

    'add' is called once for each object of 'target' that is not in
    'source', and 'remove' once for each object of 'source' that is not in
    'target'.  Both sets of differences are worked out before any callback
    runs, and all additions happen before any removal.

    :param source: The container to be updated.
    :param target: The container describing the desired contents.
    :param add: Callable taking one object that must be added.
    :param remove: Callable taking one object that must be removed.
    """
    missing = []
    for obj in target:
        if obj not in source:
            missing.append(obj)

    surplus = []
    for obj in source:
        if obj not in target:
            surplus.append(obj)

    for obj in missing:
        add(obj)
    for obj in surplus:
        remove(obj)
|
159 |
||
160 |
||
10541.1.3
by Tim Penhey
Move the functions around again. |
161 |
def value_string(item):
    """Return a string describing `item`.

    None is rendered as '(not set)', enumerated items (`BaseItem`) are
    rendered by their title, and everything else is converted to unicode.
    """
    if item is None:
        return '(not set)'
    if zope_isinstance(item, BaseItem):
        return item.title
    return unicode(item)
|
172 |
||
173 |
||
174 |
def text_delta(instance_delta, delta_names, state_names, interface):
    """Return a textual description of a Delta object.

    Only the members that are set on the delta (not None) are shown.  The
    individual lines are joined with newlines into a single string.

    :param instance_delta: The delta to generate a textual representation of.
    :param delta_names: The names of all members to show as "old => new".
    :param state_names: The names of all members to show only the new state
        of.
    :param interface: A mapping from member name to an object with a
        `title`, used to label each line (the Zope interface that the
        delta compared).
    :return: The description as one newline-separated string.
    """
    lines = []
    prefix = ' ' * 4

    # Fields for which we have old and new values.
    for name in delta_names:
        change = getattr(instance_delta, name, None)
        if change is not None:
            lines.append("%s%s: %s => %s" % (
                prefix,
                interface[name].title,
                value_string(change['old']),
                value_string(change['new'])))

    # Fields for which only the new state is reported.
    for name in state_names:
        new_state = getattr(instance_delta, name, None)
        if new_state is None:
            continue
        title = interface[name].title
        # Keep a blank line between this entry and whatever came before.
        if lines:
            lines.append('')
        lines.append('%s changed to:\n\n%s' % (title, new_state))

    return '\n'.join(lines)
|
10652.2.1
by Tim Penhey
Add in a caching iterator. |
208 |
|
209 |
||
210 |
class CachingIterator:
    """Wrap an iterator so that it can be iterated over more than once.

    Some generators and iterators are expensive to compute (the merge
    sorted revision graph of a bazaar branch, for example), and
    restructuring code so they are consumed only once can be awkward.
    Wrapping such an iterator in this class lets the underlying iterator
    run once while its results are remembered: later iterations replay the
    values already seen and pull further values from the underlying
    iterator only when needed.
    """

    def __init__(self, iterator):
        self.iterator = iterator

    def __iter__(self):
        # Split the current iterator in two: one half is kept for future
        # iterations, the other is handed to the caller.  Teeing an
        # iterator previously returned by tee won't cause heat death.  See
        # tee_copy in itertoolsmodule.c in the Python source.
        retained, handed_out = tee(self.iterator)
        self.iterator = retained
        return handed_out
|
11149.2.3
by Jonathan Lange
Add a new 'run_with' method. |
230 |
|
231 |
||
11149.2.10
by Jonathan Lange
Change the interface of decorate_with so we can call it twice |
232 |
def decorate_with(context_factory, *args, **kwargs):
    """Create a decorator that runs decorated functions inside a context.

    Every call of a decorated function builds a fresh context manager with
    `context_factory(*args, **kwargs)` and runs the function inside it.

    :param context_factory: Callable returning a context manager.
    :param args: Positional arguments for `context_factory`.
    :param kwargs: Keyword arguments for `context_factory`.
    :return: A decorator.
    """

    def decorator(function):

        def decorated(*call_args, **call_kwargs):
            context = context_factory(*args, **kwargs)
            with context:
                return function(*call_args, **call_kwargs)

        # Carry the metadata of the wrapped function over to the wrapper.
        return mergeFunctionMetadata(function, decorated)

    return decorator
12156.15.12
by Benji York
- move docstring_dedent into a (preexisting) utility module (and add tests) |
244 |
|
245 |
||
246 |
def docstring_dedent(s):
    """Strip the common leading indentation from a doc string.

    The first line of a doc string carries no indentation, so it is set
    aside, the remaining lines are dedented, and the two parts are put back
    together.  Leading and trailing whitespace is removed from the result.
    """
    # Appending a newline guarantees there is one to divide the text at.
    first, _newline, rest = (s + '\n').partition('\n')
    reassembled = first + '\n' + dedent(rest)
    return reassembled.strip()
12336.2.8
by Gavin Panella
Move traceback_info to lp.services.utils. |
255 |
|
256 |
||
7675.1090.6
by Jeroen Vermeulen
Testing. |
257 |
def file_exists(filename):
    """Report whether `filename` exists, as determined by `os.access`."""
    exists = os.access(filename, os.F_OK)
    return exists
|
260 |
||
261 |
||
12392.1.10
by Jonathan Lange
Add a helper. |
262 |
class CapturedOutput(Fixture):
    """A fixture that redirects stdout and stderr into in-memory buffers.

    While the fixture is set up, `sys.stdout` and `sys.stderr` are patched
    to point at the `stdout` and `stderr` attributes of the fixture.
    """

    def __init__(self):
        super(CapturedOutput, self).__init__()
        self.stdout = StringIO()
        self.stderr = StringIO()

    def setUp(self):
        super(CapturedOutput, self).setUp()
        patches = (
            ('sys.stdout', self.stdout),
            ('sys.stderr', self.stderr),
            )
        for target, buffer in patches:
            self.useFixture(MonkeyPatch(target, buffer))
|
274 |
||
275 |
||
276 |
def run_capturing_output(function, *args, **kwargs):
    """Call ``function`` while capturing what it writes to stdout and stderr.

    :param function: A function to run.
    :param args: Arguments passed to the function.
    :param kwargs: Keyword arguments passed to the function.
    :return: A tuple of ``(ret, stdout, stderr)``, where ``ret`` is the value
        returned by ``function``, ``stdout`` is the captured standard output
        and ``stderr`` is the captured standard error.
    """
    with CapturedOutput() as captured:
        result = function(*args, **kwargs)
    # The buffers are read only once the capture has been torn down.
    out = captured.stdout.getvalue()
    err = captured.stderr.getvalue()
    return result, out, err
|
289 |
||
290 |
||
12336.2.8
by Gavin Panella
Move traceback_info to lp.services.utils. |
291 |
def traceback_info(info):
    """Set `__traceback_info__` in the caller's locals.

    This is more aesthetically pleasing than assigning to
    `__traceback_info__` directly, but more importantly it avoids spurious
    lint warnings about unused local variables, and helps to avoid typos.

    :param info: The value to store as `__traceback_info__`.
    """
    # Frame 1 is the immediate caller of this function, so this lookup must
    # stay directly in this function body.
    caller_frame = sys._getframe(1)
    caller_frame.f_locals["__traceback_info__"] = info
|
13278.1.12
by Aaron Bentley
Fake merge of rollback. |
299 |
|
300 |
||
13333.5.2
by Jonathan Lange
Add utc_now |
301 |
def utc_now():
    """Return the current time as a timezone-aware datetime in UTC."""
    utc = pytz.UTC
    return datetime.now(utc)
13405.9.1
by Henning Eggers
Restored r13373. |
304 |
|
305 |
||
306 |
# This is a regular expression that matches email address embedded in
|
|
307 |
# text. It is not RFC 2821 compliant, nor does it need to be. This
|
|
308 |
# expression strives to identify probable email addresses so that they
|
|
309 |
# can be obfuscated when viewed by unauthenticated users. See
|
|
310 |
# http://www.email-unlimited.com/stuff/email_address_validator.htm
|
|
311 |
||
312 |
# localnames do not have [&?%!@<>,;:`|{}()#*^~ ] in practice
|
|
313 |
# (regardless of RFC 2821) because they conflict with other systems.
|
|
314 |
# See https://lists.ubuntu.com
|
|
315 |
# /mailman/private/launchpad-reviews/2007-June/006081.html
|
|
316 |
||
317 |
# This version of the re is more than 5x faster than the original
|
|
318 |
# version used in ftest/test_tales.testObfuscateEmail.
|
|
319 |
re_email_address = re.compile(r""" |
|
320 |
\b[a-zA-Z0-9._/="'+-]{1,64}@ # The localname.
|
|
321 |
[a-zA-Z][a-zA-Z0-9-]{1,63} # The hostname.
|
|
322 |
\.[a-zA-Z0-9.-]{1,251}\b # Dot starts one or more domains.
|
|
323 |
""", re.VERBOSE) # ' <- font-lock turd |
|
324 |
||
325 |
||
14325.1.4
by mbp at canonical
Add new strip-email formatter and use that in the meta description |
326 |
def obfuscate_email(text_to_obfuscate, replacement=None):
    """Hide the email addresses found in a piece of text.

    Each address is replaced by <email address hidden> by default, or by
    the given replacement.

    The pattern used to identify an email address is not RFC 2822
    compliant.  It strives to match any possible email address embedded in
    the text.  For example, mailto:person@domain.dom and
    http://person:password@domain.dom both match, though the http match is
    in fact not an email address.

    :param text_to_obfuscate: The text to search for email addresses.
    :param replacement: Optional substitute for each address found; it is
        handed to the regular expression's `sub` unchanged.
    :return: The text with the addresses replaced.
    """
    if replacement is None:
        replacement = '<email address hidden>'
    obfuscated = re_email_address.sub(replacement, text_to_obfuscate)
    # Avoid doubled angle brackets.
    return obfuscated.replace(
        "<<email address hidden>>", "<email address hidden>")
|
14213.4.29
by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils. |
345 |
|
346 |
||
347 |
def save_bz2_pickle(obj, filename):
    """Write `obj` to `filename` as a bz2 compressed pickle.

    The highest pickle protocol available is used.

    :param obj: The object to pickle.
    :param filename: Path of the file to write.
    """
    compressed_file = bz2.BZ2File(filename, "w")
    try:
        pickle.dump(obj, compressed_file, pickle.HIGHEST_PROTOCOL)
    finally:
        # Always close, so the file handle is released even when pickling
        # fails part way through.
        compressed_file.close()
|
354 |
||
355 |
||
356 |
def load_bz2_pickle(filename):
    """Read a bz2 compressed pickle from `filename` and return its object.

    :param filename: Path of the file to read.
    :return: The unpickled object.
    """
    # NOTE(review): unpickling can execute arbitrary code, so `filename`
    # must only ever refer to data from a trusted source.
    compressed_file = bz2.BZ2File(filename, "r")
    try:
        loaded = pickle.load(compressed_file)
    finally:
        compressed_file.close()
    return loaded
|
14265.2.1
by Aaron Bentley
Implement obfuscation for structures of basic types. |
363 |
|
364 |
||
365 |
def obfuscate_structure(o):
    """Obfuscate the strings of a json-serializable structure.

    Note: tuples are converted to lists because json encoders do not
    distinguish between lists and tuples.

    :param o: Any json-serializable object.
    :return: a possibly-new structure in which all strings, list and tuple
        elements, and dict keys and values have undergone obfuscate_email
        recursively.  Objects of any other type are returned unchanged.
    """
    if isinstance(o, basestring):
        return obfuscate_email(o)
    if isinstance(o, (list, tuple)):
        return [obfuscate_structure(element) for element in o]
    if isinstance(o, dict):
        obfuscated_items = (
            (obfuscate_structure(key), obfuscate_structure(value))
            for key, value in o.iteritems())
        return dict(obfuscated_items)
    return o