~launchpad-pqm/launchpad/devel

12398.2.1 by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests.
1
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
9719.2.1 by Michael Hudson
move iter_split somewhere generic
2
# GNU Affero General Public License version 3 (see the file LICENSE).
3
10662.1.3 by Jonathan Lange
Move highly-general synchronization function to services
4
"""Generic Python utilities.
9719.2.1 by Michael Hudson
move iter_split somewhere generic
5
10662.1.3 by Jonathan Lange
Move highly-general synchronization function to services
6
Functions, lists and so forth. Nothing here that does system calls or network
7
stuff.
8
"""
9719.2.1 by Michael Hudson
move iter_split somewhere generic
9
10
__metaclass__ = type
11
__all__ = [
12398.2.1 by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests.
12
    'AutoDecorate',
12398.2.4 by Jonathan Lange
Move 'base' into lp.services
13
    'base',
10652.2.1 by Tim Penhey
Add in a caching iterator.
14
    'CachingIterator',
12398.2.7 by Jonathan Lange
Really remove autodecorate.txt doctest and actually export compress_hash.
15
    'compress_hash',
11149.2.4 by Jonathan Lange
Add a decorate_with
16
    'decorate_with',
12156.15.12 by Benji York
- move docstring_dedent into a (preexisting) utility module (and add tests)
17
    'docstring_dedent',
7675.1090.6 by Jeroen Vermeulen
Testing.
18
    'file_exists',
12685.5.1 by Jonathan Lange
Remove some old workarounds and move some code from one place to a better place.
19
    'iter_list_chunks',
9719.2.1 by Michael Hudson
move iter_split somewhere generic
20
    'iter_split',
14213.4.29 by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils.
21
    'load_bz2_pickle',
13405.9.1 by Henning Eggers
Restored r13373.
22
    'obfuscate_email',
14265.2.1 by Aaron Bentley
Implement obfuscation for structures of basic types.
23
    'obfuscate_structure',
13405.9.1 by Henning Eggers
Restored r13373.
24
    're_email_address',
12392.1.10 by Jonathan Lange
Add a helper.
25
    'run_capturing_output',
14213.4.29 by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils.
26
    'save_bz2_pickle',
10662.1.3 by Jonathan Lange
Move highly-general synchronization function to services
27
    'synchronize',
10541.1.3 by Tim Penhey
Move the functions around again.
28
    'text_delta',
12336.2.8 by Gavin Panella
Move traceback_info to lp.services.utils.
29
    'traceback_info',
13333.5.2 by Jonathan Lange
Add utc_now
30
    'utc_now',
10541.1.3 by Tim Penhey
Move the functions around again.
31
    'value_string',
9719.2.1 by Michael Hudson
move iter_split somewhere generic
32
    ]
33
14213.4.29 by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils.
34
import bz2
13333.5.2 by Jonathan Lange
Add utc_now
35
from datetime import datetime
12336.1.2 by Gavin Panella
Format imports.
36
from itertools import tee
7675.1090.6 by Jeroen Vermeulen
Testing.
37
import os
13405.9.1 by Henning Eggers
Restored r13373.
38
import re
14213.4.29 by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils.
39
import string
12392.1.10 by Jonathan Lange
Add a helper.
40
from StringIO import StringIO
12336.2.15 by Gavin Panella
Merge devel, resolving 1 conflict.
41
import sys
12156.15.12 by Benji York
- move docstring_dedent into a (preexisting) utility module (and add tests)
42
from textwrap import dedent
12398.2.1 by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests.
43
from types import FunctionType
10541.1.4 by Tim Penhey
Fix some pedantry.
44
14213.4.29 by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils.
45
import cPickle as pickle
12392.1.10 by Jonathan Lange
Add a helper.
46
from fixtures import (
47
    Fixture,
48
    MonkeyPatch,
49
    )
10541.1.4 by Tim Penhey
Fix some pedantry.
50
from lazr.enum import BaseItem
13333.5.2 by Jonathan Lange
Add utc_now
51
import pytz
11149.2.4 by Jonathan Lange
Add a decorate_with
52
from twisted.python.util import mergeFunctionMetadata
10541.1.3 by Tim Penhey
Move the functions around again.
53
from zope.security.proxy import isinstance as zope_isinstance
54
9719.2.1 by Michael Hudson
move iter_split somewhere generic
55
12398.2.1 by Jonathan Lange
Move AutoDecorate into lp.services.utils and give it unit tests.
56
def AutoDecorate(*decorators):
    """Build a metaclass that wraps every method with `decorators`.

    A class that uses the returned metaclass gets each plain function in
    its class body passed through every decorator, in the order the
    decorators were given (so the last one ends up outermost).  Attributes
    whose type is not exactly `FunctionType` (for instance staticmethod or
    classmethod objects) are copied across untouched.

    :param decorators: Callables that take a function and return a
        callable.
    :return: A metaclass (a subclass of `type`).
    """

    def apply_decorators(value):
        # Anything that is not a plain function is passed through as-is.
        if not type(value) == FunctionType:
            return value
        for decorator in decorators:
            value = decorator(value)
            # Checked after every step so the offending decorator is the
            # one named in the failure message.
            assert callable(value), (
                "Decorator %s didn't return a callable."
                % repr(decorator))
        return value

    class AutoDecorateMetaClass(type):

        def __new__(cls, class_name, bases, class_dict):
            decorated_dict = dict(
                (name, apply_decorators(value))
                for name, value in class_dict.items())
            return type.__new__(cls, class_name, bases, decorated_dict)

    return AutoDecorateMetaClass
78
79
12398.2.4 by Jonathan Lange
Move 'base' into lp.services
80
def base(number, radix):
    """Convert 'number' to an arbitrary base numbering scheme, 'radix'.

    This function is based on work from the Python Cookbook and is under the
    Python license.

    Digits are drawn from 0-9, then a-z, then A-Z.  Inverse function to
    int(str, radix) and long(str, radix) for the radix values those accept.

    :param number: A non-negative integer.
    :param radix: The target base, from 2 to 62 inclusive.
    :return: The string representation of `number` in base `radix`.
    :raise ValueError: If `radix` is out of range or `number` is negative.
    """
    if not (2 <= radix <= 62):
        raise ValueError("radix must be between 2 and 62: %s" % (radix,))
    if number < 0:
        raise ValueError("number must be non-negative: %s" % (number,))

    # Zero never enters the division loop below, so handle it up front.
    if number == 0:
        return '0'

    alphabet = string.digits + string.ascii_letters
    digits = []
    while number:
        number, remainder = divmod(number, radix)
        digits.append(alphabet[remainder])

    # Digits were produced least-significant first.
    return ''.join(reversed(digits))
106
107
108
def compress_hash(hash_obj):
    """Compress a hash_obj using `base`.

    Given an ``md5`` or ``sha1`` hash object, shorten its digest to at most
    22 or 27 characters respectively, in a form that's safe to be used in
    URLs.  The hex digest is read as a base-16 integer and re-encoded in
    base 62.

    :param hash_obj: An object with a ``hexdigest()`` method.
    :return: The base-62 string for the digest.
    """
    hex_digest = hash_obj.hexdigest()
    digest_as_int = int(hex_digest, 16)
    return base(digest_as_int, 62)
116
117
9719.2.1 by Michael Hudson
move iter_split somewhere generic
118
def iter_split(string, splitter):
    """Iterate over ways to split 'string' in two with 'splitter'.

    If 'string' is empty, then yield nothing. Otherwise, yield tuples like
    ('a/b/c', ''), ('a/b', 'c'), ('a', 'b/c') for a string 'a/b/c' and a
    splitter '/'.

    The first tuple yielded has the whole string in its first element.
    With each iteration, the first element gets smaller and the second
    gets larger. It stops iterating just before it would have to yield
    ('', 'a/b/c').
    """
    if string == '':
        return
    parts = string.split(splitter)
    # Walk the cut point from the far right down to just after the first
    # token, so the left-hand side is never empty.
    for cut in range(len(parts), 0, -1):
        head = splitter.join(parts[:cut])
        tail = splitter.join(parts[cut:])
        yield head, tail
10541.1.3 by Tim Penhey
Move the functions around again.
135
136
12685.5.1 by Jonathan Lange
Remove some old workarounds and move some code from one place to a better place.
137
def iter_list_chunks(a_list, size):
    """Iterate over `a_list` in chunks of size `size`.

    The final chunk is shorter when the length of `a_list` is not a
    multiple of `size`.

    I'm amazed this isn't in itertools (mwhudson).
    """
    total = len(a_list)
    for start in range(0, total, size):
        end = start + size
        yield a_list[start:end]
12685.5.1 by Jonathan Lange
Remove some old workarounds and move some code from one place to a better place.
144
145
10662.1.3 by Jonathan Lange
Move highly-general synchronization function to services
146
def synchronize(source, target, add, remove):
    """Update 'source' to match 'target' using 'add' and 'remove'.

    'add' is called with each object that is in 'target' but not in
    'source'; 'remove' is called with each object that is in 'source' but
    not in 'target'.  All additions happen before any removal.

    :param source: The container to bring up to date.
    :param target: The container describing the desired contents.
    :param add: A one-argument callable invoked for each missing object.
    :param remove: A one-argument callable invoked for each surplus object.
    """
    # Both work lists are computed before any callback runs, since the
    # callbacks may change 'source' while we go.
    missing = [obj for obj in target if obj not in source]
    surplus = [obj for obj in source if obj not in target]
    for obj in missing:
        add(obj)
    for obj in surplus:
        remove(obj)
159
160
10541.1.3 by Tim Penhey
Move the functions around again.
161
def value_string(item):
    """Return a string representing `item`.

    This text is special cased for enumerated types: a `BaseItem` (as
    checked with zope.security.proxy's isinstance) is shown by its title,
    None is shown as '(not set)', and anything else is converted with
    unicode().
    """
    if item is None:
        return '(not set)'
    if zope_isinstance(item, BaseItem):
        return item.title
    return unicode(item)
172
173
174
def text_delta(instance_delta, delta_names, state_names, interface):
    """Return a textual delta for a Delta object.

    Only modified members of the delta will be shown, i.e. those whose
    attribute on `instance_delta` is present and not None.

    :param instance_delta: The delta to generate a textual representation of.
    :param delta_names: The names of all members to show changes to.  Each
        such member is read as a mapping with 'old' and 'new' keys and
        rendered as an indented "title: old => new" line.
    :param state_names: The names of all members to show only the new state
        of.
    :param interface: The Zope interface that the input delta compared.  It
        is indexed by field name to obtain each field's title.
    :return: A single string with the entries joined by newlines; the empty
        string when nothing was modified.
    """
    indent = ' ' * 4
    lines = []

    # Fields for which we have old and new values.
    for field_name in delta_names:
        change = getattr(instance_delta, field_name, None)
        if change is None:
            continue
        title = interface[field_name].title
        old_item = value_string(change['old'])
        new_item = value_string(change['new'])
        lines.append("%s%s: %s => %s" % (indent, title, old_item, new_item))

    # Fields for which only the new state is shown.
    for field_name in state_names:
        new_state = getattr(instance_delta, field_name, None)
        if new_state is None:
            continue
        title = interface[field_name].title
        # Separate this entry from whatever precedes it with a blank line.
        if lines:
            lines.append('')
        lines.append('%s changed to:\n\n%s' % (title, new_state))

    return '\n'.join(lines)
10652.2.1 by Tim Penhey
Add in a caching iterator.
208
209
210
class CachingIterator:
    """Remember the items extracted from the iterator for the next iteration.

    Some generators and iterators are expensive to calculate, like calculating
    the merge sorted revision graph for a bazaar branch, so you don't want to
    call them too often.  Rearranging the code so it doesn't call the
    expensive iterator can make the code awkward.  This class wraps such an
    iterator so that it is consumed only once while its results are stored.
    The results can then be iterated over again, and more values retrieved
    from the underlying iterator if necessary.
    """

    def __init__(self, iterator):
        self.iterator = iterator

    def __iter__(self):
        # Teeing an iterator previously returned by tee won't cause heat
        # death. See tee_copy in itertoolsmodule.c in the Python source.
        retained, fresh = tee(self.iterator)
        # Keep one branch for later iterations and hand out the other.
        self.iterator = retained
        return fresh
11149.2.3 by Jonathan Lange
Add a new 'run_with' method.
230
231
11149.2.10 by Jonathan Lange
Change the interface of decorate_with so we can call it twice
232
def decorate_with(context_factory, *args, **kwargs):
    """Create a decorator that runs decorated functions with 'context'.

    :param context_factory: A callable returning a context manager.  It is
        called afresh, with `args` and `kwargs`, on every call of the
        decorated function.
    :param args: Positional arguments for `context_factory`.
    :param kwargs: Keyword arguments for `context_factory`.
    :return: A decorator; the function it returns has had the metadata of
        the original merged in with `mergeFunctionMetadata`.
    """

    def decorator(function):

        def decorated(*call_args, **call_kwargs):
            context = context_factory(*args, **kwargs)
            with context:
                return function(*call_args, **call_kwargs)

        return mergeFunctionMetadata(function, decorated)

    return decorator
12156.15.12 by Benji York
- move docstring_dedent into a (preexisting) utility module (and add tests)
244
245
246
def docstring_dedent(s):
    """Remove leading indentation from a doc string.

    The first line of a doc string carries no indentation, so it is set
    aside, the remaining lines are dedented together, and the pieces are
    joined again.  Surrounding whitespace is stripped from the result.
    """
    # Pad with a newline so there is always something to partition on,
    # even for a one-line doc string.
    first, _, rest = (s + '\n').partition('\n')
    reassembled = first + '\n' + dedent(rest)
    return reassembled.strip()
12336.2.8 by Gavin Panella
Move traceback_info to lp.services.utils.
255
256
7675.1090.6 by Jeroen Vermeulen
Testing.
257
def file_exists(filename):
    """Does `filename` exist?

    :param filename: The path to check.
    :return: The result of an `os.access` existence (`os.F_OK`) check.
    """
    exists = os.access(filename, os.F_OK)
    return exists
260
261
12392.1.10 by Jonathan Lange
Add a helper.
262
class CapturedOutput(Fixture):
    """A fixture that captures output to stdout and stderr.

    The captured text accumulates in the `stdout` and `stderr` attributes,
    which are StringIO buffers created when the fixture is constructed.
    """

    def __init__(self):
        super(CapturedOutput, self).__init__()
        self.stdout, self.stderr = StringIO(), StringIO()

    def setUp(self):
        super(CapturedOutput, self).setUp()
        # Swap in our buffers for the real streams: stdout first, then
        # stderr.
        patches = (
            ('sys.stdout', self.stdout),
            ('sys.stderr', self.stderr),
            )
        for target, buffer in patches:
            self.useFixture(MonkeyPatch(target, buffer))
274
275
276
def run_capturing_output(function, *args, **kwargs):
    """Run ``function`` capturing output to stdout and stderr.

    :param function: A function to run.
    :param args: Arguments passed to the function.
    :param kwargs: Keyword arguments passed to the function.
    :return: A tuple of ``(ret, stdout, stderr)``, where ``ret`` is the value
        returned by ``function``, ``stdout`` is the captured standard output
        and ``stderr`` is the captured stderr.
    """
    with CapturedOutput() as output:
        result = function(*args, **kwargs)
    # The buffers are read only after the capturing context has exited.
    stdout_text = output.stdout.getvalue()
    stderr_text = output.stderr.getvalue()
    return result, stdout_text, stderr_text
289
290
12336.2.8 by Gavin Panella
Move traceback_info to lp.services.utils.
291
def traceback_info(info):
    """Set `__traceback_info__` in the caller's locals.

    This is more aesthetically pleasing than assigning to
    __traceback_info__, but it more importantly avoids spurious lint
    warnings about unused local variables, and helps to avoid typos.

    :param info: The value to store as `__traceback_info__`.
    """
    # Frame 1 is whoever called us; frame 0 would be this function.
    caller_frame = sys._getframe(1)
    caller_frame.f_locals["__traceback_info__"] = info
13278.1.12 by Aaron Bentley
Fake merge of rollback.
299
300
13333.5.2 by Jonathan Lange
Add utc_now
301
def utc_now():
    """Return a timezone-aware timestamp for the current time.

    :return: A `datetime` for now, with `pytz.UTC` as its tzinfo.
    """
    now = datetime.now(tz=pytz.UTC)
    return now
13405.9.1 by Henning Eggers
Restored r13373.
304
305
306
# This is a regular expression that matches email addresses embedded in
307
# text. It is not RFC 2821 compliant, nor does it need to be. This
308
# expression strives to identify probable email addresses so that they
309
# can be obfuscated when viewed by unauthenticated users. See
310
# http://www.email-unlimited.com/stuff/email_address_validator.htm
311
312
# localnames do not have [&?%!@<>,;:`|{}()#*^~ ] in practice
313
# (regardless of RFC 2821) because they conflict with other systems.
314
# See https://lists.ubuntu.com
315
#     /mailman/private/launchpad-reviews/2007-June/006081.html
316
317
# This version of the re is more than 5x faster than the original
318
# version used in ftest/test_tales.testObfuscateEmail.
319
re_email_address = re.compile(r"""
320
    \b[a-zA-Z0-9._/="'+-]{1,64}@  # The localname.
321
    [a-zA-Z][a-zA-Z0-9-]{1,63}    # The hostname.
322
    \.[a-zA-Z0-9.-]{1,251}\b      # Dot starts one or more domains.
323
    """, re.VERBOSE)              # ' <- font-lock turd
324
325
14325.1.4 by mbp at canonical
Add new strip-email formatter and use that in the meta description
326
def obfuscate_email(text_to_obfuscate, replacement=None):
    """Obfuscate an email address.

    The email address is obfuscated as <email address hidden> by default,
    or with the given replacement.

    The pattern used to identify an email address is not 2822. It strives
    to match any possible email address embedded in the text. For example,
    mailto:person@domain.dom and http://person:password@domain.dom both
    match, though the http match is in fact not an email address.

    :param text_to_obfuscate: The text to search for email addresses.
    :param replacement: The text to put in place of each match, or None to
        use the default.
    :return: The text with every match of `re_email_address` replaced.
    """
    if replacement is None:
        replacement = '<email address hidden>'
    obfuscated = re_email_address.sub(replacement, text_to_obfuscate)
    # Avoid doubled angle brackets, which appear when the address was
    # already written inside angle brackets.
    return obfuscated.replace(
        "<<email address hidden>>", "<email address hidden>")
14213.4.29 by Gavin Panella
Move load() and save() from check-teamparticipation.py to lp.services.utils.
345
346
347
def save_bz2_pickle(obj, filename):
    """Save a bz2 compressed pickle of `obj` to `filename`.

    :param obj: The object to pickle, using the highest pickle protocol.
    :param filename: The path of the file to write.
    """
    compressed_file = bz2.BZ2File(filename, "w")
    try:
        pickle.dump(obj, compressed_file, pickle.HIGHEST_PROTOCOL)
    finally:
        # Close even when pickling fails, so the handle is not leaked.
        compressed_file.close()
354
355
356
def load_bz2_pickle(filename):
    """Load and return a bz2 compressed pickle from `filename`.

    :param filename: The path of a bz2 compressed pickle file.
    :return: The unpickled object.
    """
    compressed_file = bz2.BZ2File(filename, "r")
    try:
        loaded = pickle.load(compressed_file)
    finally:
        # Close even when unpickling fails, so the handle is not leaked.
        compressed_file.close()
    return loaded
14265.2.1 by Aaron Bentley
Implement obfuscation for structures of basic types.
363
364
365
def obfuscate_structure(o):
    """Obfuscate the strings of a json-serializable structure.

    Note: tuples are converted to lists because json encoders do not
    distinguish between lists and tuples.

    :param o: Any json-serializable object.
    :return: a possibly-new structure in which all strings, list and tuple
        elements, and dict keys and values have undergone obfuscate_email
        recursively.  Objects of any other type are returned unchanged.
    """
    if isinstance(o, basestring):
        return obfuscate_email(o)
    if isinstance(o, (list, tuple)):
        return [obfuscate_structure(element) for element in o]
    if isinstance(o, dict):
        obfuscated_items = (
            (obfuscate_structure(key), obfuscate_structure(value))
            for key, value in o.iteritems())
        return dict(obfuscated_items)
    return o