~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""Various functions and classes that are useful across different parts of
launchpad.

Do not simply dump stuff in here.  Think carefully as to whether it would
be better as a method on an existing content object or IFooSet object.
"""

__metaclass__ = type

from difflib import unified_diff
import re
from StringIO import StringIO
import subprocess
import tarfile
import warnings

from zope.security.interfaces import ForbiddenAttribute


def text_replaced(text, replacements, _cache={}):
    """Return a new string with text replaced according to the dict provided.

    The keys of the dict are substrings to find, the values are what to
    replace found substrings with.  All replacements happen in a single
    pass, and when several keys match at the same position the longest
    key wins.

    :arg text: An unicode or str to do the replacement.
    :arg replacements: A non-empty dictionary with the replacements that
        should be done.
    :arg _cache: Memo of previously built replacer functions.  The shared
        mutable default is deliberate: it is what makes the cache persist
        between calls.
    :return: `text` with every occurrence of a key replaced by its value.
    :raise AssertionError: if `replacements` is empty.

    >>> text_replaced('', {'a':'b'})
    ''
    >>> text_replaced('a', {'a':'c'})
    'c'
    >>> text_replaced('faa bar baz', {'a': 'A', 'aa': 'X'})
    'fX bAr bAz'
    >>> text_replaced('1 2 3 4', {'1': '2', '2': '1'})
    '2 1 3 4'

    Unicode strings work too.

    >>> text_replaced(u'1 2 3 4', {u'1': u'2', u'2': u'1'})
    u'2 1 3 4'

    The argument _cache is used as a cache of replacements that were requested
    before, so we only compute regular expressions once.

    """
    assert replacements, "The replacements dict must not be empty."
    is_unicode = isinstance(text, unicode)
    # A frozenset makes the key independent of the order in which the dict
    # happens to yield its items, so equal dicts always share one entry.
    # The text type is part of the key because it decides the type of the
    # compiled pattern below.
    cachekey = (is_unicode, frozenset(replacements.items()))
    if cachekey not in _cache:
        if is_unicode:
            join_char = u'|'
        else:
            join_char = '|'
        # Longest keys first: regex alternation takes the first alternative
        # that matches, so this is what makes 'aa' win over 'a'.
        # No group is wrapped around each key; matchobj.group() already
        # returns the whole match, and some versions of the re module
        # limit the number of capturing groups in a pattern.
        alternatives = [
            re.escape(find)
            for find in sorted(replacements, key=len, reverse=True)]
        # Make a copy of the replacements dict, as it is mutable, but we're
        # keeping a cached reference to it.
        replacements_copy = dict(replacements)

        def matchobj_replacer(matchobj):
            return replacements_copy[matchobj.group()]

        regexsub = re.compile(join_char.join(alternatives)).sub

        def replacer(s):
            return regexsub(matchobj_replacer, s)

        _cache[cachekey] = replacer
    return _cache[cachekey](text)


def backslashreplace(str):
    r"""Return an ASCII-only copy of a UTF-8 encoded string.

    The input is decoded as UTF-8 and re-encoded as ASCII, with every
    non-ASCII character rendered as a backslash escape such as \xNN or
    \uNNNN.  Used to test data containing typographical quotes etc.
    """
    decoded = str.decode('UTF-8')
    return decoded.encode('ASCII', 'backslashreplace')


def string_to_tarfile(s):
    """Open the tar archive held in the binary string `s`.

    The string is wrapped in an in-memory file object and handed to
    tarfile.open() in read mode; the resulting tar file object is returned.
    """
    stream = StringIO(s)
    return tarfile.open(name='', mode='r', fileobj=stream)


def simple_popen2(command, input, env=None, in_bufsize=1024, out_bufsize=128):
    """Run `command`, feed it `input` on stdin and return what it printed.

    The child's standard error is redirected into its standard output, so
    the returned data contains both streams.

    Writing all of a child's input and only then reading its output can
    deadlock: the parent blocks writing to the child while the child is
    blocked writing to the parent.  subprocess.Popen.communicate() handles
    both directions for us, which is why it is used here.

    :param command: The command to run, as accepted by subprocess.Popen.
    :param input: Data to send to the command's standard input.
    :param env: Optional environment for the child process.
    :param in_bufsize: Accepted but not used by this function.
    :param out_bufsize: Accepted but not used by this function.
    :return: The data the command wrote to standard output and error.
    """
    process = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        env=env)
    captured = process.communicate(input)[0]
    return captured


class ShortListTooBigError(Exception):
    """Raised by shortlist() when a sequence exceeds its hardlimit."""


def shortlist(sequence, longest_expected=15, hardlimit=None):
    """Return a listified version of sequence.

    If <sequence> has more than <longest_expected> items, a warning is issued.

    :param sequence: The sequence or iterable to listify.
    :param longest_expected: Largest size that does not trigger a warning.
    :param hardlimit: Largest size allowed at all, or None for no limit.
        At most hardlimit + 1 items are ever fetched from the sequence.
    :return: A list with the items of the sequence.
    :raise ShortListTooBigError: if there are more than hardlimit items.

    >>> shortlist([1, 2])
    [1, 2]

    >>> shortlist([1, 2, 3], 2) #doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    UserWarning: shortlist() should not be used here. It's meant to listify
    sequences with no more than 2 items.  There were 3 items.

    >>> shortlist([1, 2, 3, 4], hardlimit=2)
    Traceback (most recent call last):
    ...
    ShortListTooBigError: Hard limit of 2 exceeded.

    >>> shortlist(
    ...     [1, 2, 3, 4], 2, hardlimit=4) #doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    UserWarning: shortlist() should not be used here. It's meant to listify
    sequences with no more than 2 items.  There were 4 items.

    A hardlimit of zero is enforced too, rather than being mistaken for
    "no limit".

    >>> shortlist([1], hardlimit=0)
    Traceback (most recent call last):
    ...
    ShortListTooBigError: Hard limit of 0 exceeded.

    It works on iterable also which don't support the extended slice protocol.

    >>> xrange(5)[:1] #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    TypeError: ...

    >>> shortlist(xrange(10), 5, hardlimit=8) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ShortListTooBigError: ...

    """
    # Test against None everywhere: a plain truth test would treat a
    # hardlimit of 0 as "no limit" after the slice below had already cut
    # the results down to a single item.
    limited = hardlimit is not None
    if limited:
        # One item more than allowed is enough to detect an overflow.
        last = hardlimit + 1
    else:
        last = None
    try:
        results = list(sequence[:last])
    except (TypeError, ForbiddenAttribute):
        # The sequence cannot be sliced; iterate over it by hand instead,
        # stopping once hardlimit + 1 items have been collected.
        results = []
        for idx, item in enumerate(sequence):
            if limited and idx > hardlimit:
                break
            results.append(item)

    size = len(results)
    if limited and size > hardlimit:
        raise ShortListTooBigError(
           'Hard limit of %d exceeded.' % hardlimit)
    elif size > longest_expected:
        warnings.warn(
            "shortlist() should not be used here. It's meant to listify"
            " sequences with no more than %d items.  There were %s items."
            % (longest_expected, size), stacklevel=2)
    return results


def is_tar_filename(filename):
    """Tell whether `filename` looks like the name of a tar file.

    Both plain tar files and the usual compressed variants are recognised,
    judging by the extension only.
    """
    tar_suffixes = ('.tar', '.tar.gz', '.tgz', '.tar.bz2')
    return filename.endswith(tar_suffixes)


def test_diff(lines_a, lines_b):
    """Generate a string indicating the difference between expected and actual
    values in a test.

    :param lines_a: The expected lines.
    :param lines_b: The actual lines.
    :return: A unified diff from 'expected' to 'actual', as a single string
        with the diff lines joined by newlines.  It is empty when the two
        sequences are equal.
    """
    diff_lines = unified_diff(
        a=lines_a,
        b=lines_b,
        fromfile='expected',
        tofile='actual',
        lineterm='',
        )
    return '\n'.join(diff_lines)


# Despite its name this is a helper for tests, not a test.  Test collectors
# that go by the "test_" prefix would otherwise try to run it whenever it is
# imported into a test module.
test_diff.__test__ = False


def filenameToContentType(fname):
    """Return a ContentType-like entry for arbitrary filenames.

    The decision is based on the ending of the filename only.

    deb files

    >>> filenameToContentType('test.deb')
    'application/x-debian-package'

    text files

    >>> filenameToContentType('test.txt')
    'text/plain'

    Not recognized format

    >>> filenameToContentType('test.tgz')
    'application/octet-stream'
    """
    known_types = (
        # ".txt.gz" is for the build master logs.
        ((".dsc", ".changes", ".txt", ".txt.gz"), "text/plain"),
        ((".deb", ".udeb"), "application/x-debian-package"),
        )
    for endings, content_type in known_types:
        if fname.endswith(endings):
            return content_type
    return "application/octet-stream"


def intOrZero(value):
    """Convert `value` with int(), falling back to 0 when that fails.

    >>> intOrZero('1.23')
    0
    >>> intOrZero('1.ab')
    0
    >>> intOrZero('2')
    2
    >>> intOrZero(None)
    0
    >>> intOrZero(1)
    1
    >>> intOrZero(-9)
    -9
    """
    try:
        converted = int(value)
    except (TypeError, ValueError):
        # Neither a number nor a string holding an integer.
        converted = 0
    return converted


def truncate_text(text, max_length):
    """Return a version of string no longer than max_length characters.

    Tries not to cut off the text mid-word: whole words are kept for as long
    as they fit.  Only when the very first word is itself too long is it cut
    at max_length.  Text that contains no words at all (empty or only
    whitespace) results in an empty string.

    :param text: The string to shorten.
    :param max_length: The maximum length of the result.
    :return: The shortened string.
    """
    # Each "word" carries the whitespace in front of it.  One character
    # beyond max_length is scanned so that a word running past the limit
    # shows up as too long to fit.
    words = re.compile(r'\s*\S+').findall(text, 0, max_length + 1)
    if not words:
        # Nothing but whitespace (or nothing at all) in the scanned part.
        return text[:0]
    truncated = words[0]
    for word in words[1:]:
        if len(truncated) + len(word) > max_length:
            break
        truncated += word
    return truncated[:max_length]


def english_list(items, conjunction='and'):
    """Join the items into one English-style enumeration.

    One or two items are simply joined with the conjunction.  Three or more
    follow the advice given in The Elements of Style, chapter I, section 2:

    "In a series of three or more terms with a single conjunction, use
     a comma after each term except the last."

    Beware that this is US English and is wrong for non-US.

    :param items: An iterable of strings; it is not modified.
    :param conjunction: The word put before the last item.
    """
    terms = list(items)
    if len(terms) > 2:
        series = terms[:-1]
        series.append('%s %s' % (conjunction, terms[-1]))
        return ', '.join(series)
    separator = ' %s ' % conjunction
    return separator.join(terms)


def ensure_unicode(string):
    r"""Return input as unicode. None is passed through unharmed.

    Do not use this method. It exists only to help the migration of legacy
    code where str objects were being passed into contexts where unicode
    objects are required. All invocations of ensure_unicode() should
    eventually be removed.

    Unlike the builtin unicode() function, this raises a TypeError if the
    parameter is not a basestring or if a raw string is not ASCII.

    >>> ensure_unicode(u'hello')
    u'hello'

    >>> ensure_unicode('hello')
    u'hello'

    >>> ensure_unicode(u'A'.encode('utf-16')) # Not ASCII
    Traceback (most recent call last):
    ...
    TypeError: '\xff\xfeA\x00' is not US-ASCII

    >>> ensure_unicode(42)
    Traceback (most recent call last):
    ...
    TypeError: 42 is not a basestring (<type 'int'>)

    >>> ensure_unicode(None) is None
    True
    """
    if string is None:
        return None
    if isinstance(string, unicode):
        # Already what the caller wants.
        return string
    if not isinstance(string, basestring):
        raise TypeError(
            "%r is not a basestring (%r)" % (string, type(string)))
    try:
        return string.decode('US-ASCII')
    except UnicodeDecodeError:
        raise TypeError("%s is not US-ASCII" % repr(string))