~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# Copyright 2009 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""Chunky diffs.

Useful for page tests that have elisions.
"""

import re

__metaclass__ = type

def elided_source(tested, actual, debug=False, show=False,
                  normalize_whitespace=False):
    """Return 'actual' rewritten with the elisions used in 'tested'.

    'tested' is split on '...' and every non-empty piece is looked up in
    'actual' with find_chunk(), each search starting where the previous
    successful one reported its remainder.  The first piece is anchored
    at the start of the text when it is not preceded by an ellipsis and
    the last piece is anchored at the end when it is not followed by one.

    The result depends on which pieces were found:

    - all of them: the found texts joined with '...', plus a leading
      and/or trailing '...' when 'tested' starts and/or ends with one;
    - one or more found, then one or more not found, with 'tested'
      ending in an ellipsis: the found texts (concatenated without a
      separator, after a leading '...' if 'tested' starts with one)
      followed by the rest of 'actual' after the last found piece;
    - anything else, including none found: 'actual' unchanged.

    'debug' starts pdb; 'debug', 'show' and 'normalize_whitespace' are
    passed through to find_chunk().
    """
    if debug:
        import pdb; pdb.set_trace()
    pieces = tested.split('...')
    final_index = len(pieces) - 1

    offset = 0
    results = []
    for index, piece in enumerate(pieces):
        if piece == '':
            # Empty pieces only record where the ellipses were.
            continue
        found = find_chunk(
            piece, actual[offset:],
            anchor_start=(index == 0),
            anchor_end=(index == final_index),
            debug=debug, show=show,
            normalize_whitespace=normalize_whitespace)
        if found is None:
            results.append(None)
            continue
        text, consumed = found
        # XXX: ddaa 2005-03-25:
        # Off by one. Should be += consumed + 1
        offset += consumed
        results.append(ResultChunk(text, offset))

    leading_ellipsis = pieces[0] == ''
    trailing_ellipsis = pieces[-1] == ''

    # One letter per searched piece: 'S' when found, 'N' when not.
    summary = ''.join([mnemonic_for_result(entry) for entry in results])

    if re.match('^N+$', summary):
        # Nothing was found at all.
        return actual

    if re.match('^S+$', summary):
        # Everything was found.
        joined = '...'.join([entry.text for entry in results])
        if leading_ellipsis:
            joined = '...' + joined
        if trailing_ellipsis:
            joined = joined + '...'
        return joined

    if re.match('^S+N+$', summary) and trailing_ellipsis:
        # Some pieces at the end were not found, but 'tested' ends with
        # an ellipsis: finish with the remainder of 'actual' after the
        # last piece that was found instead of with an ellipsis.
        found_entries = [entry for entry in results if entry is not None]
        if leading_ellipsis:
            rebuilt = '...'
        else:
            # XXX: ddaa 2005-03-25: Test this code path!
            rebuilt = ''
        for entry in found_entries:
            rebuilt += entry.text
        return rebuilt + actual[found_entries[-1].remainderpos:]

    # XXX: ddaa 2005-03-25: Test this code path!
    return actual

class ResultChunk:
    """A piece of text that was found, with the position following it.

    'text' is the text recorded for the piece and 'remainderpos' is the
    offset stored alongside it, used by elided_source() to slice the
    remainder of the text that was searched.
    """

    def __init__(self, text, remainderpos):
        self.text, self.remainderpos = text, remainderpos


def reversed(seq):
    """Return a new list with the items of 'seq' in reverse order.

    This module-level function shadows the builtin of the same name and,
    unlike it, returns a list rather than an iterator.  'seq' itself is
    not modified.
    """
    return list(seq)[::-1]

def mnemonic_for_result(result):
    """Map a search result to a one-letter code.

    Gives 'S' for any result that is not None and 'N' for None.
    """
    if result is not None:
        return 'S'
    return 'N'

def _end_of_ambiguous_beginning(beginning_for_regex, actual, anchor_end):
    """Return the offset in 'actual' just past an ambiguous beginning.

    'beginning_for_regex' must match 'actual' at least once.  The last
    match is used when the chunk is anchored at its end, the first match
    otherwise.
    """
    if anchor_end:
        match = list(re.finditer(beginning_for_regex, actual))[-1]
    else:
        match = re.search(beginning_for_regex, actual)
    return match.end()


def find_chunk(chunk, actual, anchor_start=False, anchor_end=False,
               debug=False, show=False, normalize_whitespace=False):
    """Heuristically locate the text in 'actual' corresponding to 'chunk'.

    The chunk is matched from both ends.  Its beginning is the shortest
    prefix of 'chunk' that occurs exactly once in 'actual'; if a prefix
    stops matching after shorter ones matched several times, the longest
    prefix that still matched is used.  The end is found the same way,
    growing a suffix of what is left of 'chunk' backwards.

    :param chunk: the expected text to look for.
    :param actual: the text to search.
    :param anchor_start: if true, the chunk is taken to start at the
        start of 'actual' and no beginning is searched for.
    :param anchor_end: if true, the chunk is taken to run to the end of
        'actual'; an ambiguous beginning then resolves to its last match
        instead of its first.
    :param debug: if true, enter pdb on entry.
    :param show: if true, wrap the beginning and end texts in square
        brackets in the returned text.
    :param normalize_whitespace: if true, each run of whitespace in
        'chunk' is matched as one whitespace character of any kind.
    :return: None when the chunk cannot be located, otherwise a tuple
        (text, remainder) where text is the beginning, the part of
        'actual' between beginning and end, and the end, and remainder
        is the offset where the end was found plus one.
    """
    if debug:
        import pdb; pdb.set_trace()
    if not anchor_start:
        # Find the start of the chunk by growing a prefix of it one
        # character at a time.
        beginning = ''
        beginning_for_regex = ''
        manyfound = False
        for char in chunk:
            if normalize_whitespace and char.isspace():
                # Collapse consecutive whitespace into a single \s.
                if beginning_for_regex[-2:] != r'\s':
                    beginning_for_regex += r'\s'
            else:
                beginning_for_regex += re.escape(char)
            beginning += char
            numfound = len(re.findall(beginning_for_regex, actual))
            if numfound == 0:
                if manyfound:
                    # Fall back to the longest prefix that still matched.
                    beginning = manyfound_beginning
                    beginning_for_regex = manyfound_beginning_for_regex
                    # The position has to be the end of the match: the
                    # beginning text is prepended to the output below,
                    # so using the start would emit it twice.
                    beginning_pos = _end_of_ambiguous_beginning(
                        beginning_for_regex, actual, anchor_end)
                else:
                    # Not even the first character occurs; carry on with
                    # no beginning and let the end search decide.
                    beginning = ''
                    beginning_for_regex = ''
                    beginning_pos = 0
                break
            elif numfound == 1:
                beginning_pos = re.search(
                    beginning_for_regex, actual).span()[1]
                break
            else:
                manyfound = True
                manyfound_beginning = beginning
                manyfound_beginning_for_regex = beginning_for_regex
        else:
            # The whole chunk was consumed without becoming unique (or
            # the chunk was empty).
            if manyfound:
                beginning_pos = _end_of_ambiguous_beginning(
                    beginning_for_regex, actual, anchor_end)
            else:
                return None
    else:
        beginning_pos = 0
        beginning = ''
        beginning_for_regex = ''

    # Find the end of the chunk.
    end = ''
    end_for_regex = ''
    # Remove the beginning from the chunk.
    chunk_with_no_beginning = chunk[len(beginning):]
    if not chunk_with_no_beginning:
        end_pos = beginning_pos
    elif not anchor_end:
        reversed_chunk = list(chunk_with_no_beginning)
        reversed_chunk.reverse()
        manyfound = False
        for char in reversed_chunk:
            end = char + end
            if normalize_whitespace and char.isspace():
                if end_for_regex[:2] != r'\s':
                    end_for_regex = r'\s' + end_for_regex
            else:
                end_for_regex = re.escape(char) + end_for_regex
            numfound = len(re.findall(end_for_regex, actual))
            if numfound == 0:
                # None found this time around.  If we previously found more
                # than one match, then choose the closest to the beginning.
                if manyfound:
                    end = manyfound_end
                    end_for_regex = manyfound_end_for_regex
                    # XXX: ddaa 2005-03-25:
                    #      The search covers all of 'actual', not just
                    #      the text after beginning_pos.
                    end_pos = re.search(end_for_regex, actual).start()
                    break
                else:
                    return None
            elif numfound == 1:
                end_pos = re.search(end_for_regex, actual).start()
                break
            else:
                manyfound = True
                manyfound_end = end
                manyfound_end_for_regex = end_for_regex
        else:
            if manyfound:
                end_pos = re.search(end_for_regex, actual).start()
            else:
                return None
    else:
        end_pos = len(actual)
        end = ''
        end_for_regex = ''

    chunk_equivalent = actual[beginning_pos:end_pos]
    if show:
        output = '[%s]%s[%s]' % (beginning, chunk_equivalent, end)
    else:
        output = '%s%s%s' % (beginning, chunk_equivalent, end)
    # XXX: ddaa 2005-03-25: end_pos+1 is the end of chunk_equivalent, not end.
    return (output, end_pos+1)