1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
|
# Copyright 2009 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Chunky diffs.
Useful for page tests that have elisions.
"""
import re
# Python 2 idiom: classes in this module that are declared without explicit
# bases (e.g. ``class ResultChunk:``) are created as new-style classes.
__metaclass__ = type
def elided_source(tested, actual, debug=False, show=False,
                  normalize_whitespace=False):
    """Return a version of `actual` elided the way `tested` is.

    `tested` is split on '...' into chunks.  Each non-empty chunk is looked
    up with `find_chunk` in the part of `actual` that follows the previous
    match.  A chunk is anchored to the start of that text when it is the
    first chunk of `tested`, and to the end when it is the last one.

    The outcome depends on which chunks could be located:

    * none located (or there were no non-empty chunks): `actual` is
      returned unchanged;
    * all located: the matched texts are joined with '...', with a leading
      and/or trailing '...' when `tested` started and/or ended with one;
    * one or more located, followed only by chunks that were not located,
      and `tested` ends with '...': the matched texts followed by the rest
      of `actual` from the last match's remainder position;
    * anything else: `actual` is returned unchanged.

    :param tested: the expected text, possibly containing '...' elisions.
    :param actual: the text that was actually produced.
    :param debug: if true, enter pdb here (also passed on to `find_chunk`).
    :param show: passed on to `find_chunk`.
    :param normalize_whitespace: passed on to `find_chunk`.
    :return: a string, as described above.
    """
    if debug:
        import pdb
        pdb.set_trace()
    chunks = tested.split('...')

    previous_chunk = None
    chunk = None
    currentpos = 0
    results = []
    # Iterate one step ahead (with a None sentinel at the end) so that each
    # chunk is processed knowing both its predecessor and its successor.
    for next_chunk in chunks + [None]:
        if chunk is None:
            # First iteration: nothing to process yet, just prime `chunk`.
            chunk = next_chunk
            continue
        if chunk != '':
            # A chunk with no predecessor is not preceded by an ellipsis and
            # one with no successor is not followed by one, so those ends
            # are anchored.
            result = find_chunk(
                chunk, actual[currentpos:],
                anchor_start=previous_chunk is None,
                anchor_end=next_chunk is None,
                debug=debug, show=show,
                normalize_whitespace=normalize_whitespace)
            if result is None:
                results.append(None)
            else:
                string, startofremainder = result
                currentpos += startofremainder
                # XXX: ddaa 2005-03-25:
                #      Off by one. Should be += startofremainder + 1
                results.append(ResultChunk(string, currentpos))
        previous_chunk, chunk = chunk, next_chunk

    starts_with_ellipsis = chunks[0] == ''
    ends_with_ellipsis = chunks[-1] == ''

    # One letter per processed chunk: 'S' if it was located, 'N' if not.
    resultsummary = ''.join(
        [mnemonic_for_result(result) for result in results]
        )
    if re.match('^N+$', resultsummary):
        # If all results are None...
        output = actual
    elif re.match('^S+$', resultsummary):
        # If no results are None...
        output = '...'.join([result.text for result in results])
        if starts_with_ellipsis:
            output = '...' + output
        if ends_with_ellipsis:
            output = output + '...'
    elif re.match('^S+N+$', resultsummary) and ends_with_ellipsis:
        # Where there are one or more None values at the end of results,
        # and ends_with_ellipsis, we can end without an ellipsis while
        # including the remainder of 'actual' from the end of the last
        # matched chunk.
        if starts_with_ellipsis:
            output = '...'
        else:
            # XXX: ddaa 2005-03-25: Test this code path!
            output = ''
        # The summary starts with 'S', so last_result is always set before
        # the first None is reached.
        last_result = None
        for result in results:
            if result is not None:
                output += result.text
                last_result = result
            else:
                output += actual[last_result.remainderpos:]
                break
    else:
        # XXX: ddaa 2005-03-25: Test this code path!
        output = actual

    return output
class ResultChunk:
    """Plain record pairing a piece of text with a remainder position.

    :ivar text: the text stored for this chunk.
    :ivar remainderpos: the position stored alongside it.
    """

    def __init__(self, text, remainderpos):
        self.text, self.remainderpos = text, remainderpos
def reversed(seq):
    """Return a new list containing the items of `seq` in reverse order.

    This module-level definition shadows the builtin of the same name and,
    unlike it, returns a list rather than an iterator.
    """
    return list(seq)[::-1]
def mnemonic_for_result(result):
    """Return the one-letter code for `result`: 'N' for None, else 'S'."""
    return 'N' if result is None else 'S'
def find_chunk(chunk, actual, anchor_start=False, anchor_end=False,
               debug=False, show=False, normalize_whitespace=False):
    """Locate the part of `actual` that corresponds to `chunk`.

    Unless anchored, the start is found by growing a prefix of `chunk` one
    character at a time, and the end by growing a suffix of what is left of
    `chunk`, until the piece occurs exactly once in `actual` or stops
    occurring at all.

    :param chunk: the text to look for.
    :param actual: the string to search in.
    :param anchor_start: if true, no search is made for the beginning; the
        chunk is taken to start at position 0 of `actual`.
    :param anchor_end: if true, no search is made for the end; the chunk is
        taken to run to the end of `actual`.  It also makes an ambiguous
        beginning resolve to its last occurrence rather than its first.
    :param debug: if true, enter pdb on entry.
    :param show: if true, the beginning and end pieces are wrapped in
        square brackets in the returned text.
    :param normalize_whitespace: if true, each run of whitespace in `chunk`
        is matched as a single regex whitespace class instead of literally.
    :return: None if the chunk could not be located; otherwise a tuple
        ``(output, end_pos + 1)`` where `output` is the beginning piece,
        then ``actual[beginning_pos:end_pos]``, then the end piece.
    """
    if debug:
        import pdb; pdb.set_trace()
    if not anchor_start:
        # Find the start of the chunk.
        beginning = ''
        beginning_for_regex = ''
        manyfound = False
        for char in chunk:
            if normalize_whitespace and char.isspace():
                # Only add \s if the pattern does not already end with one,
                # so a run of whitespace becomes a single \s.
                if beginning_for_regex[-2:] != r'\s':
                    beginning_for_regex += r'\s'
            else:
                beginning_for_regex += re.escape(char)
            beginning += char
            numfound = len(re.findall(beginning_for_regex, actual))
            # String-based form of the line above: actual.count(beginning)
            if numfound == 0:
                if manyfound:
                    # The longer prefix is absent; fall back to the last
                    # prefix that matched more than once.
                    beginning = manyfound_beginning
                    beginning_for_regex = manyfound_beginning_for_regex
                    if anchor_end:
                        # Use the last occurrence.
                        beginning_pos = list(re.finditer(
                            beginning_for_regex, actual))[-1].start()
                        # String-based form: actual.rfind(beginning)
                    else:
                        # Use the first occurrence.
                        beginning_pos = re.search(
                            beginning_for_regex, actual).start()
                        # XXX ddaa 2005-03-25: This should be .span()[1].
                        # Needs a test.
                        # String-based form: actual.find(beginning)
                    break
                else:
                    # Not even the first character occurs: use no beginning
                    # piece and start from position 0.
                    beginning = ''
                    beginning_for_regex = ''
                    beginning_pos = 0
                    break
            elif numfound == 1:
                # Unambiguous: continue from just after the match.
                beginning_pos = re.search(
                    beginning_for_regex, actual).span()[1]
                # String-based form:
                # actual.find(beginning) + len(beginning)
                break
            else:
                # Still ambiguous: remember this prefix and keep growing.
                manyfound = True
                manyfound_beginning = beginning
                manyfound_beginning_for_regex = beginning_for_regex
        else:
            # The loop ran out of characters without breaking.
            if manyfound:
                # The whole chunk occurs more than once in `actual`.
                if anchor_end:
                    beginning_pos = list(re.finditer(
                        beginning_for_regex, actual))[-1].start()
                    # String-based form: actual.rfind(beginning)
                else:
                    beginning_pos = re.search(
                        beginning_for_regex, actual).start()
                    # XXX ddaa 2005-03-25: This should be .span()[1].
                    # Needs a test.
                    # String-based form: actual.find(beginning)
            else:
                # Only reached when `chunk` is empty.
                return None
    else:
        beginning_pos = 0
        beginning = ''
        beginning_for_regex = ''

    # Find the end of the chunk.
    end = ''
    end_for_regex = ''
    # Remove the beginning from the chunk.
    chunk_with_no_beginning = chunk[len(beginning):]
    if not chunk_with_no_beginning:
        # The beginning piece used up the whole chunk.
        end_pos = beginning_pos
    elif not anchor_end:
        # Walk the rest of the chunk backwards, growing a suffix.
        reversed_chunk = list(chunk_with_no_beginning)
        reversed_chunk.reverse()
        manyfound = False
        for char in reversed_chunk:
            end = char + end
            if normalize_whitespace and char.isspace():
                # Mirror of the prefix case: at most one leading \s per
                # run of whitespace.
                if end_for_regex[:2] != r'\s':
                    end_for_regex = r'\s' + end_for_regex
            else:
                end_for_regex = re.escape(char) + end_for_regex
            numfound = len(re.findall(end_for_regex, actual))
            # String-based form of the line above: actual.count(end)
            if numfound == 0:
                # None found this time around.  If we previously found more
                # than one match, then choose the closest to the beginning.
                if manyfound:
                    end = manyfound_end
                    end_for_regex = manyfound_end_for_regex
                    end_pos = re.search(end_for_regex, actual).start()
                    # String-based form: actual.find(end, beginning_pos)
                    # XXX: ddaa 2005-03-25:
                    #      This was wrong -- shouldn't be beginning_pos as
                    #      we've already chopped off the beginning!
                    #      Or is it?  We chopped the beginning of the chunk,
                    #      not the actual stuff. So, using beginning_pos
                    #      still holds. Need to chop that off and add on
                    #      its length.
                    break
                else:
                    # The last character of the chunk is not in `actual`.
                    return None
            elif numfound == 1:
                end_pos = re.search(end_for_regex, actual).start()
                # String-based form: actual.rfind(end)
                # XXX: ddaa 2005-03-25: Only one found, so why not use find()?
                break
            else:
                # Still ambiguous: remember this suffix and keep growing.
                manyfound = True
                manyfound_end = end
                manyfound_end_for_regex = end_for_regex
        else:
            # The loop ran out of characters without breaking.
            if manyfound:
                end_pos = re.search(end_for_regex, actual).start()
            else:
                # reversed_chunk is non-empty in this branch, so a loop that
                # completes has always set manyfound.
                return None
    else:
        end_pos = len(actual)
        end = ''
        end_for_regex = ''

    chunk_equivalent = actual[beginning_pos:end_pos]
    if show:
        output = '[%s]%s[%s]' % (beginning, chunk_equivalent, end)
    else:
        output = '%s%s%s' % (beginning, chunk_equivalent, end)
    # XXX: ddaa 2005-03-25: end_pos+1 is the end of chunk_equivalent, not end.
    return (output, end_pos+1)
|