9492.1.1
by Karl Fogel
Add utilities/formatdoctest.py and utilities/migrater/, both brought |
1 |
#!/usr/bin/python
|
2 |
#
|
|
3 |
# Copyright (C) 2009 - Curtis Hovey <sinzui.is at verizon.net>
|
|
4 |
# This software is licensed under the GNU General Public License version 2.
|
|
5 |
#
|
|
6 |
# It comes from the Gedit Developer Plugins project (launchpad.net/gdp); see
|
|
7 |
# http://bazaar.launchpad.net/~sinzui/gdp/trunk/files/head%3A/plugins/gdp/ &
|
|
8 |
# http://bazaar.launchpad.net/%7Esinzui/gdp/trunk/annotate/head%3A/COPYING.
|
|
9 |
||
10 |
"""Reformat a doctest to Launchpad style."""
|
|
11 |
||
12 |
# Python 2 only honours the module-level name ``__metaclass__``; the
# misspelt ``__metatype__`` was silently ignored, leaving the classes in
# this module old-style.
__metaclass__ = type
|
13 |
||
14 |
import compiler |
|
15 |
from difflib import unified_diff |
|
16 |
from doctest import DocTestParser, Example |
|
17 |
from optparse import OptionParser |
|
18 |
import re |
|
19 |
import sys |
|
20 |
from textwrap import wrap |
|
21 |
||
22 |
import pyflakes |
|
23 |
from pyflakes.checker import Checker |
|
24 |
||
25 |
||
26 |
class DoctestReviewer:
    """Check and reformat doctests.

    The doctest text is parsed with `DocTestParser` and walked one line at
    a time.  Every line is classified as SOURCE (example code), WANT
    (expected output) or NARRATIVE (any other text) and passed through a
    chain of line methods that either report problems (`check`) or
    rewrite the line (`format`).
    """

    rule_pattern = re.compile(r'([=~-])+[ ]*$')
    moin_pattern = re.compile(r'^(=+)[ ](.+)[ ](=+[ ]*)$')
    continuation_pattern = re.compile(r'^(\s*\.\.\.) (.+)$', re.M)
    # Compiled once here rather than on every is_comment() call.
    comment_pattern = re.compile(r'\s*#')

    SOURCE = 'source'
    WANT = 'want'
    NARRATIVE = 'narrative'

    # Maximum line length for each kind of line; see check_length().
    max_lengths = {
        NARRATIVE: 78,
        WANT: 74,
        SOURCE: 70,
        }

    def __init__(self, doctest, file_name):
        """Parse the doctest and reset the per-run state.

        :param doctest: The text of the doctest.
        :param file_name: The name printed before the first message and
            passed to the doctest parser.
        """
        self.doctest = doctest
        self.file_name = file_name
        doctest = self._disambuguate_doctest(doctest)
        parser = DocTestParser()
        self.parts = parser.parse(doctest, file_name)
        self.blocks = []
        self.block = []
        self.block_method = self.preserve_block
        self.code_lines = []
        self.example = None
        self.last_bad_indent = 0
        self.has_printed_filename = False

    def _disambuguate_doctest(self, doctest):
        """Clarify continuations that the doctest parser hides."""
        # NOTE(review): as written the replacement reproduces exactly what
        # the pattern matched, so this substitution leaves the text
        # unchanged.  The spacing of the pattern or the replacement may
        # have been lost; confirm against the upstream file.
        return self.continuation_pattern.sub(r'\1 \2', doctest)

    def _print_message(self, message, lineno):
        """Print the error message with the lineno.

        The file name is printed once, before the first message.

        :param message: The message to print.
        :param lineno: The line number the message pertains to.
        """
        if not self.has_printed_filename:
            print('%s:' % self.file_name)
            self.has_printed_filename = True
        print(' % 4s: %s' % (lineno, message))

    def _is_formatted(self, text):
        """Return True if the text is pre-formatted, otherwise False.

        Text counts as pre-formatted when it starts with a space.  Only the
        first item of a list is examined.

        :param text: a string, or a non-empty list of strings.
        """
        if isinstance(text, list):
            text = text[0]
        return text.startswith(' ')

    def _walk(self, doctest_parts):
        """Walk the doctest parts; yield the line and kind.

        Yield the content of the line, and its kind (SOURCE, WANT, NARRATIVE).
        SOURCE and WANT lines are stripped of indentation, SOURCE is also
        stripped of the interpreter symbols.

        While the lines of an example are being yielded, self.example is
        that example; for narrative it is None.

        :param doctest_parts: The output of DocTestParser.parse.
        """
        for part in doctest_parts:
            if part == '':
                continue
            if isinstance(part, Example):
                self.example = part
                for line in part.source.splitlines():
                    yield line, DoctestReviewer.SOURCE
                for line in part.want.splitlines():
                    yield line, DoctestReviewer.WANT
            else:
                self.example = None
                for line in part.splitlines():
                    yield line, DoctestReviewer.NARRATIVE

    def _apply(self, line_methods):
        """Call each line_method for each line in the doctest.

        Lines that survive the methods are collected in self.block; the
        finished blocks are collected in self.blocks.

        :param line_methods: a list of methods that accept lineno, line,
            kind, and previous_kind as arguments. Each method must return
            the line for the next method to process.
        """
        self.blocks = []
        self.block = []
        lineno = 0
        previous_kind = DoctestReviewer.NARRATIVE
        for line, kind in self._walk(self.parts):
            lineno += 1
            self._append_source(kind, line)
            if kind != previous_kind and kind != DoctestReviewer.WANT:
                # The WANT block must adjoin the preceding SOURCE block.
                self._store_block(previous_kind)
            for method in line_methods:
                line = method(lineno, line, kind, previous_kind)
                if line is None:
                    break
            if not line:
                # Empty lines are never stored, and they do not change
                # previous_kind.
                continue
            self.block.append(line)
            previous_kind = kind
        # Capture the last block and a blank line.
        self.block.append('\n')
        self._store_block(previous_kind)

    def _append_source(self, kind, line):
        """Update the list of source code lines seen.

        Non-SOURCE lines are recorded as empty strings so that an index
        into code_lines lines up with the walked line numbers.
        """
        if kind == self.SOURCE:
            self.code_lines.append(line)
        else:
            self.code_lines.append('')

    def _store_block(self, kind):
        """Append the block to blocks, re-wrap unformatted narrative.

        The current block is passed through self.block_method, joined with
        newlines and stored; an empty block is ignored.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        """
        if not self.block:
            return
        block = self.block_method(kind, self.block, self.blocks)
        self.blocks.append('\n'.join(block))
        self.block = []

    def check(self):
        """Check the doctest for style and code issues.

        1. Check line lengths.
        2. Check that headings are not in Moin format.
        3. Check indentation.
        4. Check trailing whitespace.

        Afterwards the collected SOURCE lines are passed to
        check_source_code.  Problems are printed, nothing is returned.
        """
        self.code_lines = []
        line_checkers = [
            self.check_length,
            self.check_heading,
            self.check_indentation,
            self.check_trailing_whitespace,
            ]
        self._apply(line_checkers)
        code = '\n'.join(self.code_lines)
        self.check_source_code(code)

    def format(self):
        """Reformat doctest.

        1. Tests are reindented to 4 spaces.
        2. Simple narrative is rewrapped to 72 character width.
        3. Formatted (indented) narrative is preserved.
        4. Moin headings are converted to RST =, -, and . rules.
        5. There is one blank line between blocks,
        6. Except for headers which have two leading blank lines.
        7. All trailing whitespace is removed.

        SOURCE and WANT long lines are not fixed--this is a human operation.

        :return: The reformatted doctest as a string.
        """
        line_checkers = [
            self.fix_trailing_whitespace,
            self.fix_indentation,
            self.fix_heading,
            self.fix_narrative_paragraph,
            ]
        self.block_method = self.format_block
        self._apply(line_checkers)
        self.block_method = self.preserve_block
        return '\n\n'.join(self.blocks)

    def preserve_block(self, kind, block, blocks):
        """Do nothing to the block.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        :param block: The list of lines that should remain together.
        :param blocks: The list of all collected blocks.
        """
        return block

    def format_block(self, kind, block, blocks):
        """Format paragraph blocks.

        Only unformatted NARRATIVE blocks are changed: a heading that is
        not the first block gets a leading empty line, and any other
        paragraph is joined and re-wrapped.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        :param block: The list of lines that should remain together.
        :param blocks: The list of all collected blocks.
        """
        if kind != DoctestReviewer.NARRATIVE or self._is_formatted(block):
            return block
        try:
            rules = ('===', '---', '...')
            last_line = block[-1]
            # A heading block ends with a rule line.
            is_heading = last_line[0:3] in rules and last_line[-3:] in rules
        except IndexError:
            is_heading = False
        if len(blocks) != 0 and is_heading:
            # Headings should have an extra leading blank line.
            block.insert(0, '')
        elif is_heading:
            # Do nothing. This is the first heading in the file.
            pass
        else:
            long_line = ' '.join(block).strip()
            block = wrap(long_line, 72)
        return block

    def is_comment(self, line):
        """Return True if the line is a comment."""
        return self.comment_pattern.match(line) is not None

    def check_length(self, lineno, line, kind, previous_kind):
        """Check the length of the line.

        Each kind of line has a maximum length:

        * NARRATIVE: 78 characters.
        * SOURCE: 70 characters (discounting indentation and interpreter).
        * WANT: 74 characters (discounting indentation).

        :return: The line; a narrative comment is returned without its
            leading whitespace.
        """
        if kind == DoctestReviewer.NARRATIVE and self.is_comment(line):
            # comments follow WANT rules because they are in code.
            kind = DoctestReviewer.WANT
            line = line.lstrip()
        # Measure after the indentation was discounted, and report the
        # limit that was actually applied to this kind of line.
        limit = self.max_lengths[kind]
        if len(line) > limit:
            self._print_message(
                '%s exceeds %s characters.' % (kind, limit), lineno)
        return line

    def check_indentation(self, lineno, line, kind, previous_kind):
        """Check the indentation of the SOURCE or WANT line."""
        if kind == DoctestReviewer.NARRATIVE:
            return line
        if self.example.indent != 4:
            # Report only the first line of a run of badly indented lines.
            if self.last_bad_indent != lineno - 1:
                self._print_message('%s has bad indentation.' % kind, lineno)
            self.last_bad_indent = lineno
        return line

    def check_trailing_whitespace(self, lineno, line, kind, previous_kind):
        """Check for the presence of trailing whitespace in the line."""
        if line.endswith(' '):
            self._print_message('%s has trailing whitespace.' % kind, lineno)
        return line

    def check_heading(self, lineno, line, kind, previous_kind):
        """Check for narrative lines that use moin headers instead of RST."""
        if kind != DoctestReviewer.NARRATIVE:
            return line
        moin = self.moin_pattern.match(line)
        if moin is not None:
            self._print_message('%s uses a moin header.' % kind, lineno - 1)
        return line

    def check_source_code(self, code):
        """Check for source code problems in the doctest using pyflakes.

        The most common problem found are unused imports. `UndefinedName`
        errors are suppressed because the test setup is not known.

        :param code: The source code to check; an empty string is ignored.
        """
        if code == '':
            return
        try:
            tree = compiler.parse(code)
        except (SyntaxError, IndentationError):
            # sys.exc_info() is used instead of binding the exception in
            # the except clause so the same spelling parses on old and
            # new Python versions.
            exc = sys.exc_info()[1]
            # The source text and line number may be missing.
            line = exc.text or ''
            if line.endswith("\n"):
                line = line[:-1]
            lineno = exc.lineno or 1
            self._print_message(
                'Could not compile:\n %s' % line, lineno - 1)
        else:
            warnings = Checker(tree)
            for warning in warnings.messages:
                if isinstance(warning, pyflakes.messages.UndefinedName):
                    continue
                # Split on the first two colons only; the message itself
                # may contain colons.
                dummy, lineno, message = str(warning).split(':', 2)
                self._print_message(message.strip(), lineno)

    def fix_trailing_whitespace(self, lineno, line, kind, previous_kind):
        """Return the line stripped of trailing whitespace."""
        return line.rstrip()

    def fix_indentation(self, lineno, line, kind, previous_kind):
        """Set the indentation to 4-spaces.

        NARRATIVE is returned unchanged.  WANT is indented by 4 spaces.
        SOURCE is indented by 4 spaces and given back its interpreter
        prompt: '...' when the line starts with a space, otherwise '>>>'.
        """
        if kind == DoctestReviewer.NARRATIVE:
            return line
        elif kind == DoctestReviewer.WANT:
            return '    %s' % line
        else:
            if line.startswith(' '):
                # This is a continuation of DoctestReviewer.SOURCE.
                return '    ... %s' % line
            else:
                # This is a start of DoctestReviewer.SOURCE.
                return '    >>> %s' % line

    def fix_heading(self, lineno, line, kind, previous_kind):
        """Switch Moin headings to RST headings.

        The heading text is appended to the current block and a rule of
        the same length is returned in place of the line: '=' for level 1,
        '-' for level 2 and '.' for deeper levels.
        """
        if kind != DoctestReviewer.NARRATIVE:
            return line
        moin = self.moin_pattern.match(line)
        if moin is None:
            return line
        heading_level = len(moin.group(1))
        heading = moin.group(2)
        rule_length = len(heading)
        if heading_level == 1:
            rule = '=' * rule_length
        elif heading_level == 2:
            rule = '-' * rule_length
        else:
            rule = '.' * rule_length
        # Force the heading on to the block of lines.
        self.block.append(heading)
        return rule

    def fix_narrative_paragraph(self, lineno, line, kind, previous_kind):
        """Break narrative into paragraphs."""
        if kind != DoctestReviewer.NARRATIVE or len(self.block) == 0:
            return line
        if line == '':
            # This is the start of a new paragraph in the narrative.
            self._store_block(previous_kind)
        # An empty line is never "formatted", so the (now empty) block is
        # not inspected after the store above.
        if self._is_formatted(line) and not self._is_formatted(self.block):
            # This line starts a pre-formatted paragraph.
            self._store_block(previous_kind)
        return line
|
352 |
||
353 |
||
354 |
def get_option_parser(): |
|
355 |
"""Return the option parser for this program."""
|
|
356 |
usage = "usage: %prog [options] doctest.txt" |
|
357 |
parser = OptionParser(usage=usage) |
|
358 |
parser.add_option( |
|
359 |
"-f", "--format", dest="is_format", action="store_true", |
|
360 |
help="Reformat the doctest.") |
|
361 |
parser.add_option( |
|
362 |
"-i", "--interactive", dest="is_interactive", action="store_true", |
|
363 |
help="Approve each change.") |
|
364 |
parser.set_defaults( |
|
365 |
is_format=False, |
|
366 |
is_interactive=False) |
|
367 |
return parser |
|
368 |
||
369 |
||
370 |
def main(argv=None):
    """Run the operations requested from the command line.

    Each named doctest is checked.  With --format the doctest is
    reformatted first; if that changed the text, the result is saved (after
    confirmation when --interactive is given) and the new text is checked.

    :param argv: The argument list, program name first. Defaults to
        sys.argv.
    """
    if argv is None:
        argv = sys.argv
    parser = get_option_parser()
    (options, args) = parser.parse_args(args=argv[1:])
    if len(args) == 0:
        parser.error("A doctest must be specified.")

    for file_name in args:
        # Open before entering the try block: if open() fails there is
        # nothing to close, and the finally clause must not touch an
        # unbound name or the handle left over from a previous file.
        doctest_file = open(file_name)
        try:
            old_doctest = doctest_file.read()
        finally:
            doctest_file.close()
        reviewer = DoctestReviewer(old_doctest, file_name)

        if not options.is_format:
            reviewer.check()
            continue

        new_doctest = reviewer.format()
        if new_doctest != old_doctest:
            if options.is_interactive:
                diff = unified_diff(
                    old_doctest.splitlines(), new_doctest.splitlines())
                print('\n'.join(diff))
                print('\n')
                do_save = raw_input(
                    'Do you wish to save the changes? S(ave) or C(ancel)?')
            else:
                do_save = 'S'

            if do_save.upper() == 'S':
                # Same pattern as above: only close what was opened.
                doctest_file = open(file_name, 'w')
                try:
                    doctest_file.write(new_doctest)
                finally:
                    doctest_file.close()
            # Report what is still wrong with the reformatted text.
            reviewer = DoctestReviewer(new_doctest, file_name)
            reviewer.check()
|
411 |
||
412 |
||
413 |
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main())