~launchpad-pqm/launchpad/devel

9492.1.1 by Karl Fogel
Add utilities/formatdoctest.py and utilities/migrater/, both brought
1
#!/usr/bin/python
2
#
3
# Copyright (C) 2009 - Curtis Hovey <sinzui.is at verizon.net>
4
# This software is licensed under the GNU General Public License version 2.
5
#
6
# It comes from the Gedit Developer Plugins project (launchpad.net/gdp); see
7
# http://bazaar.launchpad.net/~sinzui/gdp/trunk/files/head%3A/plugins/gdp/ &
8
# http://bazaar.launchpad.net/%7Esinzui/gdp/trunk/annotate/head%3A/COPYING.
9
10
"""Reformat a doctest to Launchpad style."""
11
12
# Python 2 looks up the module-level name ``__metaclass__`` to pick the
# default metaclass; setting it to ``type`` makes every class defined in
# this module new-style.
__metaclass__ = type
13
14
import compiler
15
from difflib import unified_diff
16
from doctest import DocTestParser, Example
17
from optparse import OptionParser
18
import re
19
import sys
20
from textwrap import wrap
21
22
import pyflakes
23
from pyflakes.checker import Checker
24
25
26
class DoctestReviewer:
    """Check and reformat doctests.

    The doctest text is split by DocTestParser into narrative strings and
    Example objects.  `check()` prints style and code problems to stdout;
    `format()` returns the reformatted doctest as a string.
    """

    rule_pattern = re.compile(r'([=~-])+[ ]*$')
    moin_pattern = re.compile(r'^(=+)[ ](.+)[ ](=+[ ]*)$')
    continuation_pattern = re.compile(r'^(\s*\.\.\.) (.+)$', re.M)
    # Compiled once here instead of on every is_comment() call.
    comment_pattern = re.compile(r'\s*#')

    SOURCE = 'source'
    WANT = 'want'
    NARRATIVE = 'narrative'

    def __init__(self, doctest, file_name):
        """Parse the doctest and reset the reviewer state.

        :param doctest: The text of the doctest.
        :param file_name: The name used when parsing and when printing
            messages.
        """
        self.doctest = doctest
        self.file_name = file_name
        doctest = self._disambuguate_doctest(doctest)
        parser = DocTestParser()
        self.parts = parser.parse(doctest, file_name)
        self.blocks = []
        self.block = []
        self.block_method = self.preserve_block
        self.code_lines = []
        self.example = None
        self.last_bad_indent = 0
        self.has_printed_filename = False

    def _disambuguate_doctest(self, doctest):
        """Clarify continuations that the doctest parser hides.

        The single space after a `...` prompt is replaced by four spaces,
        so the text of a continuation line reaches the parser with extra
        leading whitespace; fix_indentation() treats SOURCE lines that
        start with a space as continuations.

        :param doctest: The text of the doctest.
        :return: The doctest with the continuation lines rewritten.
        """
        return self.continuation_pattern.sub(r'\1    \2', doctest)

    def _print_message(self, message, lineno):
        """Print the error message with the lineno.

        The file name is printed once, before the first message.

        :param message: The message to print.
        :param lineno: The line number the message pertains to.
        """
        if not self.has_printed_filename:
            print('%s:' % self.file_name)
            self.has_printed_filename = True
        print('    % 4s: %s' % (lineno, message))

    def _is_formatted(self, text):
        """Return True if the text is pre-formatted, otherwise False.

        Text is pre-formatted when it (or the first item of a list) starts
        with a space.  An empty list is not pre-formatted.

        :param text: a string, or a list of strings.
        """
        if isinstance(text, list):
            if not text:
                # There is no first line to inspect.
                return False
            text = text[0]
        return text.startswith(' ')

    def _walk(self, doctest_parts):
        """Walk the doctest parts; yield the line and kind.

        Yield the content of the line, and its kind (SOURCE, WANT, NARRATIVE).
        SOURCE and WANT lines are stripped of indentation, SOURCE is also
        stripped of the interpreter symbols.

        As a side effect, self.example is set to the Example being walked,
        or to None while walking narrative.

        :param doctest_parts: The output of DocTestParser.parse.
        """
        for part in doctest_parts:
            if part == '':
                continue
            if isinstance(part, Example):
                self.example = part
                for line in part.source.splitlines():
                    yield line, DoctestReviewer.SOURCE
                for line in part.want.splitlines():
                    yield line, DoctestReviewer.WANT
            else:
                self.example = None
                for line in part.splitlines():
                    yield line, DoctestReviewer.NARRATIVE

    def _apply(self, line_methods):
        """Call each line_method for each line in the doctest.

        Lines are collected into self.block, and finished blocks are
        stored in self.blocks.

        :param line_methods: a list of methods that are called with
            lineno, line, kind, and previous_kind. Each method must return
            the line for the next method to process; returning None stops
            the processing of that line.
        """
        self.blocks = []
        self.block = []
        lineno = 0
        previous_kind = DoctestReviewer.NARRATIVE
        for line, kind in self._walk(self.parts):
            lineno += 1
            self._append_source(kind, line)
            if kind != previous_kind and kind != DoctestReviewer.WANT:
                # The WANT block must adjoin the preceding SOURCE block.
                self._store_block(previous_kind)
            for method in line_methods:
                line = method(lineno, line, kind, previous_kind)
                if line is None:
                    break
            if not line:
                # Empty lines are not collected, and they do not change
                # previous_kind.
                continue
            self.block.append(line)
            previous_kind = kind
        # Capture the last block and a blank line.
        self.block.append('\n')
        self._store_block(previous_kind)

    def _append_source(self, kind, line):
        """Update the list of source code lines seen.

        Lines that are not SOURCE are recorded as empty strings so that
        each code line keeps its position in the list.
        """
        if kind == self.SOURCE:
            self.code_lines.append(line)
        else:
            self.code_lines.append('')

    def _store_block(self, kind):
        """Append the current block to blocks and start a new block.

        The block is first passed through self.block_method, which is
        preserve_block by default and format_block while formatting.
        Nothing happens when the current block is empty.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        """
        if not self.block:
            return
        block = self.block_method(kind, self.block, self.blocks)
        self.blocks.append('\n'.join(block))
        self.block = []

    def check(self):
        """Check the doctest for style and code issues.

        1. Check line lengths.
        2. Check that headings are not in Moin format.
        3. Check indentation.
        4. Check trailing whitespace.

        The collected SOURCE lines are then passed to check_source_code.
        All messages are printed; nothing is returned.
        """
        self.code_lines = []
        line_checkers = [
            self.check_length,
            self.check_heading,
            self.check_indentation,
            self.check_trailing_whitespace,
            ]
        self._apply(line_checkers)
        code = '\n'.join(self.code_lines)
        self.check_source_code(code)

    def format(self):
        """Reformat doctest.

        1. Tests are reindented to 4 spaces.
        2. Simple narrative is rewrapped to 72 character width.
        3. Formatted (indented) narrative is preserved.
        4. Moin headings are converted to RST =, -, and . underlines.
        5. There is one blank line between blocks,
        6. Except for headers which have two leading blank lines.
        7. All trailing whitespace is removed.

        SOURCE and WANT long lines are not fixed--this is a human operation.

        :return: The reformatted doctest as a string.
        """
        line_checkers = [
            self.fix_trailing_whitespace,
            self.fix_indentation,
            self.fix_heading,
            self.fix_narrative_paragraph,
            ]
        self.block_method = self.format_block
        try:
            self._apply(line_checkers)
        finally:
            # Always restore the default so that a failure here cannot
            # leave later calls formatting blocks unexpectedly.
            self.block_method = self.preserve_block
        return '\n\n'.join(self.blocks)

    def preserve_block(self, kind, block, blocks):
        """Do nothing to the block.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        :param block: The list of lines that should remain together.
        :param blocks: The list of all collected blocks.
        :return: The block, unchanged.
        """
        return block

    def format_block(self, kind, block, blocks):
        """Format paragraph blocks.

        Only unformatted NARRATIVE blocks are changed: a heading that is
        not the first block gets an extra leading blank line, and any
        other paragraph is joined and rewrapped to 72 characters.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        :param block: The list of lines that should remain together.
        :param blocks: The list of all collected blocks.
        :return: The list of lines to store for this block.
        """
        if kind != DoctestReviewer.NARRATIVE or self._is_formatted(block):
            return block
        try:
            rules = ('===', '---', '...')
            last_line = block[-1]
            is_heading = last_line[0:3] in rules and last_line[-3:] in rules
        except IndexError:
            is_heading = False
        if is_heading:
            if blocks:
                # Headings should have an extra leading blank line.
                block.insert(0, '')
            # Otherwise do nothing. This is the first heading in the file.
        else:
            long_line = ' '.join(block).strip()
            block = wrap(long_line, 72)
        return block

    def is_comment(self, line):
        """Return True if the line is a comment.

        A comment is optional whitespace followed by `#`.
        """
        return self.comment_pattern.match(line) is not None

    def check_length(self, lineno, line, kind, previous_kind):
        """Check the length of the line.

        Each kind of line has a maximum length:

        * NARRATIVE: 78 characters.
        * SOURCE: 70 characters (discounting indentation and interpreter).
        * WANT: 74 characters (discounting indentation).

        :return: The line; a NARRATIVE comment is returned stripped of its
            leading whitespace.
        """
        # NOTE(review): the length is measured before a comment's leading
        # whitespace is stripped below -- confirm that is intended.
        length = len(line)
        if kind == DoctestReviewer.NARRATIVE and self.is_comment(line):
            # comments follow WANT rules because they are in code.
            kind = DoctestReviewer.WANT
            line = line.lstrip()
        # Every message says 78: fix_indentation() puts 4 characters in
        # front of WANT (74 + 4) and 8 in front of SOURCE (70 + 8).
        if kind == DoctestReviewer.NARRATIVE and length > 78:
            self._print_message('%s exceeds 78 characters.' % kind, lineno)
        elif kind == DoctestReviewer.WANT and length > 74:
            self._print_message('%s exceeds 78 characters.' % kind, lineno)
        elif kind == DoctestReviewer.SOURCE and length > 70:
            self._print_message('%s exceeds 78 characters.' % kind, lineno)
        return line

    def check_indentation(self, lineno, line, kind, previous_kind):
        """Check the indentation of the SOURCE or WANT line.

        A message is printed when the current example is not indented by
        4 spaces; a run of consecutive bad lines is reported once.
        """
        if kind == DoctestReviewer.NARRATIVE:
            return line
        if self.example.indent != 4:
            if self.last_bad_indent != lineno - 1:
                self._print_message('%s has bad indentation.' % kind, lineno)
            self.last_bad_indent = lineno
        return line

    def check_trailing_whitespace(self, lineno, line, kind, previous_kind):
        """Check for the presence of trailing whitespace in the line."""
        if line.endswith(' '):
            self._print_message('%s has trailing whitespace.' % kind, lineno)
        return line

    def check_heading(self, lineno, line, kind, previous_kind):
        """Check for narrative lines that use moin headers instead of RST."""
        if kind != DoctestReviewer.NARRATIVE:
            return line
        moin = self.moin_pattern.match(line)
        if moin is not None:
            self._print_message('%s uses a moin header.' % kind, lineno - 1)
        return line

    def check_source_code(self, code):
        """Check for source code problems in the doctest using pyflakes.

        The most common problem found are unused imports. `UndefinedName`
        errors are suppressed because the test setup is not known.

        :param code: The SOURCE lines of the doctest joined by newlines.
        """
        if code == '':
            return
        try:
            tree = compiler.parse(code)
        except (SyntaxError, IndentationError) as exc:
            # The second argument is (filename, lineno, offset, text).
            (lineno, offset_, line) = exc.args[1][1:]
            if line is None:
                # No source text was attached to the error.
                line = ''
            if line.endswith("\n"):
                line = line[:-1]
            self._print_message(
                'Could not compile:\n          %s' % line, lineno - 1)
        else:
            warnings = Checker(tree)
            for warning in warnings.messages:
                if isinstance(warning, pyflakes.messages.UndefinedName):
                    continue
                # Split on the first two colons only, so a colon inside
                # the message text does not break the unpacking.
                dummy, lineno, message = str(warning).split(':', 2)
                self._print_message(message.strip(), lineno)

    def fix_trailing_whitespace(self, lineno, line, kind, previous_kind):
        """Return the line stripped of trailing whitespace."""
        return line.rstrip()

    def fix_indentation(self, lineno, line, kind, previous_kind):
        """Set the indentation to 4-spaces.

        NARRATIVE is returned unchanged.  WANT is indented 4 spaces.
        SOURCE is indented 4 spaces and given an interpreter prompt: `...`
        when the line starts with a space, otherwise `>>>`.
        """
        if kind == DoctestReviewer.NARRATIVE:
            return line
        if kind == DoctestReviewer.WANT:
            return '    %s' % line
        if line.startswith(' '):
            # This is a continuation of DoctestReviewer.SOURCE.
            return '    ... %s' % line
        # This is a start of DoctestReviewer.SOURCE.
        return '    >>> %s' % line

    def fix_heading(self, lineno, line, kind, previous_kind):
        """Switch Moin headings to RST headings.

        The heading text is appended to the current block, and the
        underline is returned as the line: `=` for level 1, `-` for
        level 2, and `.` for deeper levels.
        """
        if kind != DoctestReviewer.NARRATIVE:
            return line
        moin = self.moin_pattern.match(line)
        if moin is None:
            return line
        heading_level = len(moin.group(1))
        heading = moin.group(2)
        rule_length = len(heading)
        if heading_level == 1:
            rule = '=' * rule_length
        elif heading_level == 2:
            rule = '-' * rule_length
        else:
            rule = '.' * rule_length
        # Force the heading on to the block of lines.
        self.block.append(heading)
        return rule

    def fix_narrative_paragraph(self, lineno, line, kind, previous_kind):
        """Break narrative into paragraphs.

        The current block is stored when an empty line is seen, or when an
        indented line follows a block that is not indented.
        """
        if kind != DoctestReviewer.NARRATIVE or not self.block:
            return line
        if line == '':
            # This is the start of a new paragraph in the narrative.
            self._store_block(previous_kind)
        if self._is_formatted(line) and not self._is_formatted(self.block):
            # This line starts a pre-formatted paragraph.
            self._store_block(previous_kind)
        return line
352
353
354
def get_option_parser():
    """Build the command line parser for this program.

    Two boolean switches are defined, and both are False unless given:
    -f/--format is stored as options.is_format, and -i/--interactive is
    stored as options.is_interactive.
    """
    option_parser = OptionParser(usage="usage: %prog [options] doctest.txt")
    switches = [
        ("-f", "--format", "is_format", "Reformat the doctest."),
        ("-i", "--interactive", "is_interactive", "Approve each change."),
        ]
    for short_name, long_name, dest, help_text in switches:
        option_parser.add_option(
            short_name, long_name, dest=dest, action="store_true",
            help=help_text)
    option_parser.set_defaults(is_format=False, is_interactive=False)
    return option_parser
368
369
370
def main(argv=None):
    """Run the operations requested from the command line.

    Each named doctest is checked.  When the format option is given, the
    doctest is reformatted first; if that changed it, the new text is
    written back to the file (after confirmation when the interactive
    option is given) and the new text is checked.

    :param argv: The argument list, with the program name first. When
        None, sys.argv is used.
    """
    if argv is None:
        argv = sys.argv
    parser = get_option_parser()
    (options, args) = parser.parse_args(args=argv[1:])
    if len(args) == 0:
        parser.error("A doctest must be specified.")

    for file_name in args:
        # The file is only closed if it was opened; a failing open() used
        # to reach close() on a name that was unbound or that still
        # referred to the previous file.
        with open(file_name) as doctest_file:
            old_doctest = doctest_file.read()
        reviewer = DoctestReviewer(old_doctest, file_name)

        if not options.is_format:
            reviewer.check()
            continue

        new_doctest = reviewer.format()
        if new_doctest != old_doctest:
            if options.is_interactive:
                diff = unified_diff(
                    old_doctest.splitlines(), new_doctest.splitlines())
                print('\n'.join(diff))
                print('\n')
                do_save = raw_input(
                    'Do you wish to save the changes? S(ave) or C(ancel)?')
            else:
                do_save = 'S'

            if do_save.upper() == 'S':
                with open(file_name, 'w') as doctest_file:
                    doctest_file.write(new_doctest)
            # The reformatted text is checked whether or not it was saved.
            reviewer = DoctestReviewer(new_doctest, file_name)
            reviewer.check()
411
412
413
if __name__ == '__main__':
    # Exit with whatever main() returns as the process status.
    raise SystemExit(main())