~launchpad-pqm/launchpad/devel

9492.1.1 by Karl Fogel
Add utilities/formatdoctest.py and utilities/migrater/, both brought
1
#!/usr/bin/python
2
#
3
# Copyright (C) 2009 - Curtis Hovey <sinzui.is at verizon.net>
4
# This software is licensed under the GNU General Public License version 2.
5
#
6
# It comes from the Gedit Developer Plugins project (launchpad.net/gdp); see
7
# http://bazaar.launchpad.net/~sinzui/gdp/trunk/files/head%3A/plugins/gdp/ &
8
# http://bazaar.launchpad.net/%7Esinzui/gdp/trunk/annotate/head%3A/COPYING.
9
10
"""Reformat a doctest to Launchpad style."""
11
12
# Make every class defined in this module a new-style class (Python 2).
__metaclass__ = type
13
14
import compiler
15
from difflib import unified_diff
14612.2.6 by William Grant
utilities
16
from doctest import (
17
    DocTestParser,
18
    Example,
19
    )
9492.1.1 by Karl Fogel
Add utilities/formatdoctest.py and utilities/migrater/, both brought
20
from optparse import OptionParser
21
import re
22
import sys
23
from textwrap import wrap
24
25
import pyflakes
26
from pyflakes.checker import Checker
27
28
29
class DoctestReviewer:
    """Check and reformat doctests.

    The doctest text is parsed once with `DocTestParser`. `check` prints
    style and code problems to stdout; `format` returns the text rewritten
    to the Launchpad doctest style.
    """
    # Not referenced by any method of this class.
    rule_pattern = re.compile(r'([=~-])+[ ]*$')
    # A Moin heading: a title between runs of '=', e.g. '== Title =='.
    moin_pattern = re.compile(r'^(=+)[ ](.+)[ ](=+[ ]*)$')
    # A '...' continuation prompt followed by one space and some text.
    continuation_pattern = re.compile(r'^(\s*\.\.\.) (.+)$', re.M)
    # Optional leading whitespace followed by '#'.
    comment_pattern = re.compile(r'\s*#')

    SOURCE = 'source'
    WANT = 'want'
    NARRATIVE = 'narrative'

    # The longest line permitted for each kind of line.
    max_lengths = {
        NARRATIVE: 78,
        WANT: 74,
        SOURCE: 70,
        }

    def __init__(self, doctest, file_name):
        """Parse the doctest and initialise the review state.

        :param doctest: The text of the doctest.
        :param file_name: The name printed with messages; it is also
            passed to the doctest parser.
        """
        self.doctest = doctest
        self.file_name = file_name
        doctest = self._disambuguate_doctest(doctest)
        parser = DocTestParser()
        self.parts = parser.parse(doctest, file_name)
        # Finished blocks (strings) and the lines of the block in progress.
        self.blocks = []
        self.block = []
        # Called by _store_block to post-process each finished block.
        self.block_method = self.preserve_block
        # One entry per walked line: the source text, or '' for other kinds.
        self.code_lines = []
        # The Example being walked, or None while walking narrative.
        self.example = None
        self.last_bad_indent = 0
        self.has_printed_filename = False

    def _disambuguate_doctest(self, doctest):
        """Clarify continuations that the doctest parser hides.

        The single space after each '...' prompt that is followed by text
        is widened to four spaces, so the continuation text still starts
        with whitespace once the parser has removed the prompt.
        fix_indentation tests for that leading space.

        NOTE(review): the added spaces remain part of the continuation
        text, so check_length counts them and fix_indentation re-emits
        them. Confirm that is intended.

        :param doctest: The text of the doctest.
        :return: The text with the widened continuation prompts.
        """
        return self.continuation_pattern.sub(r'\1    \2', doctest)

    def _print_message(self, message, lineno):
        """Print the error message with the lineno.

        The file name is printed once, before the first message.

        :param message: The message to print.
        :param lineno: The line number the message pertains to.
        """
        # The single-argument parenthesised form prints the same text
        # whether print is a statement or a function.
        if not self.has_printed_filename:
            print('%s:' % self.file_name)
            self.has_printed_filename = True
        print('    % 4s: %s' % (lineno, message))

    def _is_formatted(self, text):
        """Return True if the text is pre-formatted, otherwise False.

        Text is pre-formatted when it starts with a space. For a list,
        only the first item is examined; an empty list is not formatted.

        :param text: a string, or a list of strings.
        """
        if isinstance(text, list):
            if not text:
                return False
            text = text[0]
        return text.startswith(' ')

    def _walk(self, doctest_parts):
        """Walk the doctest parts; yield the line and kind.

        Yield the content of the line, and its kind (SOURCE, WANT, NARRATIVE).
        SOURCE and WANT lines are stripped of indentation, SOURCE is also
        stripped of the interpreter symbols.

        As a side effect, self.example is set to the Example being walked,
        or to None while narrative is walked.

        :param doctest_parts: The output of DocTestParser.parse.
        """
        for part in doctest_parts:
            if part == '':
                continue
            if isinstance(part, Example):
                self.example = part
                for line in part.source.splitlines():
                    yield line, DoctestReviewer.SOURCE
                for line in part.want.splitlines():
                    yield line, DoctestReviewer.WANT
            else:
                self.example = None
                for line in part.splitlines():
                    yield line, DoctestReviewer.NARRATIVE

    def _apply(self, line_methods):
        """Call each line_method for each line in the doctest.

        The lines that survive the methods are collected into self.block,
        and finished blocks into self.blocks.

        :param line_methods: a list of methods that accept lineno, line,
            kind, and previous_kind as arguments. Each method must return
            the line for the next method to process. Returning None stops
            the processing of the line.
        """
        self.blocks = []
        self.block = []
        lineno = 0
        previous_kind = DoctestReviewer.NARRATIVE
        for line, kind in self._walk(self.parts):
            lineno += 1
            self._append_source(kind, line)
            if kind != previous_kind and kind != DoctestReviewer.WANT:
                # The WANT block must adjoin the preceding SOURCE block.
                self._store_block(previous_kind)
            for method in line_methods:
                line = method(lineno, line, kind, previous_kind)
                if line is None:
                    break
            if not line:
                # Dropped and empty lines are not collected, and they do
                # not change previous_kind.
                continue
            self.block.append(line)
            previous_kind = kind
        # Capture the last block and a blank line.
        self.block.append('\n')
        self._store_block(previous_kind)

    def _append_source(self, kind, line):
        """Update the list of source code lines seen.

        Lines of other kinds are recorded as empty strings, so the
        position of each entry matches the walked line count.

        :param kind: The line's kind (SOURCE, WANT, NARRATIVE).
        :param line: The content of the line.
        """
        if kind == self.SOURCE:
            self.code_lines.append(line)
        else:
            self.code_lines.append('')

    def _store_block(self, kind):
        """Append the block to blocks, re-wrap unformatted narrative.

        The current block is passed through self.block_method, joined
        with newlines, stored, and then reset. An empty block is ignored.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        """
        if len(self.block) == 0:
            return
        block = self.block_method(kind, self.block, self.blocks)
        self.blocks.append('\n'.join(block))
        self.block = []

    def check(self):
        """Check the doctest for style and code issues.

        1. Check line lengths.
        2. Check that headings are not in Moin format.
        3. Check indentation.
        4. Check trailing whitespace.

        The collected source lines are then checked as Python code.
        Problems are printed; nothing is returned.
        """
        self.code_lines = []
        line_checkers = [
            self.check_length,
            self.check_heading,
            self.check_indentation,
            self.check_trailing_whitespace,
            ]
        self._apply(line_checkers)
        code = '\n'.join(self.code_lines)
        self.check_source_code(code)

    def format(self):
        """Reformat doctest.

        1. Tests are reindented to 4 spaces.
        2. Simple narrative is rewrapped to 72 character width.
        3. Formatted (indented) narrative is preserved.
        4. Moin headings are converted to RST headings underlined with
           '=', '-', or '.' according to their level.
        5. There is one blank line between blocks,
        6. Except for headers which have two leading blank lines.
        7. All trailing whitespace is removed.

        SOURCE and WANT long lines are not fixed--this is a human operation.

        :return: The reformatted doctest as a single string.
        """
        line_checkers = [
            self.fix_trailing_whitespace,
            self.fix_indentation,
            self.fix_heading,
            self.fix_narrative_paragraph,
            ]
        self.block_method = self.format_block
        try:
            self._apply(line_checkers)
        finally:
            # Restore the default even when a fixer raises, so a later
            # check() does not re-wrap blocks.
            self.block_method = self.preserve_block
        return '\n\n'.join(self.blocks)

    def preserve_block(self, kind, block, blocks):
        """Do nothing to the block.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        :param block: The list of lines that should remain together.
        :param blocks: The list of all collected blocks.
        :return: The block, unchanged.
        """
        return block

    def format_block(self, kind, block, blocks):
        """Format paragraph blocks.

        Only unformatted NARRATIVE blocks are changed. A heading that is
        not the first block gains a leading blank line; any other
        paragraph is joined and re-wrapped.

        :param kind: The block's kind (SOURCE, WANT, NARRATIVE)
        :param block: The list of lines that should remain together.
        :param blocks: The list of all collected blocks.
        :return: The list of lines to store for the block.
        """
        if kind != DoctestReviewer.NARRATIVE or self._is_formatted(block):
            return block
        rules = ('===', '---', '...')
        if block:
            last_line = block[-1]
        else:
            last_line = ''
        # A heading block ends with a rule line made of '=', '-', or '.'.
        is_heading = last_line[0:3] in rules and last_line[-3:] in rules
        if is_heading:
            if len(blocks) != 0:
                # Headings should have an extra leading blank line.
                block.insert(0, '')
            # Otherwise this is the first heading in the file; leave it.
            return block
        long_line = ' '.join(block).strip()
        return wrap(long_line, 72)

    def is_comment(self, line):
        """Return True if the line is a comment.

        :param line: The content of the line.
        """
        return self.comment_pattern.match(line) is not None

    def check_length(self, lineno, line, kind, previous_kind):
        """Check the length of the line.

        Each kind of line has a maximum length:

        * NARRATIVE: 78 characters.
        * SOURCE: 70 characters (discounting indentation and interpreter).
        * WANT: 74 characters (discounting indentation).

        :param lineno: The number of the line being checked.
        :param line: The content of the line.
        :param kind: The line's kind (SOURCE, WANT, NARRATIVE).
        :param previous_kind: The kind of the preceding line (unused).
        :return: The line; a narrative comment is returned without its
            leading whitespace.
        """
        if kind == DoctestReviewer.NARRATIVE and self.is_comment(line):
            # comments follow WANT rules because they are in code, so the
            # indentation is discounted before the length is measured.
            kind = DoctestReviewer.WANT
            line = line.lstrip()
        limit = self.max_lengths.get(kind)
        if limit is not None and len(line) > limit:
            self._print_message(
                '%s exceeds %s characters.' % (kind, limit), lineno)
        return line

    def check_indentation(self, lineno, line, kind, previous_kind):
        """Check the indentation of the SOURCE or WANT line.

        The indentation is read from the Example that _walk is currently
        yielding. Only the first line of a run of consecutive badly
        indented lines is reported.

        :return: The line, unchanged.
        """
        if kind == DoctestReviewer.NARRATIVE:
            return line
        if self.example.indent != 4:
            if self.last_bad_indent != lineno - 1:
                self._print_message('%s has bad indentation.' % kind, lineno)
            self.last_bad_indent = lineno
        return line

    def check_trailing_whitespace(self, lineno, line, kind, previous_kind):
        """Check for the presence of trailing whitespace in the line.

        :return: The line, unchanged.
        """
        if line.endswith(' '):
            self._print_message('%s has trailing whitespace.' % kind, lineno)
        return line

    def check_heading(self, lineno, line, kind, previous_kind):
        """Check for narrative lines that use moin headers instead of RST.

        :return: The line, unchanged.
        """
        if kind != DoctestReviewer.NARRATIVE:
            return line
        moin = self.moin_pattern.match(line)
        if moin is not None:
            # NOTE(review): the other line checks report lineno itself;
            # confirm the offset here is intended.
            self._print_message('%s uses a moin header.' % kind, lineno - 1)
        return line

    def check_source_code(self, code):
        """Check for source code problems in the doctest using pyflakes.

        The most common problem found are unused imports. `UndefinedName`
        errors are suppressed because the test setup is not known.

        :param code: The source lines of the doctest joined by newlines.
        """
        if code == '':
            return
        try:
            tree = compiler.parse(code)
        except (SyntaxError, IndentationError) as exc:
            # The second argument of the error is the tuple
            # (filename, lineno, offset, text).
            (lineno, offset_, line) = exc.args[1][1:4]
            if line.endswith("\n"):
                line = line[:-1]
            # NOTE(review): pyflakes line numbers below are reported
            # without this offset; confirm which is right.
            self._print_message(
                'Could not compile:\n          %s' % line, lineno - 1)
        else:
            warnings = Checker(tree)
            for warning in warnings.messages:
                if isinstance(warning, pyflakes.messages.UndefinedName):
                    continue
                # Split on the first two colons only, so a message that
                # itself contains a colon is kept whole.
                dummy, lineno, message = str(warning).split(':', 2)
                self._print_message(message.strip(), lineno)

    def fix_trailing_whitespace(self, lineno, line, kind, previous_kind):
        """Return the line stripped of trailing whitespace."""
        return line.rstrip()

    def fix_indentation(self, lineno, line, kind, previous_kind):
        """Set the indentation to 4-spaces.

        NARRATIVE lines are returned unchanged. WANT lines are indented
        by 4 spaces. SOURCE lines are indented by 4 spaces and given the
        '>>> ' prompt, or the '... ' prompt when the line starts with a
        space.
        """
        if kind == DoctestReviewer.NARRATIVE:
            return line
        if kind == DoctestReviewer.WANT:
            return '    %s' % line
        if line.startswith(' '):
            # This is a continuation of DoctestReviewer.SOURCE.
            return '    ... %s' % line
        # This is a start of DoctestReviewer.SOURCE.
        return '    >>> %s' % line

    def fix_heading(self, lineno, line, kind, previous_kind):
        """Switch Moin headings to RST headings.

        A level 1 heading is underlined with '=', level 2 with '-', and
        deeper levels with '.'. The heading text is appended to the
        current block, and the rule is returned as the line.
        """
        if kind != DoctestReviewer.NARRATIVE:
            return line
        moin = self.moin_pattern.match(line)
        if moin is None:
            return line
        heading_level = len(moin.group(1))
        heading = moin.group(2)
        if heading_level == 1:
            rule_character = '='
        elif heading_level == 2:
            rule_character = '-'
        else:
            rule_character = '.'
        # Force the heading on to the block of lines.
        self.block.append(heading)
        return rule_character * len(heading)

    def fix_narrative_paragraph(self, lineno, line, kind, previous_kind):
        """Break narrative into paragraphs.

        The current block is stored when a blank narrative line is seen,
        or when a pre-formatted line follows unformatted narrative.

        :return: The line, unchanged.
        """
        if kind != DoctestReviewer.NARRATIVE or len(self.block) == 0:
            return line
        if line == '':
            # This is the start of a new paragraph in the narrative.
            self._store_block(previous_kind)
        if self._is_formatted(line) and not self._is_formatted(self.block):
            # This line starts a pre-formatted paragraph.
            self._store_block(previous_kind)
        return line
355
356
357
def get_option_parser():
    """Build the command line parser.

    The parser accepts two boolean flags, both off by default:
    -f/--format (stored as is_format) and -i/--interactive (stored as
    is_interactive).

    :return: The configured OptionParser.
    """
    option_parser = OptionParser(usage="usage: %prog [options] doctest.txt")
    # (short flag, long flag, destination, help text)
    flags = (
        ("-f", "--format", "is_format", "Reformat the doctest."),
        ("-i", "--interactive", "is_interactive", "Approve each change."),
        )
    defaults = {}
    for short_flag, long_flag, destination, help_text in flags:
        option_parser.add_option(
            short_flag, long_flag, dest=destination, action="store_true",
            help=help_text)
        defaults[destination] = False
    option_parser.set_defaults(**defaults)
    return option_parser
371
372
373
def main(argv=None):
    """Run the operations requested from the command line.

    Each named doctest is checked. With --format the doctest is
    reformatted first, written back when it changed (after confirmation
    with --interactive), and the reformatted text is then checked.

    :param argv: The command line, program name first. Defaults to
        sys.argv.
    """
    if argv is None:
        argv = sys.argv
    parser = get_option_parser()
    (options, args) = parser.parse_args(args=argv[1:])
    if len(args) == 0:
        parser.error("A doctest must be specified.")

    for file_name in args:
        # Open before entering the try block: if open() fails there is
        # nothing to close, and the original error must not be masked.
        doctest_file = open(file_name)
        try:
            old_doctest = doctest_file.read()
        finally:
            doctest_file.close()
        reviewer = DoctestReviewer(old_doctest, file_name)

        if not options.is_format:
            reviewer.check()
            continue

        new_doctest = reviewer.format()
        if new_doctest != old_doctest:
            if options.is_interactive:
                # The input lines carry no newlines, so the control lines
                # must not carry any either.
                diff = unified_diff(
                    old_doctest.splitlines(), new_doctest.splitlines(),
                    lineterm='')
                print('\n'.join(diff))
                print('\n')
                do_save = raw_input(
                    'Do you wish to save the changes? S(ave) or C(ancel)?')
            else:
                do_save = 'S'

            if do_save.upper() == 'S':
                doctest_file = open(file_name, 'w')
                try:
                    doctest_file.write(new_doctest)
                finally:
                    doctest_file.close()
            # The reformatted text is checked whether or not it was saved.
            reviewer = DoctestReviewer(new_doctest, file_name)
            reviewer.check()
414
415
416
# Run only when executed as a script; main()'s result is the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)