~launchpad-pqm/launchpad/devel

11461.2.1 by Henning Eggers
Added format-imports script and documented it.
1
#!/usr/bin/python
2
#
3
# Copyright 2010 Canonical Ltd.  This software is licensed under the
4
# GNU Affero General Public License version 3 (see the file LICENSE).
5
6
""" Format import sections in python files
7
8
= Usage =
9
10
format-imports <file or directory> ...
11
12
= Operation =
13
14
The script will process each filename on the command line. If the file is a
15
directory it recurses into it and processes all *.py files found in the tree.
16
It will output the paths of all the files that have been changed.
17
11461.2.3 by Henning Eggers
Usage on the LP tree.
18
For Launchpad it was applied to the "lib/canonical/launchpad" and the "lib/lp"
19
subtrees. Running it with those parameters on a freshly branched LP tree
20
should not produce any output, meaning that all the files in the tree should
21
be formatted correctly.
22
11461.2.1 by Henning Eggers
Added format-imports script and documented it.
23
The script identifies the import section of each file as a block of lines
24
that start with "import" or "from" or are indented with at least one space or
25
are blank lines. Comment lines are also included if they are followed by an
26
import statement. An initial __future__ import and a module docstring are
27
explicitly skipped.  
28
29
The import section is rewritten as four subsections, each separated by a
30
blank line. Any of the sections may be empty.
31
 1. Standard python library modules
32
 2. Import statements explicitly ordered to the top (see below)
33
 3. Third-party modules, meaning anything not fitting one of the other
34
    subsection criteria
35
 4. Local modules that begin with "canonical" or "lp".
36
37
Each section is sorted alphabetically by module name. Each module is put
38
on its own line, i.e.
39
{{{
40
  import os, sys
41
}}}
42
becomes
43
{{{
44
  import os
45
  import sys
46
}}}
47
Multiple import statements for the same module are conflated into one
48
statement, or two if the module was imported alongside an object inside it,
49
i.e.
50
{{{
51
  import sys
52
  from sys import stdin
53
}}}
54
55
Statements that import more than one object are put on multiple lines in
56
list style, i.e.
57
{{{
58
  from sys import (
59
      stdin,
60
      stdout,
61
      )
62
}}}
63
Objects are sorted alphabetically and case-insensitively. One-object imports
64
are only formatted in this manner if the statement exceeds 78 characters in
65
length.
66
67
Comments stick with the import statement that follows them. Comments at the
68
end of one-line statements are moved to be in front of them, i.e.
69
{{{
70
  from sys import exit # Have a way out
71
}}}
72
becomes
73
{{{
74
  # Have a way out
75
  from sys import exit
76
}}}
77
78
= Format control =
79
11461.2.4 by Henning Eggers
Reviewer comments.
80
Two special comments allow controlling the operation of the formatter.
11461.2.1 by Henning Eggers
Added format-imports script and documented it.
81
11461.2.4 by Henning Eggers
Reviewer comments.
82
When an import statement is immediately preceded by a comment that starts
83
with the word "FIRST", it is placed into the second subsection (see above).
11461.2.1 by Henning Eggers
Added format-imports script and documented it.
84
85
When the first import statement is directly preceded by a comment that starts
86
with the word "SKIP", the entire file is exempt from formatting.
87
88
= Known bugs =
89
90
Make sure to always check the result of the re-formatting to see if you have
91
been bitten by one of these.
92
93
Comments inside multi-line import statements break the formatter. A statement
94
like this will be ignored:
95
{{{
96
  from lp.app.interfaces import (
97
      # Don't do this.
98
      IMyInterface,
99
      IMyOtherInterface, # Don't do this either
100
      )
101
}}}
102
Actually, this will cause the statement and all following ones to be ignored:
103
{{{
104
  from lp.app.interfaces import (
105
  # Breaks indentation rules anyway.
106
      IMyInterface,
107
      IMyOtherInterface,
108
      )
109
}}}
110
111
If a single-line statement has both a comment in front of it and at the end
112
of the line, only the end-line comment will survive. This could probably
113
easily be fixed to concatenate the two.
114
{{{
115
  # I am a goner.
116
  from lp.app.interfaces import IMyInterface # I will survive!
117
}}}
118
119
Line continuation characters are recognized and resolved but
120
not re-introduced. This may leave the re-formatted text with a line that
121
is over the length limit.
122
{{{
123
    from lp.app.verylongnames.orverlydeep.modulestructure.leavenoroom \
124
        import object
125
}}}
126
""" 
127
128
__metaclass__ = type
129
130
# SKIP this file when reformatting.
131
import os
132
import re
133
import sys
11461.2.2 by Henning Eggers
Made documentation easily available.
134
from textwrap import dedent
11461.2.1 by Henning Eggers
Added format-imports script and documented it.
135
136
sys.path[0:0] = [os.path.dirname(__file__)]
137
from python_standard_libs import python_standard_libs
138
139
140
# All patterns are raw strings so that regex escapes such as \s and \S are
# not interpreted (and warned about) as string escapes.

# To search for escaped newline chars, i.e. a backslash directly followed
# by a newline (line continuation).
escaped_nl_regex = re.compile(r"\\\n", re.M)
# "import a, b" statements; 'module' captures everything after "import".
import_regex = re.compile(r"^import +(?P<module>.+)$", re.M)
# Single-line "from a import b, c" statements, optionally followed by an
# end-of-line comment.
from_import_single_regex = re.compile(
    r"^from (?P<module>.+) +import +"
    r"(?P<objects>[*]|[a-zA-Z0-9_, ]+)"
    r"(?P<comment>#.*)?$", re.M)
# Parenthesized "from a import (b, c)" statements, which may span several
# lines.
from_import_multi_regex = re.compile(
    r"^from +(?P<module>.+) +import *[(]"
    r"(?P<objects>[a-zA-Z0-9_, \n]+)[)]$", re.M)
# One or more comment lines that are directly followed by an import
# statement; 'module' is the module named in that statement.
comment_regex = re.compile(
    r"(?P<comment>(^#.+\n)+)(^import|^from) +(?P<module>[a-zA-Z0-9_.]+)",
    re.M)
# Separator between the names of a comma-separated list.
split_regex = re.compile(r",\s*")

# Module docstrings are multiline (""") strings that are not indented and are
# followed at some point by an import.
module_docstring_regex = re.compile(
    r'(?P<docstring>^["]{3}[^"]+["]{3}\n).*^(import |from .+ import)',
    re.M | re.S)
# The imports section starts with an import statement that is not a
# __future__ import and consists of import lines, indented lines, empty
# lines and comments which are followed by an import line. Sometimes we even
# find lines that contain a single ")"... :-(
imports_section_regex = re.compile(
    r"(^#.+\n)*^(import|(from ((?!__future__)\S+) import)).*\n"
    r"(^import .+\n|^from .+\n|^[\t ]+.+\n|"
    r"(^#.+\n)+((^import|^from) .+\n)|^\n|^[)]\n)*",
    re.M)
165
166
167
def format_import_lines(module, objects):
    """Render a "from <module> import ..." statement as text.

    A statement importing a single object is kept on one line as long as
    that line is at most 78 characters long. Every other statement is
    written in parenthesized list style: one object per line, each followed
    by a comma, with the closing parenthesis on a line of its own.

    :param module: Name of the module to import from.
    :param objects: List of the names to import from the module.
    :return: The statement text, without a trailing newline.
    """
    if len(objects) == 1:
        one_liner = "from %s import %s" % (module, objects[0])
        if len(one_liner) <= 78:
            return one_liner
    listing = ",\n    ".join(objects)
    return "from %s import (\n    %s,\n    )" % (module, listing)
175
176
177
def find_imports_section(content):
    """Locate the part of the file that contains the import statements.

    :param content: The complete text of a python file.
    :return: A (start, end) tuple of offsets into `content` that delimit
        the import section, or (None, None) if no import section was found
        or the section begins with a "# SKIP" comment.
    """
    # Begin searching behind the module docstring, if there is one.
    docstring_match = module_docstring_regex.search(content)
    if docstring_match is None:
        search_from = 0
    else:
        search_from = docstring_match.end('docstring')

    section_match = imports_section_regex.search(content, search_from)
    if section_match is None:
        return (None, None)
    # A leading "# SKIP" comment exempts the file from formatting.
    if section_match.group(0).startswith('# SKIP'):
        return (None, None)
    return section_match.span()
195
196
197
class ImportStatement:
    """Holds information about the import statements for one module.

    Attributes are:
      import_module: True if the instance was created without objects,
        i.e. it stands for a plain "import <module>" statement.
      objects: None, or the list of names imported from the module. The
        list is free of duplicates and sorted case-insensitively.
      comment: None, or the comment text attached to the statement.
    """

    def __init__(self, objects=None, comment=None):
        # Creating a statement without objects marks a plain module import.
        self.import_module = objects is None
        if objects is None:
            self.objects = None
        else:
            self.objects = self._normalize(objects)
        self.comment = comment

    @staticmethod
    def _normalize(objects):
        """Return the unique names in `objects`, sorted case-insensitively.

        Names that only differ in case are ordered by their exact spelling
        so that the result does not depend on set iteration order.
        """
        return sorted(set(objects), key=lambda name: (name.lower(), name))

    def addObjects(self, new_objects):
        """More objects in this statement; eliminate duplicates.

        The resulting list is always sorted and free of duplicates, also
        when no objects had been recorded so far.
        """
        if self.objects is None:
            # No objects so far.
            self.objects = self._normalize(new_objects)
        else:
            self.objects = self._normalize(
                list(self.objects) + list(new_objects))

    def setComment(self, comment):
        """Set the comment of the statement, replacing any previous one."""
        self.comment = comment
221
222
223
def parse_import_statements(import_section):
    """Split the import section into statements.

    Returns a dictionary with the module as the key and an ImportStatement
    instance describing what is imported from that module as the value."""
    statements = {}
    # Join lines that were continued with an escaped newline.
    import_section = escaped_nl_regex.sub("", import_section)
    # Simple one-line import statements. They are recorded as statements
    # without objects; multiple modules in one statement are split up.
    for match in import_regex.finditer(import_section):
        for module in split_regex.split(match.group('module').strip()):
            statements[module] = ImportStatement()
    # "from ... import" statements, single-line ones first, then the
    # parenthesized ones.
    for pattern in (from_import_single_regex, from_import_multi_regex):
        for match in pattern.finditer(import_section):
            found = match.groupdict()
            names = split_regex.split(found['objects'].strip(" \n,"))
            module = found['module'].strip()
            if module in statements:
                # The module was seen before, merge the objects.
                statements[module].addObjects(names)
            else:
                statements[module] = ImportStatement(names)
            # Only the single-line pattern has a 'comment' group.
            comment = found.get('comment', None)
            if comment is not None:
                statements[module].setComment(comment)
    # Comment lines that directly precede an import statement are attached
    # to the statement of the module named there.
    for match in comment_regex.finditer(import_section):
        module = match.group('module').strip()
        comment = match.group('comment').strip()
        statements[module].setComment(comment)

    return statements
282
283
284
def format_imports(imports):
    """Group and order imports, return the new import statements.

    :param imports: Dictionary that maps module names to statement objects
        providing the attributes `comment`, `import_module` and `objects`.
    :return: The text of the new import section without a trailing
        newline. Non-empty subsections are separated by one blank line.
    """
    standard_section = {}
    first_section = {}
    thirdparty_section = {}
    local_section = {}
    # Group modules into sections. items() is used instead of iteritems()
    # because it is available on both Python 2 and Python 3.
    for module, statement in imports.items():
        module_base = module.split('.')[0]
        comment = statement.comment
        if comment is not None and comment.startswith("# FIRST"):
            first_section[module] = statement
        elif module_base in ('canonical', 'lp'):
            local_section[module] = statement
        elif module_base in python_standard_libs:
            standard_section[module] = statement
        else:
            thirdparty_section[module] = statement

    all_import_lines = []
    # Sort within each section and generate statement strings.
    sections = (
        standard_section,
        first_section,
        thirdparty_section,
        local_section,
        )
    for section in sections:
        import_lines = []
        for module in sorted(section, key=str.lower):
            statement = section[module]
            if statement.comment is not None:
                import_lines.append(statement.comment)
            if statement.import_module:
                import_lines.append("import %s" % module)
            if statement.objects is not None:
                import_lines.append(
                    format_import_lines(module, statement.objects))
        # Empty sections are left out entirely.
        if import_lines:
            all_import_lines.append('\n'.join(import_lines))
    # Sections are separated by one blank line.
    return '\n\n'.join(all_import_lines)
325
326
327
def reformat_importsection(filename):
    """Replace the given file with a reformatted version of it.

    :param filename: Path of the python file to reformat.
    :return: True if the file was rewritten. False if no import section
        to format was found or if the import section is already formatted
        as expected; the file is not touched in these cases.
    """
    # Read through a context manager so the handle is closed right away.
    with open(filename) as source_file:
        pyfile = source_file.read()
    import_start, import_end = find_imports_section(pyfile)
    if import_start is None:
        # Skip files with no import section.
        return False
    imports_section = pyfile[import_start:import_end]
    imports = parse_import_statements(imports_section)

    if pyfile[import_end:import_end + 1] != '#':
        # Two blank lines before anything but comments; the third newline
        # terminates the last import line.
        number_of_newlines = 3
    else:
        number_of_newlines = 2

    new_imports = format_imports(imports) + "\n" * number_of_newlines
    if new_imports == imports_section:
        # No change, no need to write a new file.
        return False

    # The context manager makes sure the new content is flushed and the
    # file is closed before returning.
    with open(filename, "w") as new_file:
        new_file.write(pyfile[:import_start])
        new_file.write(new_imports)
        new_file.write(pyfile[import_end:])

    return True
354
355
356
def process_file(fpath):
    """Process the file with the given path.

    The path is printed to standard output if the file was changed.
    """
    changed = reformat_importsection(fpath)
    if changed:
        # With a single argument the parenthesized form prints the same on
        # Python 2 and is valid syntax on Python 3.
        print(fpath)
361
362
363
def process_tree(dpath):
    """Process every *.py file found below the given directory."""
    for current_dir, _subdirs, names in os.walk(dpath):
        for name in names:
            if not name.endswith('.py'):
                continue
            process_file(os.path.join(current_dir, name))
369
370
371
if __name__ == "__main__":
    # Without arguments, or when help is requested, show the usage text on
    # stderr and exit with status 1.
    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "-?", "--help"):
        sys.stderr.write(dedent("""\
        usage: format-imports <file or directory> ...
        
        Type "format-imports --docstring | less" to see the documentation.
        """))
        sys.exit(1)
    # "--docstring" prints the module documentation and exits with status 2.
    if sys.argv[1] == "--docstring":
        sys.stdout.write(__doc__)
        sys.exit(2)
    # Every argument is either a directory to recurse into or a single file.
    for filename in sys.argv[1:]:
        process = process_tree if os.path.isdir(filename) else process_file
        process(filename)
    sys.exit(0)