11461.2.1
by Henning Eggers
Added format-imports script and documented it. |
1 |
#!/usr/bin/python
|
2 |
#
|
|
3 |
# Copyright 2010 Canonical Ltd. This software is licensed under the
|
|
4 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
|
5 |
||
6 |
""" Format import sections in python files
|
|
7 |
||
8 |
= Usage =
|
|
9 |
||
10 |
format-imports <file or directory> ...
|
|
11 |
||
12 |
= Operation =
|
|
13 |
||
14 |
The script will process each filename on the command line. If the file is a
|
|
15 |
directory it recurses into it and processes all *.py files found in the tree.
|
|
16 |
It will output the paths of all the files that have been changed.
|
|
17 |
||
11461.2.3
by Henning Eggers
Usage on the LP tree. |
18 |
For Launchpad it was applied to the "lib/canonical/launchpad" and the "lib/lp"
|
19 |
subtrees. Running it with those parameters on a freshly branched LP tree
|
|
20 |
should not produce any output, meaning that all the files in the tree should
|
|
21 |
be formatted correctly.
|
|
22 |
||
11461.2.1
by Henning Eggers
Added format-imports script and documented it. |
23 |
The script identifies the import section of each file as a block of lines
|
24 |
that start with "import" or "from" or are indented with at least one space or
|
|
25 |
are blank lines. Comment lines are also included if they are followed by an
|
|
26 |
import statement. An initial __future__ import and a module docstring are
|
|
27 |
explicitly skipped.
|
|
28 |
||
29 |
The import section is rewritten as four subsections, each separated by a
|
|
30 |
blank line. Any of the sections may be empty.
|
|
31 |
1. Standard python library modules
|
|
32 |
2. Import statements explicitly ordered to the top (see below)
|
|
33 |
3. Third-party modules, meaning anything not fitting one of the other
|
|
34 |
subsection criteria
|
|
35 |
4. Local modules that begin with "canonical" or "lp".
|
|
36 |
||
37 |
Each section is sorted alphabetically by module name. Each module is put
|
|
38 |
on its own line, i.e.
|
|
39 |
{{{
|
|
40 |
import os, sys
|
|
41 |
}}}
|
|
42 |
becomes
|
|
43 |
{{{
|
|
44 |
import os
|
|
45 |
import sys
|
|
46 |
}}}
|
|
47 |
Multiple import statements for the same module are conflated into one
|
|
48 |
statement, or two if the module was imported alongside an object inside it,
|
|
49 |
i.e.
|
|
50 |
{{{
|
|
51 |
import sys
|
|
52 |
from sys import stdin
|
|
53 |
}}}
|
|
54 |
||
55 |
Statements that import more than one object are put on multiple lines in
|
|
56 |
list style, i.e.
|
|
57 |
{{{
|
|
58 |
from sys import (
|
|
59 |
stdin,
|
|
60 |
stdout,
|
|
61 |
)
|
|
62 |
}}}
|
|
63 |
Objects are sorted alphabetically and case-insensitively. One-object imports
|
|
64 |
are only formatted in this manner if the statement exceeds 78 characters in
|
|
65 |
length.
|
|
66 |
||
67 |
Comments stick with the import statement that followed them. Comments at the
|
|
68 |
end of one-line statements are moved to be in front of it, i.e.
|
|
69 |
{{{
|
|
70 |
from sys import exit # Have a way out
|
|
71 |
}}}
|
|
72 |
becomes
|
|
73 |
{{{
|
|
74 |
# Have a way out
|
|
75 |
from sys import exit
|
|
76 |
}}}
|
|
77 |
||
78 |
= Format control =
|
|
79 |
||
11461.2.4
by Henning Eggers
Reviewer comments. |
80 |
Two special comments allow to control the operation of the formatter.
|
11461.2.1
by Henning Eggers
Added format-imports script and documented it. |
81 |
|
11461.2.4
by Henning Eggers
Reviewer comments. |
82 |
When an import statement is immediately preceded by a comment that starts
|
83 |
with the word "FIRST", it is placed into the second subsection (see above).
|
|
11461.2.1
by Henning Eggers
Added format-imports script and documented it. |
84 |
|
85 |
When the first import statement is directly preceded by a comment that starts
|
|
86 |
with the word "SKIP", the entire file is exempt from formatting.
|
|
87 |
||
88 |
= Known bugs =
|
|
89 |
||
90 |
Make sure to always check the result of the re-formatting to see if you have
|
|
91 |
been bitten by one of these.
|
|
92 |
||
93 |
Comments inside multi-line import statements break the formatter. A statement
|
|
94 |
like this will be ignored:
|
|
95 |
{{{
|
|
96 |
from lp.app.interfaces import (
|
|
97 |
# Don't do this.
|
|
98 |
IMyInterface,
|
|
99 |
IMyOtherInterface, # Don't do this either
|
|
100 |
)
|
|
101 |
}}}
|
|
102 |
Actually, this will make the statement and all following to be ignored:
|
|
103 |
{{{
|
|
104 |
from lp.app.interfaces import (
|
|
105 |
# Breaks indentation rules anyway.
|
|
106 |
IMyInterface,
|
|
107 |
IMyOtherInterface,
|
|
108 |
)
|
|
109 |
}}}
|
|
110 |
||
111 |
If a single-line statement has both a comment in front of it and at the end
|
|
112 |
of the line, only the end-line comment will survive. This could probably
|
|
113 |
easily be fixed to concatenate the two.
|
|
114 |
{{{
|
|
115 |
# I am a gonner.
|
|
116 |
from lp.app.interfaces import IMyInterface # I will survive!
|
|
117 |
}}}
|
|
118 |
||
119 |
Line continuation characters are recognized and resolved but
|
|
120 |
not re-introduced. This may leave the re-formatted text with a line that
|
|
121 |
is over the length limit.
|
|
122 |
{{{
|
|
123 |
from lp.app.verylongnames.orverlydeep.modulestructure.leavenoroom \
|
|
124 |
import object
|
|
125 |
}}}
|
|
126 |
"""
|
|
127 |
||
128 |
__metaclass__ = type |
|
129 |
||
130 |
# SKIP this file when reformatting.
|
|
131 |
import os |
|
132 |
import re |
|
133 |
import sys |
|
11461.2.2
by Henning Eggers
Made documentation easily available. |
134 |
from textwrap import dedent |
11461.2.1
by Henning Eggers
Added format-imports script and documented it. |
135 |
|
136 |
sys.path[0:0] = [os.path.dirname(__file__)] |
|
137 |
from python_standard_libs import python_standard_libs |
|
138 |
||
139 |
||
140 |
# Matches a backslash immediately followed by a newline, i.e. an explicit
# line continuation.
escaped_nl_regex = re.compile("\\\\\n", re.M)
# Matches an "import ..." line; "module" captures everything after the
# keyword up to the end of that line.
import_regex = re.compile("^import +(?P<module>.+)$", re.M)
# Matches a one-line "from ... import ..." statement. "objects" is either
# "*" or a comma-separated list of names, "comment" an optional trailing
# "#" comment.
from_import_single_regex = re.compile(
    "^from (?P<module>.+) +import +"
    "(?P<objects>[*]|[a-zA-Z0-9_, ]+)"
    "(?P<comment>#.*)?$", re.M)
# Matches a parenthesized "from ... import (...)" statement that may span
# several lines. The character class does not include "#", so a statement
# with comments between the parentheses does not match.
from_import_multi_regex = re.compile(
    "^from +(?P<module>.+) +import *[(](?P<objects>[a-zA-Z0-9_, \n]+)[)]$",
    re.M)
# Matches one or more comment lines directly followed by an import
# statement; "module" is the first dotted name after the keyword.
comment_regex = re.compile(
    "(?P<comment>(^#.+\n)+)(^import|^from) +(?P<module>[a-zA-Z0-9_.]+)",
    re.M)
# Splits a comma-separated list of names.  Written as a raw string so that
# "\s" reaches the regex engine without relying on Python passing an
# unrecognized string escape through unchanged.
split_regex = re.compile(r",\s*")

# Module docstrings are multiline (""") strings that are not indented and are
# followed at some point by an import.
module_docstring_regex = re.compile(
    '(?P<docstring>^["]{3}[^"]+["]{3}\n).*^(import |from .+ import)',
    re.M | re.S)
# The imports section starts with an import statement that is not a
# __future__ import and consists of import lines, indented lines, empty
# lines and comments which are followed by an import line. Sometimes we even
# find lines that contain a single ")"... :-(
# The backslash of "\S" is doubled for the same reason split_regex is raw;
# the compiled pattern text is unchanged.
imports_section_regex = re.compile(
    "(^#.+\n)*^(import|(from ((?!__future__)\\S+) import)).*\n"
    "(^import .+\n|^from .+\n|^[\t ]+.+\n|(^#.+\n)+((^import|^from) .+\n)"
    "|^\n|^[)]\n)*",
    re.M)
|
165 |
||
166 |
||
167 |
def format_import_lines(module, objects):
    """Render a "from <module> import ..." statement as source text.

    A statement that imports exactly one object is returned on a single
    line as long as that line is shorter than 79 characters. In every other
    case the objects are listed one per line between parentheses, each
    followed by a comma.
    """
    one_liner = None
    if len(objects) == 1:
        one_liner = "from %s import %s" % (module, objects[0])
    if one_liner is not None and len(one_liner) < 79:
        return one_liner
    # NOTE(review): the whitespace inside these literals is reproduced exactly
    # as it appears in the reviewed listing; confirm the intended indent of the
    # list items against the canonical file.
    listing = ",\n ".join(objects)
    return "from %s import (\n %s,\n )" % (module, listing)
|
175 |
||
176 |
||
177 |
def find_imports_section(content):
    """Locate the import section inside the text of a python file.

    The search starts after the module docstring if
    module_docstring_regex finds one, otherwise at the beginning of the text.

    Returns a (start, end) tuple of offsets into content. Returns
    (None, None) if imports_section_regex finds nothing or if the section
    found begins with a "# SKIP" comment.
    """
    docstring_match = module_docstring_regex.search(content)
    if docstring_match is not None:
        search_from = docstring_match.end('docstring')
    else:
        search_from = 0

    section = imports_section_regex.search(content, search_from)
    if section is None:
        return (None, None)
    if section.group(0).startswith('# SKIP'):
        # The file asked to be left alone.
        return (None, None)
    return (section.start(), section.end())
|
195 |
||
196 |
||
197 |
class ImportStatement:
    """Holds information about an import statement.

    import_module is True when the statement was created without objects,
    i.e. for a plain "import module". objects is either None or a list of
    the imported names, without duplicates and sorted case-insensitively.
    comment is the comment text attached to the statement, or None.
    """

    def __init__(self, objects=None, comment=None):
        self.import_module = objects is None
        if objects is None:
            self.objects = None
        else:
            self.objects = sorted(objects, key=str.lower)
        self.comment = comment

    def addObjects(self, new_objects):
        """More objects in this statement; eliminate duplicates.

        The resulting list is always unique and sorted, also when the
        statement had no objects before (a module first seen as
        "import module" and then as "from module import ...").
        import_module is left untouched.
        """
        existing = self.objects or []
        unique = set(existing).union(new_objects)
        # The name itself breaks ties between names that differ only in
        # case, so the result does not depend on set iteration order.
        self.objects = sorted(
            unique, key=lambda name: (name.lower(), name))

    def setComment(self, comment):
        """Set the comment of the statement, replacing any previous one."""
        self.comment = comment
|
221 |
||
222 |
||
223 |
def parse_import_statements(import_section):
    """Split the import section into statements.

    Returns a dictionary with the module as the key and an ImportStatement
    as the value. A module that is imported both with "import module" and
    with "from module import ..." shares a single ImportStatement.
    """
    imports = {}
    # Join lines that were continued with an escaped newline.
    import_section = escaped_nl_regex.sub("", import_section)
    # Simple "import ..." statements. They carry no objects, and multiple
    # modules in one statement are split up.
    for match in import_regex.finditer(import_section):
        for module in split_regex.split(match.group('module').strip()):
            imports[module] = ImportStatement()
    # "from ... import" statements, first the one-line and then the
    # parenthesized form.
    for pattern in (from_import_single_regex, from_import_multi_regex):
        for match in pattern.finditer(import_section):
            import_objects = split_regex.split(
                match.group('objects').strip(" \n,"))
            module = match.group('module').strip()
            # Only one pattern has a 'comment' group.
            comment = match.groupdict().get('comment')
            if module in imports:
                # Catch double import lines.
                imports[module].addObjects(import_objects)
            else:
                imports[module] = ImportStatement(import_objects)
            if comment is not None:
                imports[module].setComment(comment)
    # Comments in front of statements. This pass runs last, so such a comment
    # replaces an end-of-line comment recorded above.
    for match in comment_regex.finditer(import_section):
        module = match.group('module').strip()
        if module not in imports:
            # The commented statement was not recorded under this name,
            # e.g. "import foo as bar" is keyed by its whole text, or the
            # statement was not recognized by any pattern. There is nothing
            # to attach the comment to, so do not fail on the lookup.
            continue
        imports[module].setComment(match.group('comment').strip())

    return imports
|
282 |
||
283 |
||
284 |
def format_imports(imports):
    """Group and order imports, return the new import statements.

    imports maps module names to statement objects that provide the
    attributes comment, import_module and objects.

    The statements are emitted in up to four sections, in this order:
    standard library modules, statements whose comment starts with
    "# FIRST", third-party modules and local modules ("canonical" or "lp").
    Empty sections are left out. The returned text has no trailing newline.
    """
    standard_section = {}
    first_section = {}
    thirdparty_section = {}
    local_section = {}
    # Group modules into sections. The order of the checks matters: a FIRST
    # comment wins over every other criterion.
    # items() instead of iteritems() so this runs on Python 2 and 3 alike.
    for module, statement in imports.items():
        module_base = module.split('.')[0]
        comment = statement.comment
        if comment is not None and comment.startswith("# FIRST"):
            first_section[module] = statement
        elif module_base in ('canonical', 'lp'):
            local_section[module] = statement
        elif module_base in python_standard_libs:
            standard_section[module] = statement
        else:
            thirdparty_section[module] = statement

    all_import_lines = []
    # Sort within each section and generate statement strings.
    sections = (
        standard_section,
        first_section,
        thirdparty_section,
        local_section,
        )
    for section in sections:
        import_lines = []
        for module in sorted(section, key=str.lower):
            statement = section[module]
            # A comment goes in front of the statement it belongs to.
            if statement.comment is not None:
                import_lines.append(statement.comment)
            if statement.import_module:
                import_lines.append("import %s" % module)
            if statement.objects is not None:
                import_lines.append(
                    format_import_lines(module, statement.objects))
        if import_lines:
            all_import_lines.append('\n'.join(import_lines))
    # Sections are separated by one blank line.
    return '\n\n'.join(all_import_lines)
325 |
||
326 |
||
327 |
def reformat_importsection(filename):
    """Replace the given file with a reformatted version of it.

    Returns True if the file was rewritten. Returns False, leaving the file
    untouched, if find_imports_section reports no section to format or if
    the import section is already formatted as required.
    """
    # Read through a context manager so the handle is closed right away.
    with open(filename) as source_file:
        pyfile = source_file.read()
    import_start, import_end = find_imports_section(pyfile)
    if import_start is None:
        # Skip files with no import section.
        return False
    imports_section = pyfile[import_start:import_end]
    imports = parse_import_statements(imports_section)

    # The first newline ends the last import line, the others produce
    # blank lines.
    if pyfile[import_end:import_end+1] != '#':
        # Two blank lines before anything but comments.
        number_of_newlines = 3
    else:
        number_of_newlines = 2

    new_imports = format_imports(imports) + "\n" * number_of_newlines
    if new_imports == imports_section:
        # No change, no need to write a new file.
        return False

    # Closing the file explicitly makes sure everything has been written
    # before the caller reports the file as changed.
    with open(filename, "w") as new_file:
        new_file.write(pyfile[:import_start])
        new_file.write(new_imports)
        new_file.write(pyfile[import_end:])

    return True
|
354 |
||
355 |
||
356 |
def process_file(fpath):
    """Process the file with the given path.

    The path is printed to standard output if the file has been changed.
    """
    changed = reformat_importsection(fpath)
    if changed:
        # With a single argument the parenthesized form prints the same on
        # Python 2 as the print statement did, and also parses on Python 3.
        print(fpath)
|
361 |
||
362 |
||
363 |
def process_tree(dpath):
    """Process every *.py file found below the given directory."""
    for root, _subdirs, names in os.walk(dpath):
        python_files = [name for name in names if name.endswith('.py')]
        for name in python_files:
            process_file(os.path.join(root, name))
|
369 |
||
370 |
||
371 |
if __name__ == "__main__":
    arguments = sys.argv[1:]
    # No arguments at all is treated like a request for help.
    if not arguments or arguments[0] in ("-h", "-?", "--help"):
        sys.stderr.write(dedent("""\
        usage: format-imports <file or directory> ...

        Type "format-imports --docstring | less" to see the documentation.
        """))
        sys.exit(1)
    if arguments[0] == "--docstring":
        # The documentation is the module docstring.
        sys.stdout.write(__doc__)
        sys.exit(2)
    # Directories are walked recursively, anything else is taken to be a
    # single file.
    for path in arguments:
        if os.path.isdir(path):
            process_tree(path)
        else:
            process_file(path)
    sys.exit(0)