3
# Copyright 2010 Canonical Ltd. This software is licensed under the
4
# GNU Affero General Public License version 3 (see the file LICENSE).
6
""" Format import sections in python files
10
format-imports <file or directory> ...
14
The script will process each filename on the command line. If the file is a
15
directory, it recurses into it and processes all *.py files found in the tree.
16
It will output the paths of all the files that have been changed.
18
The script identifies the import section of each file as a block of lines
19
that start with "import" or "from" or are indented with at least one space or
20
are blank lines. Comment lines are also included if they are followed by an
21
import statement. An initial __future__ import and a module docstring are
24
The import section is rewritten as three subsections, each separated by a
25
blank line. Any of the sections may be empty.
26
1. Standard python library modules
27
2. Import statements explicitly ordered to the top (see below)
28
3. Third-party modules, meaning anything not fitting one of the other
30
4. Local modules that begin with "canonical" or "lp".
32
Each section is sorted alphabetically by module name. Each module is put
42
Multiple import statements for the same module are conflated into one
43
statement, or two if the module was imported alongside an object inside it,
50
Statements that import more than one object are put on multiple lines in
58
Objects are sorted alphabetically and case-insensitively. One-object imports
59
are only formatted in this manner if the statement exceeds 78 characters in
62
Comments stick with the import statement that followed them. Comments at the
63
end of one-line statements are moved to be in front of it, i.e.
65
from sys import exit # Have a way out
75
Two special comments allow you to control the operation of the formatter.
77
When an import statement is directly preceded by a comment that starts with
78
the word "FIRST", it is placed into the second subsection (see above).
80
When the first import statement is directly preceded by a comment that starts
81
with the word "SKIP", the entire file is exempt from formatting.
85
Make sure to always check the result of the re-formatting to see if you have
86
been bitten by one of these.
88
Comments inside multi-line import statements break the formatter. A statement
89
like this will be ignored:
91
from lp.app.interfaces import (
94
IMyOtherInterface, # Don't do this either
97
Actually, this will cause the statement and all following ones to be ignored:
99
from lp.app.interfaces import (
100
# Breaks indentation rules anyway.
106
If a single-line statement has both a comment in front of it and at the end
107
of the line, only the end-line comment will survive. This could probably
108
easily be fixed to concatenate the two.
111
from lp.app.interfaces import IMyInterface # I will survive!
114
Line continuation characters are recognized and resolved but
115
not re-introduced. This may leave the re-formatted text with a line that
116
is over the length limit.
118
from lp.app.verylongnames.orverlydeep.modulestructure.leavenoroom \
125
# SKIP this file when reformatting.
130
sys.path[0:0] = [os.path.dirname(__file__)]
131
from python_standard_libs import python_standard_libs
134
# Matches a backslash directly followed by a newline, i.e. an escaped line
# break (line continuation).
escaped_nl_regex = re.compile("\\\\\n", re.M)
# A plain "import ..." line; "module" captures everything after the keyword.
import_regex = re.compile("^import +(?P<module>.+)$", re.M)
# A one-line "from ... import ..." statement with an optional end-line
# comment.
from_import_single_regex = re.compile(
    "^from (?P<module>.+) +import +"
    "(?P<objects>[*]|[a-zA-Z0-9_, ]+)"
    "(?P<comment>#.*)?$", re.M)
# A parenthesised "from ... import (...)" statement; the object list may
# contain newlines.
from_import_multi_regex = re.compile(
    "^from +(?P<module>.+) +import *[(](?P<objects>[a-zA-Z0-9_, \n]+)[)]$", re.M)
# One or more comment lines directly followed by an import statement.
comment_regex = re.compile(
    "(?P<comment>(^#.+\n)+)(^import|^from) +(?P<module>[a-zA-Z0-9_.]+)", re.M)
# Separator between the names of a comma-separated list.  A raw string is
# used so that "\s" is not treated as an (invalid) string escape.
split_regex = re.compile(r",\s*")

# Module docstrings are multiline (""") strings that are not indented and are
# followed at some point by an import.
module_docstring_regex = re.compile(
    '(?P<docstring>^["]{3}[^"]+["]{3}\n).*^(import |from .+ import)', re.M | re.S)
# The imports section starts with an import statement that is not a
# __future__ import and consists of import lines, indented lines, empty lines
# and comments which are followed by an import line. Sometimes we even find
# lines that contain a single ")"... :-(
# NOTE(review): the end of this call was not visible in the reviewed text;
# re.M is supplied because the pattern relies on "^" matching at every line
# start -- confirm against the original.
imports_section_regex = re.compile(
    "(^#.+\n)*^(import|(from ((?!__future__)\\S+) import)).*\n"
    "(^import .+\n|^from .+\n|^[\t ]+.+\n|(^#.+\n)+((^import|^from) .+\n)|^\n|^[)]\n)*",
    re.M)
161
def format_import_lines(module, objects):
    """Generate correct from...import strings.

    :param module: Dotted name of the module being imported from.
    :param objects: List of the names imported from the module.
    :return: A one-line statement if there is exactly one object and the
        statement is shorter than 79 characters; otherwise a parenthesised
        statement with one object per line.
    """
    if len(objects) == 1:
        statement = "from %s import %s" % (module, objects[0])
        if len(statement) < 79:
            return statement
    # NOTE(review): the continuation lines are indented by the whitespace
    # embedded in these literals; confirm the width matches the intended
    # style.
    return "from %s import (\n %s,\n )" % (
        module, ",\n ".join(objects))
171
def find_imports_section(content):
    """Return that part of the file that contains the import statements.

    :param content: The complete text of a python file.
    :return: A tuple (startpos, endpos) of offsets into content that
        delimit the import section, or (None, None) if no import section
        was found or the section starts with a "# SKIP" comment.
    """
    # Skip module docstring.
    docstring_match = module_docstring_regex.search(content)
    if docstring_match is None:
        startpos = 0
    else:
        startpos = docstring_match.end('docstring')

    match = imports_section_regex.search(content, startpos)
    if match is None:
        return (None, None)
    startpos = match.start()
    endpos = match.end()
    if content.startswith('# SKIP', startpos, endpos):
        # Skip files explicitly.
        return (None, None)
    return (startpos, endpos)
191
class ImportStatement:
    """Holds information about an import statement.

    Attributes:
      import_module: True if the module itself is imported ("import x"),
          which is the case when the statement is created without objects.
      objects: None, or the names imported from the module, sorted
          case-insensitively.
      comment: None, or the comment text attached to the statement.
    """

    def __init__(self, objects=None, comment=None):
        self.import_module = objects is None
        if objects is None:
            self.objects = None
        else:
            self.objects = sorted(objects, key=str.lower)
        self.comment = comment

    def addObjects(self, new_objects):
        """More objects in this statement; eliminate duplicates.

        The result is always a new, case-insensitively sorted list, also
        when the statement had no objects so far, so the caller's list is
        never stored or modified.
        """
        if self.objects is None:
            known_objects = []
        else:
            known_objects = self.objects
        # Use set to eliminate double objects.
        more_objects = set(known_objects + list(new_objects))
        self.objects = sorted(more_objects, key=str.lower)

    def setComment(self, comment):
        """Add a comment to the statement, replacing any previous one."""
        self.comment = comment
217
def parse_import_statements(import_section):
    """Split the import section into statements.

    :param import_section: The text of the import section of a file.
    :return: A dictionary with the module name as the key and an
        ImportStatement instance as the value.
    :raises KeyError: if a comment precedes an import statement that none
        of the statement patterns was able to parse.
    """
    imports = {}
    # Remove escaped newlines so that continued lines become one line.
    import_section = escaped_nl_regex.sub("", import_section)
    # Search for simple one-line import statements.
    for match in import_regex.finditer(import_section):
        # These imports are marked by having no objects.
        # Multiple modules in one statement are split up.
        for module in split_regex.split(match.group('module').strip()):
            imports[module] = ImportStatement()
    # Search for "from ... import" statements.
    for pattern in (from_import_single_regex, from_import_multi_regex):
        for match in pattern.finditer(import_section):
            import_objects = split_regex.split(
                match.group('objects').strip(" \n,"))
            module = match.group('module').strip()
            # Only one pattern has a 'comment' group.
            comment = match.groupdict().get('comment', None)
            if module in imports:
                # Catch double import lines.
                imports[module].addObjects(import_objects)
            else:
                imports[module] = ImportStatement(import_objects)
            if comment is not None:
                imports[module].setComment(comment)
    # Search for comments in import section.  A comment found here replaces
    # any comment that was set from the end of the statement's line.
    for match in comment_regex.finditer(import_section):
        module = match.group('module').strip()
        comment = match.group('comment').strip()
        imports[module].setComment(comment)

    return imports
278
def format_imports(imports):
    """Group and order imports, return the new import statements.

    :param imports: A dictionary mapping module names to statement objects
        that provide the attributes comment, import_module and objects.
    :return: The text of the new import section without a trailing
        newline.  Non-empty sections are separated by one blank line.
    """
    standard_section = {}
    first_section = {}
    thirdparty_section = {}
    local_section = {}
    # Group modules into sections.
    for module, statement in imports.items():
        module_base = module.split('.')[0]
        comment = statement.comment
        if comment is not None and comment.startswith("# FIRST"):
            first_section[module] = statement
        elif module_base in ('canonical', 'lp'):
            local_section[module] = statement
        elif module_base in python_standard_libs:
            standard_section[module] = statement
        else:
            thirdparty_section[module] = statement

    all_import_lines = []
    # Sort within each section and generate statement strings.  The order
    # of this tuple is the order of the sections in the output.
    sections = (
        standard_section,
        first_section,
        thirdparty_section,
        local_section,
        )
    for section in sections:
        import_lines = []
        for module in sorted(section, key=str.lower):
            statement = section[module]
            if statement.comment is not None:
                import_lines.append(statement.comment)
            if statement.import_module:
                import_lines.append("import %s" % module)
            if statement.objects is not None:
                import_lines.append(
                    format_import_lines(module, statement.objects))
        # Empty sections do not show up in the output at all.
        if import_lines:
            all_import_lines.append('\n'.join(import_lines))
    # Sections are separated by one blank line.
    return '\n\n'.join(all_import_lines)
321
def reformat_importsection(filename):
    """Replace the given file with a reformatted version of it.

    :param filename: Path of the python file to reformat in place.
    :return: True if the file was rewritten, False if it has no import
        section to format or if its import section is already formatted.
    """
    # Context managers make sure the file is closed again, also on errors.
    with open(filename) as source:
        pyfile = source.read()
    import_start, import_end = find_imports_section(pyfile)
    if import_start is None:
        # Skip files with no import section.
        return False
    imports_section = pyfile[import_start:import_end]
    imports = parse_import_statements(imports_section)

    if pyfile[import_end:import_end+1] != '#':
        # Two blank lines before anything but comments.
        number_of_newlines = 3
    else:
        # Only one blank line before a comment.
        number_of_newlines = 2

    new_imports = format_imports(imports) + "\n" * number_of_newlines
    if new_imports == imports_section:
        # No change, no need to write a new file.
        return False

    with open(filename, "w") as new_file:
        new_file.write(pyfile[:import_start])
        new_file.write(new_imports)
        new_file.write(pyfile[import_end:])
    return True
350
def process_file(fpath):
    """Process the file with the given path.

    The path is printed if the file was changed.
    """
    changed = reformat_importsection(fpath)
    if changed:
        print(fpath)
357
def process_tree(dpath):
    """Process every *.py file found below the directory dpath."""
    for dirpath, _subdirs, filenames in os.walk(dpath):
        python_files = [name for name in filenames if name.endswith('.py')]
        for name in python_files:
            process_file(os.path.join(dirpath, name))
365
if __name__ == "__main__":
    # Each command line argument is either a directory, which is processed
    # recursively, or the path of a single file.
    for filename in sys.argv[1:]:
        if os.path.isdir(filename):
            process_tree(filename)
        else:
            process_file(filename)