~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
#!/usr/bin/python
#
# Copyright 2010 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

""" Format import sections in python files

= Usage =

format-imports <file or directory> ...

= Operation =

The script will process each filename on the command line. If the file is a
directory it recurses into it and processes all *.py files found in the tree.
It will output the paths of all the files that have been changed.

For Launchpad it was applied to the "lib/canonical/launchpad" and the "lib/lp"
subtrees. Running it with those parameters on a freshly branched LP tree
should not produce any output, meaning that all the files in the tree should
be formatted correctly.

The script identifies the import section of each file as a block of lines
that start with "import" or "from" or are indented with at least one space or
are blank lines. Comment lines are also included if they are followed by an
import statement. An initial __future__ import and a module docstring are
explicitly skipped.

The import section is rewritten as four subsections, each separated by a
blank line. Any of the sections may be empty.
 1. Standard python library modules
 2. Import statements explicitly ordered to the top (see below)
 3. Third-party modules, meaning anything not fitting one of the other
    subsection criteria
 4. Local modules that begin with "canonical" or "lp".

Each section is sorted alphabetically by module name. Each module is put
on its own line, i.e.
{{{
  import os, sys
}}}
becomes
{{{
  import os
  import sys
}}}
Multiple import statements for the same module are conflated into one
statement, or two if the module was imported alongside an object inside it,
i.e.
{{{
  import sys
  from sys import stdin
}}}

Statements that import more than one object are put on multiple lines in
list style, i.e.
{{{
  from sys import (
      stdin,
      stdout,
      )
}}}
Objects are sorted alphabetically and case-insensitively. One-object imports
are only formatted in this manner if the statement exceeds 78 characters in
length.

Comments stick with the import statement that follows them. Comments at the
end of one-line statements are moved in front of the statement, i.e.
{{{
  from sys import exit # Have a way out
}}}
becomes
{{{
  # Have a way out
  from sys import exit
}}}

= Format control =

Two special comments allow you to control the operation of the formatter.

When an import statement is immediately preceded by a comment that starts
with the word "FIRST", it is placed into the second subsection (see above).

When the first import statement is directly preceded by a comment that starts
with the word "SKIP", the entire file is exempt from formatting.

= Known bugs =

Make sure to always check the result of the re-formatting to see if you have
been bitten by one of these.

Comments inside multi-line import statements break the formatter. A statement
like this will be ignored:
{{{
  from lp.app.interfaces import (
      # Don't do this.
      IMyInterface,
      IMyOtherInterface, # Don't do this either
      )
}}}
Actually, this will cause the statement and all following ones to be ignored:
{{{
  from lp.app.interfaces import (
  # Breaks indentation rules anyway.
      IMyInterface,
      IMyOtherInterface,
      )
}}}

If a single-line statement has both a comment in front of it and at the end
of the line, only the end-line comment will survive. This could probably
easily be fixed to concatenate the two.
{{{
  # I am a gonner.
  from lp.app.interfaces import IMyInterface # I will survive!
}}}

Line continuation characters are recognized and resolved but
not re-introduced. This may leave the re-formatted text with a line that
is over the length limit.
{{{
    from lp.app.verylongnames.orverlydeep.modulestructure.leavenoroom \
        import object
}}}
"""

__metaclass__ = type

# SKIP this file when reformatting.
import os
import re
import sys
from textwrap import dedent

sys.path[0:0] = [os.path.dirname(__file__)]
from python_standard_libs import python_standard_libs


# Matches an escaped newline (a backslash directly followed by a line
# break), i.e. a line continuation inside a statement.
escaped_nl_regex = re.compile("\\\\\n", re.M)
# Matches a plain "import ..." line. The "module" group captures everything
# after the keyword, which may be a comma-separated list of modules.
import_regex = re.compile("^import +(?P<module>.+)$", re.M)
# Matches a one-line "from <module> import <objects>" statement. The objects
# are either "*" or a comma-separated list of names; an end-of-line comment
# is captured in the optional "comment" group.
from_import_single_regex = re.compile(
    "^from (?P<module>.+) +import +"
    "(?P<objects>[*]|[a-zA-Z0-9_, ]+)"
    "(?P<comment>#.*)?$", re.M)
# Matches a parenthesized "from <module> import (...)" statement that may
# span several lines. Only names, commas, spaces and newlines are accepted
# between the parentheses, so embedded comments prevent a match.
from_import_multi_regex = re.compile(
    "^from +(?P<module>.+) +import *[(](?P<objects>[a-zA-Z0-9_, \n]+)[)]$",
    re.M)
# Matches one or more comment lines that are directly followed by an import
# statement; captures the comment block and the name of the module.
comment_regex = re.compile(
    "(?P<comment>(^#.+\n)+)(^import|^from) +(?P<module>[a-zA-Z0-9_.]+)", re.M)
# Separator between the items of a comma-separated list of names.
split_regex = re.compile(",\s*")

# Module docstrings are multiline (""") strings that are not indented and are
# followed at some point by an import statement.
module_docstring_regex = re.compile(
    '(?P<docstring>^["]{3}[^"]+["]{3}\n).*^(import |from .+ import)',
    re.M | re.S)
# The imports section starts with an import statement that is not a
# __future__ import and consists of import lines, indented lines, empty lines
# and comments which are followed by an import line. Sometimes we even find
# lines that contain a single ")"... :-(
imports_section_regex = re.compile(
    "(^#.+\n)*^(import|(from ((?!__future__)\S+) import)).*\n"
    "(^import .+\n|^from .+\n|^[\t ]+.+\n|(^#.+\n)+((^import|^from) "
    ".+\n)|^\n|^[)]\n)*",
    re.M)


def format_import_lines(module, objects):
    """Render a "from ... import ..." statement for the given objects.

    A single object stays on one line as long as the whole statement is at
    most 78 characters long. Everything else is rendered in list style, one
    object per line, each followed by a comma.
    """
    if len(objects) == 1:
        one_liner = "from %s import %s" % (module, objects[0])
        if len(one_liner) <= 78:
            return one_liner
    # List style: every object on its own line, indented by four spaces.
    separator = ",\n    "
    listed_objects = separator.join(objects)
    return "from %s import (\n    %s,\n    )" % (module, listed_objects)


def find_imports_section(content):
    """Locate the import section within the text of a file.

    Returns a (start, end) tuple of offsets into content. Returns
    (None, None) if no import section is found or if the section starts
    with a "# SKIP" comment.
    """
    # Begin the search behind the module docstring, if there is one.
    docstring = module_docstring_regex.search(content)
    if docstring is not None:
        search_from = docstring.end('docstring')
    else:
        search_from = 0

    section = imports_section_regex.search(content, search_from)
    if section is None:
        return (None, None)
    if section.group(0).startswith('# SKIP'):
        # The file explicitly asks to be skipped.
        return (None, None)
    return section.span()


class ImportStatement:
    """Holds information about an import statement.

    import_module is True if the statement was created without objects,
    i.e. for a plain "import module". objects is None or the list of names
    imported from the module, sorted case-insensitively. comment is None or
    the comment text that belongs to the statement.
    """

    def __init__(self, objects=None, comment=None):
        # A statement without objects stands for "import <module>".
        self.import_module = objects is None
        if objects is None:
            self.objects = None
        else:
            self.objects = sorted(objects, key=str.lower)
        self.comment = comment

    def addObjects(self, new_objects):
        """More objects in this statement; eliminate duplicates.

        The resulting list is always a new, case-insensitively sorted list,
        also when the statement had no objects before.
        """
        # Treat "no objects so far" like an empty list so that the first
        # batch of objects is sorted and de-duplicated like any later one
        # and the caller's list is never stored by reference.
        known_objects = self.objects or []
        unique_objects = set(known_objects).union(new_objects)
        self.objects = sorted(unique_objects, key=str.lower)

    def setComment(self, comment):
        """Set the comment of the statement, replacing any earlier one."""
        self.comment = comment


def parse_import_statements(import_section):
    """Split the import section into statements.

    Returns a dictionary that maps each module name to the ImportStatement
    object describing how that module is imported. A plain import with an
    alias is stored under the full "module as alias" text.

    Raises KeyError if a comment precedes an import statement that none of
    the statement patterns was able to parse.
    """
    imports = {}
    # Remove escaped newlines so that continued statements are on one line.
    import_section = escaped_nl_regex.sub("", import_section)
    # Search for simple one-line import statements.
    for match in import_regex.finditer(import_section):
        # These imports are created without objects.
        # Multiple modules in one statement are split up.
        for module in split_regex.split(match.group('module').strip()):
            imports[module] = ImportStatement()
    # Search for "from ... import" statements.
    for pattern in (from_import_single_regex, from_import_multi_regex):
        for match in pattern.finditer(import_section):
            import_objects = split_regex.split(
                match.group('objects').strip(" \n,"))
            module = match.group('module').strip()
            # Only one pattern has a 'comment' group.
            comment = match.groupdict().get('comment', None)
            if module in imports:
                # Catch double import lines.
                imports[module].addObjects(import_objects)
            else:
                imports[module] = ImportStatement(import_objects)
            if comment is not None:
                imports[module].setComment(comment)
    # Search for comments in import section.
    for match in comment_regex.finditer(import_section):
        module = match.group('module').strip()
        comment = match.group('comment').strip()
        statement = imports.get(module)
        if statement is None:
            # The comment pattern only captures the bare module name, but
            # "import module as alias" is stored as "module as alias".
            alias_prefix = module + " as "
            aliased_keys = sorted(
                key for key in imports if key.startswith(alias_prefix))
            if not aliased_keys:
                # The statement behind the comment was not recognized by any
                # pattern; fail loudly instead of dropping the comment.
                raise KeyError(
                    "comment precedes an import of %r that could not be "
                    "parsed" % module)
            statement = imports[aliased_keys[0]]
        statement.setComment(comment)

    return imports


def format_imports(imports):
    """Group and order imports, return the new import statements.

    imports maps module names to statement objects that provide the
    attributes comment, import_module and objects. The result is the text
    of the new import section without a trailing newline. It consists of up
    to four sections (standard library, "FIRST" imports, third-party, local)
    and empty sections are left out.
    """
    standard_section = {}
    first_section = {}
    thirdparty_section = {}
    local_section = {}
    # Group modules into sections.
    for module, statement in imports.items():
        # Classify by the top-level package. A plain import with an alias is
        # keyed as "module as alias", so the alias part has to be cut off.
        module_base = module.split('.')[0].split(' ', 1)[0]
        comment = statement.comment
        if comment is not None and comment.startswith("# FIRST"):
            first_section[module] = statement
        elif module_base in ('canonical', 'lp'):
            local_section[module] = statement
        elif module_base in python_standard_libs:
            standard_section[module] = statement
        else:
            thirdparty_section[module] = statement

    all_import_lines = []
    # Sort within each section and generate statement strings.
    sections = (
        standard_section,
        first_section,
        thirdparty_section,
        local_section,
        )
    for section in sections:
        import_lines = []
        for module in sorted(section, key=str.lower):
            statement = section[module]
            # The comment goes in front of the statement it belongs to.
            if statement.comment is not None:
                import_lines.append(statement.comment)
            if statement.import_module:
                import_lines.append("import %s" % module)
            if statement.objects is not None:
                import_lines.append(
                    format_import_lines(module, statement.objects))
        if import_lines:
            all_import_lines.append('\n'.join(import_lines))
    # Sections are separated by one blank line.
    return '\n\n'.join(all_import_lines)


def reformat_importsection(filename):
    """Rewrite the import section of the given file in place.

    Returns True if the file was rewritten. Returns False if it was left
    untouched because no import section was found (this includes files
    marked with a SKIP comment) or because the section is already formatted.
    """
    # Close the file right after reading; it is re-opened for writing below.
    with open(filename) as source_file:
        pyfile = source_file.read()
    import_start, import_end = find_imports_section(pyfile)
    if import_start is None:
        # Skip files with no import section.
        return False
    imports_section = pyfile[import_start:import_end]
    imports = parse_import_statements(imports_section)

    next_char = pyfile[import_end:import_end + 1]
    if next_char == '':
        # The import section ends the file: just terminate the last line
        # instead of leaving blank lines at the end of the file.
        number_of_newlines = 1
    elif next_char != '#':
        # Two blank lines before anything but comments.
        number_of_newlines = 3
    else:
        # One blank line before a comment.
        number_of_newlines = 2

    new_imports = format_imports(imports) + ("\n" * number_of_newlines)
    if new_imports == imports_section:
        # No change, no need to write a new file.
        return False

    # The context manager makes sure the new content is flushed and the
    # file is closed even if writing fails.
    with open(filename, "w") as new_file:
        new_file.write(pyfile[:import_start])
        new_file.write(new_imports)
        new_file.write(pyfile[import_end:])

    return True


def process_file(fpath):
    """Reformat one file and print its path if the file was changed."""
    if reformat_importsection(fpath):
        print(fpath)


def process_tree(dpath):
    """Process every *.py file found in the tree below the given directory."""
    for directory, _subdirectories, names in os.walk(dpath):
        python_files = [name for name in names if name.endswith('.py')]
        for name in python_files:
            process_file(os.path.join(directory, name))


if __name__ == "__main__":
    arguments = sys.argv[1:]
    # Without arguments or with a help option only the usage is shown.
    wants_help = not arguments or arguments[0] in ("-h", "-?", "--help")
    if wants_help:
        sys.stderr.write(dedent("""\
        usage: format-imports <file or directory> ...

        Type "format-imports --docstring | less" to see the documentation.
        """))
        sys.exit(1)
    if arguments[0] == "--docstring":
        # Print the documentation of this module.
        sys.stdout.write(__doc__)
        sys.exit(2)
    # Directories are processed recursively, anything else as a single file.
    for path in arguments:
        if not os.path.isdir(path):
            process_file(path)
        else:
            process_tree(path)
    sys.exit(0)