~launchpad-pqm/launchpad/devel

« back to all changes in this revision

Viewing changes to utilities/format-imports

Committer: Henning Eggers
Date: 2010-08-27 15:38:25 UTC
mto: This revision was merged to the branch mainline in revision 11469.
Revision ID: henning@canonical.com-20100827153825-tzudt3kgq91s0nbb

Added format-imports script and documented it.

files added:
utilities/format-imports

utilities/python_standard_libs.py

Show diffs side-by-side

added added

removed removed

utilities/format-imports

#!/usr/bin/python

# GNU Affero General Public License version 3 (see the file LICENSE).

""" Format import sections in python files

= Usage =

format-imports <file or directory> ...

= Operation =

The script will process each filename on the command line. If the file is a

directory, it recurses into it and processes all *.py files found in the tree.

It will output the paths of all the files that have been changed.

The script identifies the import section of each file as a block of lines

that start with "import" or "from" or are indented with at least one space or

are blank lines. Comment lines are also included if they are followed by an

import statement. An initial __future__ import and a module docstring are

explicitly skipped.

The import section is rewritten as four subsections, each separated by a

blank line. Any of the sections may be empty.

1. Standard python library modules

2. Import statements explicitly ordered to the top (see below)

3. Third-party modules, meaning anything not fitting one of the other

subsection criteria

4. Local modules that begin with "canonical" or "lp".

Each section is sorted alphabetically by module name. Each module is put

on its own line, i.e.

{{{

import os, sys

}}}

becomes

{{{

import os

import sys

}}}

Multiple import statements for the same module are conflated into one

statement, or two if the module was imported alongside an object inside it,

i.e.

{{{

import sys

from sys import stdin

}}}

Statements that import more than one object are put on multiple lines in

list style, i.e.

{{{

from sys import (

stdin,

stdout,

)

}}}

Objects are sorted alphabetically and case-insensitively. One-object imports

are only formatted in this manner if the statement exceeds 78 characters in

length.

Comments stick with the import statement that followed them. Comments at the

end of one-line statements are moved to be in front of them, i.e.

{{{

from sys import exit # Have a way out

}}}

becomes

{{{

# Have a way out

from sys import exit

}}}

= Format control =

Two special comments allow you to control the operation of the formatter.

When an import statement is directly preceded by a comment that starts with

the word "FIRST", it is placed into the second subsection (see above).

When the first import statement is directly preceded by a comment that starts

with the word "SKIP", the entire file is exempt from formatting.

= Known bugs =

Make sure to always check the result of the re-formatting to see if you have

been bitten by one of these.

Comments inside multi-line import statements break the formatter. A statement

like this will be ignored:

{{{

from lp.app.interfaces import (

# Don't do this.

IMyInterface,

IMyOtherInterface, # Don't do this either

)

}}}

Actually, this will cause the statement and all following ones to be ignored:

{{{

from lp.app.interfaces import (

# Breaks indentation rules anyway.

IMyInterface,

IMyOtherInterface,

)

}}}

105

106

If a single-line statement has both a comment in front of it and at the end

of the line, only the end-line comment will survive. This could probably

easily be fixed to concatenate the two.

109

{{{

110

# I am a gonner.

111

from lp.app.interfaces import IMyInterface # I will survive!

112

}}}

113

114

Line continuation characters are recognized and resolved but

not re-introduced. This may leave the re-formatted text with a line that

is over the length limit.

117

{{{

118

from lp.app.verylongnames.orverlydeep.modulestructure.leavenoroom \

119

import object

120

}}}

121

"""

122

123

__metaclass__ = type

124

125

# SKIP this file when reformatting.

126

import os

127

import re

128

import sys

129

130

sys.path[0:0] = [os.path.dirname(__file__)]

131

from python_standard_libs import python_standard_libs

132

133

134

# Matches a line continuation: a backslash directly followed by a newline.
escaped_nl_regex = re.compile("\\\\\n", re.M)
# A plain "import a, b" line; the 'module' group holds everything after the
# keyword (possibly several comma-separated modules).
import_regex = re.compile("^import +(?P<module>.+)$", re.M)
# A one-line "from a import b, c" statement. The optional 'comment' group
# captures an end-of-line comment.
from_import_single_regex = re.compile(
    "^from (?P<module>.+) +import +"
    "(?P<objects>[*]|[a-zA-Z0-9_, ]+)"
    "(?P<comment>#.*)?$", re.M)
# A parenthesised "from a import (b, c)" statement, which may span lines.
from_import_multi_regex = re.compile(
    "^from +(?P<module>.+) +import *[(](?P<objects>[a-zA-Z0-9_, \n]+)[)]$", re.M)
# One or more comment lines directly followed by an import statement; the
# 'module' group is the dotted name that follows the keyword.
comment_regex = re.compile(
    "(?P<comment>(^#.+\n)+)(^import|^from) +(?P<module>[a-zA-Z0-9_.]+)", re.M)
# Separator between the names of a comma-separated list. Written as a raw
# string so that "\s" is not an invalid string escape sequence.
split_regex = re.compile(r",\s*")

# Module docstrings are multiline (""") strings that are not indented and are
# followed at some point by an import.
module_docstring_regex = re.compile(
    '(?P<docstring>^["]{3}[^"]+["]{3}\n).*^(import |from .+ import)', re.M | re.S)
# The imports section starts with an import statement that is not a
# __future__ import and consists of import lines, indented lines, empty lines
# and comments which are followed by an import line. Sometimes we even find
# lines that contain a single ")"... :-(
# The backslash of "\S" is doubled so that the literal stays a valid string
# while the compiled pattern is unchanged.
imports_section_regex = re.compile(
    "(^#.+\n)*^(import|(from ((?!__future__)\\S+) import)).*\n"
    "(^import .+\n|^from .+\n|^[\t ]+.+\n|(^#.+\n)+((^import|^from) .+\n)|^\n|^[)]\n)*",
    re.M)

159

160

161

def format_import_lines(module, objects):
    """Generate a correctly formatted "from ... import" statement.

    :param module: Dotted name of the module to import from.
    :param objects: Non-empty list of the names to import, in output order.
    :return: The statement as a string without a trailing newline. A single
        name is kept on one line when the statement is shorter than 79
        characters; otherwise the names are listed one per line inside
        parentheses, each followed by a comma.
    """
    if len(objects) == 1:
        statement = "from %s import %s" % (module, objects[0])
        if len(statement) < 79:
            return statement
    # NOTE(review): continuation lines use the standard four-space indent;
    # confirm this matches the project's list style.
    indent = " " * 4
    listing = (",\n" + indent).join(objects)
    return "from %s import (\n%s%s,\n%s)" % (module, indent, listing, indent)

169

170

171

def find_imports_section(content):
    """Locate the import section inside the text of a python file.

    :param content: The complete text of the file.
    :return: A (start, end) tuple of offsets into content, or (None, None)
        when no import section is found or the section begins with a
        "# SKIP" comment.
    """
    # Begin the search behind the module docstring, if there is one.
    docstring = module_docstring_regex.search(content)
    search_from = 0 if docstring is None else docstring.end('docstring')

    section = imports_section_regex.search(content, search_from)
    if section is None:
        return (None, None)
    if section.group(0).startswith('# SKIP'):
        # The file asked to be left alone.
        return (None, None)
    return (section.start(), section.end())

189

190

191

class ImportStatement(object):
    """Holds information about the import statements for one module.

    Attributes:
        import_module: True when the statement was created without objects,
            i.e. for a plain "import module" statement. It is not changed
            when objects are added later.
        objects: None, or the list of imported names, free of duplicates
            and sorted case-insensitively.
        comment: None, or the comment text attached to the statement.
    """

    def __init__(self, objects=None, comment=None):
        self.import_module = objects is None
        if objects is None:
            self.objects = None
        else:
            self.objects = self._normalize(objects)
        self.comment = comment

    @staticmethod
    def _normalize(objects):
        """Return the unique names sorted case-insensitively.

        The name itself is the tie-breaker so that names differing only in
        case always come out in the same order.
        """
        return sorted(set(objects), key=lambda name: (name.lower(), name))

    def addObjects(self, new_objects):
        """More objects in this statement; eliminate duplicates."""
        combined = list(new_objects)
        if self.objects is not None:
            combined.extend(self.objects)
        # Normalise in every case so the first batch of objects is sorted
        # and de-duplicated just like later ones.
        self.objects = self._normalize(combined)

    def setComment(self, comment):
        """Add a comment to the statement, replacing any previous one."""
        self.comment = comment

215

216

217

def parse_import_statements(import_section):
    """Split the import section into statements.

    :param import_section: The text of the import section of a file.
    :return: A dictionary with the module string as the key and an
        ImportStatement as the value. For "import a.b as c" the key is the
        whole "a.b as c" string.
    :raises KeyError: If a comment precedes an import statement that none
        of the statement patterns recognised.
    """
    imports = {}
    # Join continued lines by removing escaped newlines.
    import_section = escaped_nl_regex.sub("", import_section)

    # Simple "import a, b" statements; multiple modules in one statement
    # are split up. These statements carry no objects.
    for match in import_regex.finditer(import_section):
        for module in split_regex.split(match.group('module').strip()):
            imports[module] = ImportStatement()

    # "from ... import ..." statements, one-line and parenthesised.
    for pattern in (from_import_single_regex, from_import_multi_regex):
        for match in pattern.finditer(import_section):
            import_objects = split_regex.split(
                match.group('objects').strip(" \n,"))
            module = match.group('module').strip()
            # Only one pattern has a 'comment' group.
            comment = match.groupdict().get('comment', None)
            if module in imports:
                # Catch double import lines.
                imports[module].addObjects(import_objects)
            else:
                imports[module] = ImportStatement(import_objects)
            if comment is not None:
                imports[module].setComment(comment)

    # Comments that directly precede an import statement.
    for match in comment_regex.finditer(import_section):
        module = match.group('module').strip()
        comment = match.group('comment').strip()
        key = module
        if key not in imports:
            # comment_regex captures only the dotted name, but a statement
            # such as "import a.b as c" is stored under "a.b as c".
            candidates = sorted(
                name for name in imports if name.startswith(module + " "))
            if candidates:
                key = candidates[0]
        imports[key].setComment(comment)

    return imports

276

277

278

def format_imports(imports):
    """Group and order imports, return the new import statements.

    :param imports: Dictionary mapping module strings to statement objects
        that provide the attributes comment, import_module and objects.
    :return: The text of the new import section without a trailing
        newline. The non-empty subsections (standard library, "FIRST",
        third-party, local) are separated by one blank line.
    """
    standard_section = {}
    first_section = {}
    thirdparty_section = {}
    local_section = {}
    # Group modules into sections.
    for module, statement in imports.items():
        module_base = module.split('.')[0]
        comment = statement.comment
        if comment is not None and comment.startswith("# FIRST"):
            section = first_section
        elif module_base in ('canonical', 'lp'):
            section = local_section
        elif module_base in python_standard_libs:
            section = standard_section
        else:
            section = thirdparty_section
        section[module] = statement

    all_import_lines = []
    # Sort within each section and generate statement strings.
    sections = (
        standard_section,
        first_section,
        thirdparty_section,
        local_section,
        )
    for section in sections:
        import_lines = []
        for module in sorted(section, key=str.lower):
            statement = section[module]
            if statement.comment is not None:
                import_lines.append(statement.comment)
            if statement.import_module:
                import_lines.append("import %s" % module)
            if statement.objects is not None:
                import_lines.append(
                    format_import_lines(module, statement.objects))
        if import_lines:
            all_import_lines.append('\n'.join(import_lines))
    # Sections are separated by one blank line.
    return '\n\n'.join(all_import_lines)

319

320

321

def reformat_importsection(filename):
    """Replace the given file with a reformatted version of it.

    :param filename: Path of the python file to process.
    :return: True if the file was rewritten, False if it has no import
        section to format or the import section is already formatted.
    """
    # Read through a context manager so the handle is closed right away.
    with open(filename) as source:
        pyfile = source.read()
    import_start, import_end = find_imports_section(pyfile)
    if import_start is None:
        # Skip files with no import section.
        return False
    imports_section = pyfile[import_start:import_end]
    imports = parse_import_statements(imports_section)

    # The formatted imports carry no trailing newline, so three newlines
    # yield two blank lines and two newlines yield one.
    if pyfile[import_end:import_end+1] != '#':
        # Two blank lines before anything but comments.
        number_of_newlines = 3
    else:
        number_of_newlines = 2

    new_imports = format_imports(imports) + "\n" * number_of_newlines
    if new_imports == imports_section:
        # No change, no need to write a new file.
        return False

    # Close the file explicitly so the new content is flushed before the
    # function reports the change.
    with open(filename, "w") as new_file:
        new_file.write(pyfile[:import_start])
        new_file.write(new_imports)
        new_file.write(pyfile[import_end:])

    return True

348

349

350

def process_file(fpath):
    """Process the file with the given path.

    Prints the path if the file was changed by the reformatting.
    """
    changed = reformat_importsection(fpath)
    if changed:
        # The parenthesised single-argument form prints the same text
        # whether print is a statement or a function.
        print(fpath)

355

356

357

def process_tree(dpath):
    """Process every *.py file found below the given directory."""
    for root, _subdirs, names in os.walk(dpath):
        for name in names:
            if not name.endswith('.py'):
                continue
            process_file(os.path.join(root, name))

363

364

365

if __name__ == "__main__":
    # Directories are walked recursively; anything else is treated as a
    # single file.
    for filename in sys.argv[1:]:
        handler = process_tree if os.path.isdir(filename) else process_file
        handler(filename)

371

Older »