~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
#!/usr/bin/python
#
# Copyright 2009 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""Create a XXX comment reports in many formats."""


__metaclass__ = type


import cgi
from optparse import OptionParser
import os
import re
import sys
from textwrap import dedent
import time

from bzrlib import bzrdir
from bzrlib.errors import (NotBranchError)


excluded_dir_re = re.compile(r'.*(not-used|lib/mailman)')
excluded_file_re = re.compile(r'.*(pyc$)')


class Report:
    """The base class for an XXX report."""
    # Match XXX comments.
    xxx_re = re.compile('^\s*(<!--|//|#) XXX[:,]?')

    def __init__(self, root_dir, output_name=None):
        """Create and write the HTML report to a file.

        :param root_dir: The root directory that contains files with comments.
        :param output_name: The name of the html file to write to.
        """
        assert os.path.isdir(root_dir), (
            "Root directory does not exist: %s." % root_dir)
        self.root_dir = root_dir
        self.output_name = output_name
        self.revno = self._getBranchRevno()
        self.comments = self._findComments()

    def _close(self, output_file):
        """Close the output_file if it was opened."""
        if self.output_name is not None:
            output_file.close()

    def _getBranchRevno(self):
        """Return the bazaar revision number of the branch or None."""
        # pylint: disable-msg=W0612
        a_bzrdir = bzrdir.BzrDir.open_containing(self.root_dir)[0]
        try:
            branch = a_bzrdir.open_branch()
            branch.lock_read()
            try:
                revno, head = branch.last_revision_info()
            finally:
                branch.unlock()
        except NotBranchError:
            revno = None
        return revno

    def _findComments(self):
        """Set the list of XXX comments in files below a directory."""
        comments = []
        for file_path in self._findFiles():
            comments.extend(self._extractComments(file_path))
        return comments


    def _findFiles(self):
        """Generate a list of matching files below a directory."""
        for path, subdirs, files in os.walk(self.root_dir):
            subdirs[:] = [dir_name for dir_name in subdirs
                          if self._isTraversable(path, dir_name)]
            for file in files:
                file_path = os.path.join(path, file)
                if os.path.islink(file_path):
                    continue
                if excluded_file_re.match(file) is None:
                    yield os.path.join(path, file)

    def _isTraversable(self, path, dir_name):
        """Return True if path/dir_name does not match dir_re pattern."""
        return excluded_dir_re.match(os.path.join(path, dir_name)) is None

    def _extractComments(self, file_path):
        """Return a list of XXX comments in a file.

        :param file_path: The path of the file that contains XXX comments.
        """
        comments = []
        file = open(file_path, 'r')
        try:
            comment = None
            for line_num, line in enumerate(file):
                xxx_mark = self.xxx_re.match(line)
                if xxx_mark is None and comment is None:
                    # The loop is not in a comment or starting a comment.
                    continue
                if xxx_mark is not None and comment is not None:
                    # Two XXX comments run together.
                    self._finaliseComment(comments, comment)
                    comment = None
                if xxx_mark is not None and comment is None:
                    # Start a new comment.
                    comment = self.extractMetadata(line)
                    comment['file_path'] = file_path
                    comment['line_no'] = line_num + 1
                    comment['context_list'] = []
                elif '#' in line and '##' not in line:
                    # Continue collecting the comment text.
                    leading_, text = line.split('#', 1)
                    comment['text_list'].append(text.lstrip())
                elif xxx_mark is None and len(comment['context_list']) < 2:
                    # Collect the code context of the comment.
                    comment['context_list'].append(line)
                elif xxx_mark is None and len(comment['context_list']) == 2:
                    # Finalise the comment.
                    comment['context_list'].append(line)
                    self._finaliseComment(comments, comment)
                    comment = None
                else:
                    raise ValueError, (
                        "comment or xxx_mark are in an unknown state.")
            if comment is not None:
                self._finaliseComment(comments, comment)
        finally:
            file.close()
        return comments

    def _finaliseComment(self, comments, comment):
        """Replace the lists with strs and append the comment to comments."""
        context = ''.join(comment['context_list'])
        if context.strip() == '':
            # Whitespace is not context; do not store it.
            context = ''
        comment['context'] = context
        comment['text'] = ''.join(comment['text_list']).strip()
        del comment['context_list']
        del comment['text_list']
        comments.append(comment)

    # The standard XXX comment form of:
    # 'XXX: First Last Name 2007-07-01 bug=nnnn spec=cccc:'
    # Colons, commas, and spaces may follow each token.
    xxx_person_date_re = re.compile(r"""
        .*XXX[:,]?[ ]                               # The XXX indicator.
        (?P<person>[a-zA-Z][^:]*[\w])[,: ]*         # The persons's nick.
        (?P<date>\d\d\d\d[/-]?\d\d[/-]?\d\d)[,: ]*  # The date in YYYY-MM-DD.
        (?:[(]?bug[s]?[= ](?P<bug>[\w-]*)[),: ]*)?  # The bug id.
        (?:[(]?spec[= ](?P<spec>[\w-]*)[),: ]*)?    # The spec name.
        (?P<text>.*)                                # The comment text.
        """, re.VERBOSE)

    # An uncommon XXX comment form of:
    # 'XXX: 2007-01-01 First Last Name bug=nnnn spec=cccc:'
    # Colons, commas, and spaces may follow each token.
    xxx_date_person_re = re.compile(r"""
        .*XXX[:,]?[ ]                               # The XXX indicator.
        (?P<date>\d\d\d\d[/-]?\d\d[/-]?\d\d)[,: ]*  # The date in YYYY-MM-DD.
        (?P<person>[a-zA-Z][\w]+)[,: ]*             # The person's nick.
        (?:[(]?bug[s]?[= ](?P<bug>[\w-]*)[),: ]*)?  # The bug id.
        (?:[(]?spec[= ](?P<spec>[\w-]*)[),: ]*)?    # The spec name.
        (?P<text>.*)                                # The comment text.
        """, re.VERBOSE)

    def extractMetadata(self, comment_line):
        """Return a dict of metadata extracted from the comment line.

        :param comment_line: The first line of an XXX comment contains the
            metadata.
        :return: dict(person, date, bug, spec, and [text]). The text is the
        same as remainder of the comment_line after the metadata is extracted.
        """
        comment = dict(person=None, date=None, bug=None, spec=None, text=[])
        match = (self.xxx_person_date_re.match(comment_line)
                 or self.xxx_date_person_re.match(comment_line))
        if match is not None:
            # This comment follows a known style.
            comment['person'] = match.group('person')
            comment['date'] = match.group('date')
            comment['bug'] = match.group('bug') or None
            comment['spec'] = match.group('spec') or None
            text = match.group('text').lstrip(':, ')
        else:
            # Unknown comment format.
            text = comment_line

        text = text.strip()
        comment['text_list'] = [text + '\n']
        return comment

    def write(self):
        """Write the total count of comments."""
        output_file = self._open()
        try:
            output_file.write('%s\n' % len(self.comments))
            output_file.flush()
        finally:
            self._close(output_file)

    def _open(self):
        """Open the output_name or use STDOUT."""
        if self.output_name is not None:
            return open(self.output_name, 'w')
        return sys.stdout


class HTMLReport(Report):
    """A HTML XXX report."""
    # Match URLs.
    http_re = re.compile('(https?://[^ \n&]*)')

    # Match bugs.
    bug_link_re = re.compile(r'\b(bugs?:?) #?(\d+)', re.IGNORECASE)

    report_top = """\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
  <head>
    <title>XXX Comment report for Launchpad</title>
    <style type="text/css" media="screen, print">
      @import url(https://launchpad.net/+icing/rev6895/+style-slimmer.css);
      .context {
        border: #666 1px dashed;
        width: 50em;
      }
    </style>
  </head>

  <body style="margin: 1em;">
    <h1>XXX Comment report for Launchpad</h1>

    <p>
      A report of the XXX comments in the rocketfuel branch. All files
      except *.pyc files were examined.
      <br />This report may also be available as a tab-delimted file:
      <a href="xxx-report.csv">xxx-report.csv</a>
    </p>

    <h3>Summary</h3>

    <p>
      There are <strong>%(commentcount)s XXX comments</strong>
      in <strong>revno: %(revno)s</strong>.
      <br />Generated on %(reporttime)s.
    </p>

    <hr/>

    <h3>Listing</h3>

    <ol>"""

    report_comment = """
      <li>
        <div>
          <strong>File: %(file_path)s:%(line_no)s</strong>
        </div>
        <div style="margin: .5em 0em 0em 0em;">
        <strong class="xxx">XXX</strong>:
        <strong class="person">%(person)s</strong>
        <strong class="date">%(date)s</strong>
        bug %(bugurl)s
        spec %(specurl)s
        </div>
        <pre style="margin-top: 0px;">%(text)s</pre>
        <pre class="context">%(context)s</pre>
      </li>"""

    report_bottom = """
    </ol>
  </body>
</html>
"""

    def write(self):
        """Write the report in HTML format."""
        report_time = time.strftime(
            "%a, %d %b %Y %H:%M:%S UTC", time.gmtime())
        output_file = self._open()
        try:
            output_file.write(
                self.report_top % {"commentcount": len(self.comments),
                              "reporttime": report_time,
                              "revno": self.revno})

            for comment in self.comments:
                comment['text'] = self.markupText(comment['text'])
                comment['context'] = self.markupText(comment['context'])
                if comment['bug'] is not None:
                    comment['bugurl'] = (
                        '<a href="https://bugs.launchpad.net/bugs/%s">%s</a>'
                        % (comment['bug'], comment['bug']))
                else:
                    comment['bugurl'] = comment['bug']
                if comment['spec'] is not None:
                    comment['specurl'] = (
                        '<a href="https://blueprints.launchpad.net'
                        '/launchpad-project/+specs?searchtext=%s">%s</a>'
                        % (comment['spec'], comment['spec']))
                else:
                    comment['specurl'] = comment['spec']
                output_file.write(self.report_comment % comment)

            output_file.write(self.report_bottom)
            output_file.flush()
        finally:
            self._close(output_file)

    def markupText(self, text):
        """Return the line as HTML markup.

        :param text: The text to escape and link.
        """
        text = cgi.escape(text)
        text = self.http_re.sub(r'<a href="\1">\1</a>', text)
        bug_sub = r'<a href="https://bugs.launchpad.net/bugs/\2">\1 \2</a>'
        text = self.bug_link_re.sub(bug_sub, text)
        return text


class CSVReport(Report):
    """A CSV XXX report."""
    report_header = (
        'File_Path, Line_No, Person, Date, Bug, Spec, Text\n')
    report_comment = (
        '%(file_path)s, %(line_no)s, '
        '%(person)s, %(date)s, %(bug)s, %(spec)s, %(text)s\n')

    def markupText(self, text):
        """Return the line as TSV markup.

        :param text: The text to escape.
        """
        if text is not None:
            return text.replace('\n', ' ').replace(',', ';')

    def write(self):
        """Write the report in CSV format."""
        output_file = self._open()
        try:
            output_file.write(self.report_header)
            for comment in self.comments:
                comment['person'] = self.markupText(comment['person'])
                comment['text'] = self.markupText(comment['text'])
                output_file.write(self.report_comment % comment)
            output_file.flush()
        finally:
            self._close(output_file)


class TSVReport(CSVReport):
    """A TSV XXX report."""
    report_header = (
        'File_Path\tLine_No\tPerson\tDate\tBug\tSpec\tText\n')
    report_comment = (
        '%(file_path)s\t%(line_no)s\t'
        '%(person)s\t%(date)s\t%(bug)s\t%(spec)s\t%(text)s\n')

    def markupText(self, text):
        """Return the line as TSV markup.

        :param text: The text to escape.
        """
        if text is not None:
            return text.replace('\n', ' ').replace('\t', ' ')


def get_option_parser():
    """Return the option parser for this program."""
    usage = dedent("""    %prog [options] <root-dir>

    Create a report of all the XXX comments in the files below a directory.
    Set the -f option to 'count' to print the total number of XXX comments,
    which is the default when -f is not set.""")
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-f", "--format", dest="format", default="count",
        help="the format of the report: count, html, csv, tsv")
    parser.add_option(
        "-o", "--output", dest="output_name",
        help="the name of the output file, otherwise STDOUT is used")
    return parser


def main(argv=None):
    """Run the command line operations."""
    if argv is None:
        argv = sys.argv
    parser = get_option_parser()
    (options, arguments) = parser.parse_args(args=argv[1:])
    if len(arguments) != 1:
        parser.error('No root directory was provided.')
    root_dir = arguments[0]

    if options.format.lower() == 'html':
        report = HTMLReport(root_dir, options.output_name)
    elif options.format.lower() == 'tsv':
        report = TSVReport(root_dir, options.output_name)
    elif options.format.lower() == 'csv':
        report = CSVReport(root_dir, options.output_name)
    else:
        report = Report(root_dir, options.output_name)
    report.write()
    sys.exit(0)


if __name__ == '__main__':
    sys.exit(main())