~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#!/usr/bin/env python2.6
# -*- mode: python -*-

from base64 import standard_b64encode
from optparse import OptionParser
import sys

from lxml import etree


# XML namespace used to qualify every element name that this script looks
# up or creates in the bug import tree.
NS = "https://launchpad.net/xmlns/2006/bugs"


def norm_text(elem):
    """Normalize the text of `elem` in place.

    Nothing happens when `elem` is None. Otherwise a missing (None) text
    becomes the empty unicode string, and any other text has its leading
    and trailing whitespace stripped.
    """
    if elem is None:
        return
    current = elem.text
    elem.text = u"" if current is None else current.strip()


def truncate(text, message=None, max_lines=30):
    """Cut `text` down to at most its first `max_lines` lines.

    When `text` has more than `max_lines` lines, the first `max_lines`
    lines are kept (with surrounding whitespace stripped), followed by
    "..." and a "[Truncated]" marker. If `message` is given it is included
    in the marker as "[Truncated; <message>]".

    A text with `max_lines` lines or fewer is returned unchanged, so it is
    never labelled as truncated when nothing was removed. Note that the
    limit is on the number of lines only; a text made of a few very long
    lines is returned as-is.
    """
    lines = text.splitlines()
    if len(lines) <= max_lines:
        return text
    if message is None:
        marker = "[Truncated]"
    else:
        marker = "[Truncated; %s]" % message
    kept = "\n".join(lines[:max_lines]).strip()
    return u"%s...\n\n%s" % (kept, marker)


def problem(message):
    """Report a problem: write `message` plus a newline to stderr."""
    line = "{0}\n".format(message)
    sys.stderr.write(line)


def problem_detail(message):
    """Write `message` to stderr as a detail line, indented two spaces."""
    line = "  {0}\n".format(message)
    sys.stderr.write(line)


def problem_resolution(message):
    """Write `message` to stderr as a resolution line, prefixed "  --> "."""
    line = "  --> {0}\n".format(message)
    sys.stderr.write(line)


def problem_resolved():
    """End a problem report by writing a blank line to stderr."""
    stream = sys.stderr
    stream.write("\n")


def _resolve_duplicate_chains(duplicates):
    """Map each duplicate bug id to the bug at the end of its chain.

    `duplicates` maps a bug id to the id it is marked as a duplicate of.
    The result maps the same keys to the first id in the chain that is not
    itself a key of `duplicates`. The chain is followed iteratively with a
    record of visited ids, so a cyclic chain (A -> B -> A, or A -> A) is
    reported and that bug keeps its original target instead of recursing
    without end.
    """
    resolved = {}
    for bug_id, dupe_of in duplicates.items():
        seen = set([bug_id])
        target = dupe_of
        while target in duplicates:
            if target in seen:
                problem(
                    "Bug %s's duplicate chain contains a cycle." % bug_id)
                problem_resolution(
                    "Leaving it as a duplicate of %s." % dupe_of)
                problem_resolved()
                target = dupe_of
                break
            seen.add(target)
            target = duplicates[target]
        resolved[bug_id] = target
    return resolved


def massage(root, project_name, fix_nickname, tag_nickname,
            max_length=50000):
    """Fix problems in the bug import XML tree.

    This includes:

    - Adding a tags element if one does not exist,

    - Fixing up the bug nickname, adding the existing nickname as a tag,

    - Resolving duplicates to a bug that is not itself a duplicate
      (i.e. remove chains of duplicates),

    - Fixing up the description, including truncating it if it's too long,

    - Fixing up the first comment, including truncating it if it's too long,

    - Normalizing whitespace.

    The tree is modified in place and nothing is returned. Each problem
    found, and what was done about it, is reported on stderr.

    Parameters:

    - root: the root element of the tree; its bug children are processed.

    - project_name: used to build nicknames and attachment filenames.

    - fix_nickname: if true, every nickname is replaced with
      "<project_name>-<bug id>"; otherwise only missing nicknames are.

    - tag_nickname: if true, an existing non-empty nickname is also added
      as a tag.

    - max_length: the number of characters above which a description or
      first comment is considered too long.

    Raises NotImplementedError for an overlong description that cannot be
    handled yet: when the bug has no first comment text, or when the
    description differs from the first comment.
    """
    # Resolve duplicates as far as they'll go.
    duplicates = _resolve_duplicate_chains(dict(
        (node.getparent().get("id"), node.text)
        for node in root.findall('{%s}bug/{%s}duplicateof' % (NS, NS))
        if node.text is not None and node.text.isdigit()))

    # Scan the tree, fixing up issues.
    for bug in root.findall('{%s}bug' % NS):
        # Get or create the tags element.
        tags = bug.find('{%s}tags' % NS)
        if tags is None:
            tags = etree.SubElement(bug, '{%s}tags' % NS)

        nickname = bug.find('{%s}nickname' % NS)
        if nickname is None:
            # Add an empty nickname to be filled in later.
            nickname = etree.SubElement(bug, '{%s}nickname' % NS)
        elif tag_nickname and nickname.text:
            # Add the original nickname as a tag. An empty nickname is
            # skipped because it would only produce an empty tag.
            etree.SubElement(tags, '{%s}tag' % NS).text = nickname.text

        # Change the nickname.
        if nickname.text is None or fix_nickname:
            nickname.text = u"%s-%s" % (project_name, bug.get('id'))

        # Resolve duplicateof, if it exists.
        if bug.get("id") in duplicates:
            bug.find("{%s}duplicateof" % NS).text = duplicates[bug.get("id")]

        # Get the first comment and its text. We'll need these later. A
        # bug without any comment simply has no first comment text.
        first_comment = bug.find('{%s}comment' % NS)
        if first_comment is None:
            first_comment_text = None
        else:
            first_comment_text = first_comment.find('{%s}text' % NS)
        norm_text(first_comment_text)

        # Check the description. A missing description element (which is
        # what "Removing description" below leaves behind) is treated like
        # an empty one, so that running over the same data again works.
        description = bug.find('{%s}description' % NS)
        if description is None:
            description = etree.SubElement(bug, '{%s}description' % NS)
        norm_text(description)
        if len(description.text) == 0:
            problem("Bug %s has no description." % bug.get('id'))
            # Try and get the description from the first comment.
            if first_comment_text is None:
                problem_detail("No comments!")
                problem_resolution("Setting description to '-'.")
                description.text = u'-'
            elif len(first_comment_text.text) == 0:
                problem_detail("First comment has no text!")
                problem_resolution("Setting description to '-'.")
                description.text = u'-'
            else:
                problem_detail("First comment has text.")
                problem_resolution("Removing description.")
                # The spec says that the description is not optional, but the
                # importer treats it as optional.
                bug.remove(description)
            problem_resolved()
        elif len(description.text) > max_length:
            problem(
                "Bug %s's description is too long (%d chars)." % (
                    bug.get('id'), len(description.text),))
            # Compare the description to the first comment. If it's
            # the same, we don't need the description.
            if first_comment_text is None:
                problem_detail("No comments!")
                problem_resolution("Adding comment.")
                raise NotImplementedError("Add a comment.")
            elif description.text == first_comment_text.text:
                problem_detail('Description is same as first comment.')
                problem_resolution('Trimming description.')
                # It's safe to point the user to an attachment here,
                # even though it has not yet been created. It will be
                # created later because the first comment is also too
                # long.
                # NOTE(review): truncate() limits by line count, so a text
                # made of a few very long lines may still exceed
                # max_length afterwards.
                description.text = truncate(
                    description.text, 'see "Full description" attachment')
            else:
                problem_resolution("Truncating description.")
                raise NotImplementedError("Fix overlong description.")
            problem_resolved()

        # Check first comment text.
        if first_comment_text is not None:
            if len(first_comment_text.text) == 0:
                problem(
                    "Bug %s's first comment has no text." % bug.get('id'))
                problem_resolution("Setting comment text to '-'.")
                first_comment_text.text = u'-'
                problem_resolved()
            elif len(first_comment_text.text) > max_length:
                problem(
                    "Bug %s's first comment is too long (%d chars)." % (
                        bug.get('id'), len(first_comment_text.text)))
                # Save the original text as an attachment.
                problem_resolution('Adding attachment.')
                attachment = etree.SubElement(
                    first_comment, '{%s}attachment' % NS)
                etree.SubElement(attachment, '{%s}filename' % NS).text = (
                    u"%s-bug-%s-full-description.txt" % (
                        project_name, bug.get('id')))
                etree.SubElement(attachment, '{%s}title' % NS).text = (
                    u"Full description (text/plain, utf-8)")
                etree.SubElement(attachment, '{%s}mimetype' % NS).text = (
                    u"text/plain")
                # Base64 output is pure ASCII; decode it so the element
                # text is always a text string rather than a byte string.
                etree.SubElement(attachment, '{%s}contents' % NS).text = (
                    standard_b64encode(
                        first_comment_text.text.encode('utf-8')
                        ).decode('ascii'))
                # Trim the comment text.
                problem_resolution('Trimming comment text.')
                first_comment_text.text = truncate(
                    first_comment_text.text,
                    'see "Full description" attachment')
                problem_resolved()


def main(arguments):
    """Parse `arguments` and massage each named bug import XML file.

    A project name (-p/--project) is required; without it the option
    parser reports an error and exits. Every remaining argument is a
    filename; "-" (also the default when no filename is given) means read
    from stdin and write to stdout, while any other file is parsed,
    massaged and written back to the same path. Returns 0.
    """
    # optparse.OptionParser uses lower-case for usage and help text by
    # default. This is distressing, so it is corrected for below.
    blurb = """
        This acts as a filter: pipe bug import XML into stdin and capture
        stdout. By default it removes duplicate chains and ensures that bug
        descriptions and the first comment are correct. If either the
        description or the first comment exceeds 50,000 characters it is
        truncated and an attachment is created to hold the original.
        """
    parser = OptionParser(
        usage="Usage: %prog [options]",
        description=blurb.strip(),
        add_help_option=False)
    parser.add_option(
        "-p", "--project", dest="project_name", metavar="NAME",
        default=None,
        help="The project to which this import data refers.")
    parser.add_option(
        "--fix-nickname", action="store_true", dest="fix_nickname",
        default=False,
        help="Normalize the nickname to ${project_name}-${bug-id}.")
    parser.add_option(
        "--tag-nickname", action="store_true", dest="tag_nickname",
        default=False,
        help="Add the original bug nickname as a tag.")
    parser.add_option(
        "-h", "--help", action="help",
        help="Show this help message and exit.")

    options, filenames = parser.parse_args(arguments)
    if options.project_name is None:
        parser.error("A project name must be specified.")

    # With no filenames, act as a stdin-to-stdout filter.
    if not filenames:
        filenames = ["-"]

    for filename in filenames:
        use_stdio = (filename == "-")
        source = sys.stdin if use_stdio else filename
        destination = sys.stdout if use_stdio else filename
        tree = etree.parse(source)
        massage(
            root=tree.getroot(),
            project_name=options.project_name,
            fix_nickname=options.fix_nickname,
            tag_nickname=options.tag_nickname)
        tree.write(
            destination, encoding='utf-8',
            pretty_print=True, xml_declaration=True)

    return 0


if __name__ == '__main__':
    # Run as a script: pass the command-line arguments (minus the program
    # name) to main() and use its return value as the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)