~launchpad-pqm/launchpad/devel

11799.4.1 by Gavin Panella
Script to alter/repair bug import XML from the Openfiler project. May be generally useful.
1
#!/usr/bin/env python2.6
11799.4.3 by Gavin Panella
Allow specifying the project.
2
# -*- mode: python -*-
11799.4.1 by Gavin Panella
Script to alter/repair bug import XML from the Openfiler project. May be generally useful.
3
4
from base64 import standard_b64encode
11799.4.5 by Gavin Panella
Add an option parser and accept some nickname-related options.
5
from optparse import OptionParser
11799.4.6 by Gavin Panella
Move the __main__ content into a main() function.
6
import sys
11799.4.7 by Gavin Panella
Create better reporting functions.
7
11799.4.1 by Gavin Panella
Script to alter/repair bug import XML from the Openfiler project. May be generally useful.
8
from lxml import etree
9
10
11
# XML namespace of the bug import format. It is interpolated into the
# qualified element names (e.g. '{%s}bug' % NS) used to search the tree.
NS = "https://launchpad.net/xmlns/2006/bugs"
12
13
14
def norm_text(elem):
    """Normalize the text of `elem` in place.

    A missing text node (``None``) is replaced with the empty string; any
    other text has its leading and trailing whitespace stripped. Passing
    ``None`` instead of an element does nothing.
    """
    if elem is None:
        return
    current = elem.text
    elem.text = u"" if current is None else current.strip()
20
21
22
def truncate(text, message=None, max_lines=30):
    """Cut `text` down to its first `max_lines` lines.

    :param text: The text to shorten.
    :param message: Optional explanation added to the truncation marker,
        e.g. where the full text can be found.
    :param max_lines: The maximum number of lines to keep.
    :return: `text` unchanged when it has at most `max_lines` lines.
        Otherwise the first `max_lines` lines (stripped of surrounding
        whitespace), followed by an ellipsis, a blank line and a
        "[Truncated]" marker.
    """
    lines = text.splitlines()
    # Only mark the text as truncated when lines are actually dropped; a
    # text of exactly `max_lines` lines is already short enough.
    if len(lines) <= max_lines:
        return text
    if message is None:
        marker = "[Truncated]"
    else:
        marker = "[Truncated; %s]" % message
    kept = "\n".join(lines[:max_lines]).strip()
    return u"%s...\n\n%s" % (kept, marker)
33
34
11799.4.7 by Gavin Panella
Create better reporting functions.
35
def problem(message):
    """Write `message` to stderr as an unindented, newline-terminated line."""
    line = "{0}\n".format(message)
    sys.stderr.write(line)
37
38
39
def problem_detail(message):
    """Write `message` to stderr, indented by two spaces."""
    line = "  {0}\n".format(message)
    sys.stderr.write(line)
41
42
43
def problem_resolution(message):
    """Write `message` to stderr, indented and prefixed with an arrow."""
    line = "  --> {0}\n".format(message)
    sys.stderr.write(line)
45
46
47
def problem_resolved():
    """End a problem report by writing a blank line to stderr."""
    stream = sys.stderr
    stream.write("\n")
49
50
11799.4.9 by Gavin Panella
Rename munge() to massage().
51
def _resolve_duplicate_chains(duplicates):
    """Follow every duplicate chain in `duplicates` to its end.

    :param duplicates: A dict mapping a bug ID to the ID of the bug it is
        marked as a duplicate of.
    :return: A `(resolved, cyclic)` tuple. `resolved` maps a bug ID to the
        last bug in its chain, i.e. one that is not itself a key of
        `duplicates`. `cyclic` is the set of bug IDs whose chain loops
        back on itself; these have no entry in `resolved`.
    """
    resolved = {}
    cyclic = set()
    for bug_id in duplicates:
        seen = set([bug_id])
        target = duplicates[bug_id]
        # Walk the chain iteratively so that a long chain cannot exhaust
        # the recursion limit, remembering visited bugs to detect loops.
        while target in duplicates and target not in seen:
            seen.add(target)
            target = duplicates[target]
        if target in seen:
            cyclic.add(bug_id)
        else:
            resolved[bug_id] = target
    return resolved, cyclic


def _fix_description(bug, description, first_comment_text, max_text_length):
    """Repair an empty or overlong `description` element of `bug`."""
    bug_id = bug.get('id')
    if len(description.text) == 0:
        problem("Bug %s has no description." % bug_id)
        # Try and get the description from the first comment.
        if first_comment_text is None:
            problem_detail("No comments!")
            problem_resolution("Setting description to '-'.")
            description.text = u'-'
        elif len(first_comment_text.text) == 0:
            problem_detail("First comment has no text!")
            problem_resolution("Setting description to '-'.")
            description.text = u'-'
        else:
            problem_detail("First comment has text.")
            problem_resolution("Removing description.")
            # The spec says that the description is not optional, but the
            # importer treats it as optional.
            bug.remove(description)
        problem_resolved()
    elif len(description.text) > max_text_length:
        problem(
            "Bug %s's description is too long (%d chars)." % (
                bug_id, len(description.text),))
        # Compare the description to the first comment. If it's
        # the same, we don't need the description.
        if first_comment_text is None:
            problem_detail("No comments!")
            problem_resolution("Adding comment.")
            raise NotImplementedError("Add a comment.")
        elif description.text == first_comment_text.text:
            problem_detail('Description is same as first comment.')
            problem_resolution('Trimming description.')
            # It's safe to point the user to an attachment here,
            # even though it has not yet been created. It will be
            # created later because the first comment is also too
            # long.
            # NOTE(review): truncate() cuts by line count, so a text made
            # of few but very long lines may still exceed the limit.
            description.text = truncate(
                description.text, 'see "Full description" attachment')
        else:
            problem_resolution("Truncating description.")
            raise NotImplementedError("Fix overlong description.")
        problem_resolved()


def _fix_first_comment(bug, first_comment, first_comment_text, project_name,
                       max_text_length):
    """Repair an empty or overlong text element of the first comment."""
    bug_id = bug.get('id')
    if len(first_comment_text.text) == 0:
        problem("Bug %s's first comment has no text." % bug_id)
        problem_resolution("Setting comment text to '-'.")
        first_comment_text.text = u'-'
        problem_resolved()
    elif len(first_comment_text.text) > max_text_length:
        problem(
            "Bug %s's first comment is too long (%d chars)." % (
                bug_id, len(first_comment_text.text)))
        # Save the original text as an attachment.
        problem_resolution('Adding attachment.')
        attachment = etree.SubElement(
            first_comment, '{%s}attachment' % NS)
        etree.SubElement(attachment, '{%s}filename' % NS).text = (
            u"%s-bug-%s-full-description.txt" % (project_name, bug_id))
        etree.SubElement(attachment, '{%s}title' % NS).text = (
            u"Full description (text/plain, utf-8)")
        etree.SubElement(attachment, '{%s}mimetype' % NS).text = (
            u"text/plain")
        etree.SubElement(attachment, '{%s}contents' % NS).text = (
            standard_b64encode(
                first_comment_text.text.encode('utf-8')))
        # Trim the comment text.
        problem_resolution('Trimming comment text.')
        first_comment_text.text = truncate(
            first_comment_text.text,
            'see "Full description" attachment')
        problem_resolved()


def massage(root, project_name, fix_nickname, tag_nickname,
            max_text_length=50000):
    """Fix problems in the bug import XML tree.

    This includes:

    - Adding a tags element if one does not exist,

    - Fixing up the bug nickname, adding the existing nickname as a tag,

    - Resolving duplicates to a bug that is not itself a duplicate
      (i.e. remove chains of duplicates),

    - Fixing up the description, including truncating it if it's too long,

    - Fixing up the first comment, including truncating it if it's too long,

    - Normalizing whitespace.

    Problems found and the action taken are reported on stderr.

    :param root: The root element of the bug import tree. It is modified
        in place.
    :param project_name: The project name, used to build nicknames and
        attachment filenames.
    :param fix_nickname: If true, every nickname is replaced with
        "<project_name>-<bug id>"; otherwise only nicknames without text
        are filled in.
    :param tag_nickname: If true, the text of an existing nickname is
        added to the bug as a tag before the nickname is changed.
    :param max_text_length: The number of characters above which a
        description or first comment is considered too long.
    :raises NotImplementedError: If an overlong description has no first
        comment text to compare against, or differs from the first
        comment.
    """
    # Resolve duplicates as far as they'll go.
    duplicates = dict(
        (node.getparent().get("id"), node.text)
        for node in root.findall('{%s}bug/{%s}duplicateof' % (NS, NS))
        if node.text is not None and node.text.isdigit())
    duplicates, cyclic = _resolve_duplicate_chains(duplicates)

    # Scan the tree, fixing up issues.
    for bug in root.findall('{%s}bug' % NS):
        bug_id = bug.get('id')

        # Get or create the tags element.
        tags = bug.find('{%s}tags' % NS)
        if tags is None:
            tags = etree.SubElement(bug, '{%s}tags' % NS)

        nickname = bug.find('{%s}nickname' % NS)
        if nickname is None:
            # Add an empty nickname to be filled in later.
            nickname = etree.SubElement(bug, '{%s}nickname' % NS)
        elif tag_nickname and nickname.text:
            # Add the original nickname as a tag. A nickname without text
            # is skipped because it would only produce an empty tag.
            etree.SubElement(tags, '{%s}tag' % NS).text = nickname.text

        # Change the nickname.
        if nickname.text is None or fix_nickname:
            nickname.text = u"%s-%s" % (project_name, bug_id)

        # Resolve duplicateof, if it exists.
        if bug_id in cyclic:
            # A loop has no final bug to point at, so report it and leave
            # the element alone rather than recursing forever.
            problem("Bug %s's duplicate chain loops back on itself." % bug_id)
            problem_resolution("Leaving duplicateof unchanged.")
            problem_resolved()
        elif bug_id in duplicates:
            bug.find("{%s}duplicateof" % NS).text = duplicates[bug_id]

        # Get the first comment and its text. We'll need these later. A
        # bug without any comment simply has no first comment text.
        first_comment = bug.find('{%s}comment' % NS)
        if first_comment is None:
            first_comment_text = None
        else:
            first_comment_text = first_comment.find('{%s}text' % NS)
        norm_text(first_comment_text)

        # Check the description. A bug without a description element has
        # nothing to repair here.
        description = bug.find('{%s}description' % NS)
        norm_text(description)
        if description is not None:
            _fix_description(
                bug, description, first_comment_text, max_text_length)

        # Check first comment text.
        if first_comment_text is not None:
            _fix_first_comment(
                bug, first_comment, first_comment_text, project_name,
                max_text_length)
11799.4.1 by Gavin Panella
Script to alter/repair bug import XML from the Openfiler project. May be generally useful.
188
11799.4.4 by Gavin Panella
Move the munging code into a function.
189
11799.4.6 by Gavin Panella
Move the __main__ content into a main() function.
190
def main(arguments):
    """Parse `arguments`, then massage each named bug import file.

    Each positional argument is parsed, passed through massage() and
    written back to the same filename. "-" stands for reading stdin and
    writing stdout, and is used when no filenames are given.

    :param arguments: The command-line arguments, without the program name.
    :return: 0 once every input has been processed.
    """
    # optparse.OptionParser uses lower-case for usage and help text by
    # default. This is distressing, so it is corrected for below.
    description = """
        This acts as a filter: pipe bug import XML into stdin and capture
        stdout. By default it removes duplicate chains and ensures that bug
        descriptions and the first comment are correct. If either the
        description or the first comment exceeds 50,000 characters it is
        truncated and an attachment is created to hold the original.
        """
    parser = OptionParser(
        usage="Usage: %prog [options]",
        description=description.strip(),
        add_help_option=False)
    parser.add_option(
        "-p", "--project", dest="project_name", metavar="NAME",
        default=None,
        help="The project to which this import data refers.")
    parser.add_option(
        "--fix-nickname", action="store_true", dest="fix_nickname",
        default=False,
        help="Normalize the nickname to ${project_name}-${bug-id}.")
    parser.add_option(
        "--tag-nickname", action="store_true", dest="tag_nickname",
        default=False,
        help="Add the original bug nickname as a tag.")
    parser.add_option(
        "-h", "--help", action="help",
        help="Show this help message and exit.")

    opts, paths = parser.parse_args(arguments)
    if opts.project_name is None:
        parser.error("A project name must be specified.")

    # Without any filenames, act as a stdin-to-stdout filter.
    for path in (paths or ["-"]):
        use_stdio = (path == "-")
        source = sys.stdin if use_stdio else path
        tree = etree.parse(source)
        massage(
            root=tree.getroot(),
            project_name=opts.project_name,
            fix_nickname=opts.fix_nickname,
            tag_nickname=opts.tag_nickname)
        # Named files are rewritten in place.
        target = sys.stdout if use_stdio else path
        tree.write(
            target, encoding='utf-8',
            pretty_print=True, xml_declaration=True)

    return 0
241
242
243
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv[1:]))