~launchpad-pqm/launchpad/devel

7675.916.98 by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes.
1
# Copyright 2009-2010 Canonical Ltd.  This software is licensed under the
8687.15.13 by Karl Fogel
Add the copyright header block to files under lib/lp/archiveuploader/.
2
# GNU Affero General Public License version 3 (see the file LICENSE).
3
4197.1.1 by Christian Reis
Break apart upload code into canonical/archiveuploader
4
"""Archive uploader utilities."""
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
5
6
__metaclass__ = type
7
8
__all__ = [
11680.5.1 by Jelmer Vernooij
Add extract_dpkg_source function.
9
    'DpkgSourceError',
10
    'extract_dpkg_source',
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
11
    're_taint_free',
12
    're_isadeb',
13
    're_issource',
7675.424.5 by William Grant
Add archiveuploader utility function to determine a source file's extension and type.
14
    're_is_component_orig_tar_ext',
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
15
    're_no_epoch',
16
    're_no_revision',
17
    're_valid_version',
18
    're_valid_pkg_name',
19
    're_changes_file_name',
20
    're_extract_src_version',
7675.424.5 by William Grant
Add archiveuploader utility function to determine a source file's extension and type.
21
    'get_source_file_extension',
7675.432.5 by William Grant
Move determine_binary_file_type alongside determine_source_file_type in lp.archiveuploader.utils.
22
    'determine_binary_file_type',
7675.424.5 by William Grant
Add archiveuploader utility function to determine a source file's extension and type.
23
    'determine_source_file_type',
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
24
    'prefix_multi_line_string',
25
    'safe_fix_maintainer',
26
    'ParseMaintError',
27
    ]
28
29
30
import email.Header
31
import re
11680.5.4 by Jelmer Vernooij
More tests.
32
import signal
11680.5.1 by Jelmer Vernooij
Add extract_dpkg_source function.
33
import subprocess
3496.1.44 by Celso Providelo
review comments from kiko (take I and II), missing publishing/pool redesign and tests
34
12398.2.14 by Jonathan Lange
Move canonical.encoding to lp.services.encoding.
35
from lp.services.encoding import (
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
36
    ascii_smash,
37
    guess as guess_encoding,
38
    )
11411.6.6 by Julian Edwards
move BinaryPackageFormat and BinaryPackageFileType
39
from lp.soyuz.enums import BinaryPackageFileType
3147.2.19 by Celso Providelo
Fix bug # 32148, wrapping fix_maintainer function to allow both, unicode and string arguments.
40
2501 by Canonical.com Patch Queue Manager
Bring failed binary imports down from 472 to 6 by finding sources more aggressively. r=spiv
41
11680.5.1 by Jelmer Vernooij
Add extract_dpkg_source function.
42
class DpkgSourceError(Exception):
    """Error raised when a source package could not be unpacked.

    The exception message is rendered from `_fmt` using `result` and
    `output`.  `command`, `output` and `result` are also stored as
    attributes of the same names.
    """

    _fmt = "Unable to unpack source package (%(result)s): %(output)s"

    def __init__(self, command, output, result):
        details = dict(command=command, output=output, result=result)
        super(DpkgSourceError, self).__init__(self._fmt % details)
        self.command = command
        self.output = output
        self.result = result
11680.5.1 by Jelmer Vernooij
Add extract_dpkg_source function.
53
54
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
55
re_taint_free = re.compile(r"^[-+~/\.\w]+$")

# Binary package file names: three underscore-separated parts followed by
# a "deb"-style extension.
re_isadeb = re.compile(r"(.+?)_(.+?)_(.+)\.(u?d?deb)$")

# Regex fragments for the file extensions accepted as source package
# files.  They are raw strings with every literal dot escaped, so that a
# name such as "foo_1.0.diffXgz" is not mistaken for a ".diff.gz" file.
source_file_exts = [
    r'orig(?:-.+)?\.tar\.(?:gz|bz2|xz)', r'diff\.gz',
    r'(?:debian\.)?tar\.(?:gz|bz2|xz)', r'dsc']
# Groups: (1) everything before the first underscore, (2) the text up to
# the extension, (3) the extension itself.
re_issource = re.compile(
    r"([^_]+)_(.+?)\.(%s)" % "|".join(source_file_exts))
# The following four patterns classify an extension (group 3 of
# re_issource); their dots are escaped for the same reason as above.
re_is_component_orig_tar_ext = re.compile(
    r"^orig-(.+)\.tar\.(?:gz|bz2|xz)$")
re_is_orig_tar_ext = re.compile(r"^orig\.tar\.(?:gz|bz2|xz)$")
re_is_debian_tar_ext = re.compile(r"^debian\.tar\.(?:gz|bz2|xz)$")
re_is_native_tar_ext = re.compile(r"^tar\.(?:gz|bz2|xz)$")

re_no_epoch = re.compile(r"^\d+\:")
re_no_revision = re.compile(r"-[^-]+$")

re_valid_version = re.compile(r"^([0-9]+:)?[0-9A-Za-z\.\-\+~:]+$")
re_valid_pkg_name = re.compile(r"^[\dA-Za-z][\dA-Za-z\+\-\.]+$")
# The dot before "changes" is a literal dot, not "any character".
re_changes_file_name = re.compile(r"([^_]+)_([^_]+)_([^\.]+)\.changes")
re_extract_src_version = re.compile(r"(\S+)\s*\((.*)\)")

# Groups: (1) the name, (2) the text between the angle brackets.
re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>")
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
78
79
7675.424.5 by William Grant
Add archiveuploader utility function to determine a source file's extension and type.
80
def get_source_file_extension(filename):
    """Return the extension part of a source file name.

    The extension is the third group captured by `re_issource`.  None is
    returned when `filename` does not match that pattern.
    """
    found = re_issource.match(filename)
    return None if found is None else found.group(3)
7675.424.5 by William Grant
Add archiveuploader utility function to determine a source file's extension and type.
86
87
88
def determine_source_file_type(filename):
    """Work out the SourcePackageFileType for the given filename.

    :param filename: Name of a source package file.
    :return: The matching SourcePackageFileType item, or None when the
        name has no recognised source file extension.
    """
    # Imported here rather than at module level to avoid circular imports.
    from lp.registry.interfaces.sourcepackage import SourcePackageFileType

    ext = get_source_file_extension(filename)
    if ext is None:
        return None
    if ext == "dsc":
        return SourcePackageFileType.DSC
    if ext == "diff.gz":
        return SourcePackageFileType.DIFF
    # The tarball flavours are told apart by the shape of the extension.
    if re_is_orig_tar_ext.match(ext):
        return SourcePackageFileType.ORIG_TARBALL
    if re_is_component_orig_tar_ext.match(ext):
        return SourcePackageFileType.COMPONENT_ORIG_TARBALL
    if re_is_debian_tar_ext.match(ext):
        return SourcePackageFileType.DEBIAN_TARBALL
    if re_is_native_tar_ext.match(ext):
        return SourcePackageFileType.NATIVE_TARBALL
    return None
7675.424.5 by William Grant
Add archiveuploader utility function to determine a source file's extension and type.
110
111
7675.432.5 by William Grant
Move determine_binary_file_type alongside determine_source_file_type in lp.archiveuploader.utils.
112
def determine_binary_file_type(filename):
    """Work out the BinaryPackageFileType for the given filename.

    The decision is made purely on the file name suffix; None is returned
    for anything other than ".deb", ".udeb" or ".ddeb".
    """
    if filename.endswith(".deb"):
        return BinaryPackageFileType.DEB
    if filename.endswith(".udeb"):
        return BinaryPackageFileType.UDEB
    if filename.endswith(".ddeb"):
        return BinaryPackageFileType.DDEB
    return None
122
123
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
124
def prefix_multi_line_string(str, prefix, include_blank_lines=0):
    """Prefix each line of a string with a token or tag.

    The input is split on newlines and every line is stripped of
    surrounding whitespace.  Lines that are empty after stripping are
    dropped unless `include_blank_lines` is true.  The result carries no
    trailing newline.  Can be used for quoting text etc.
    """
    stripped_lines = [line.strip() for line in str.split('\n')]
    return "\n".join(
        ["%s%s" % (prefix, line)
         for line in stripped_lines
         if line or include_blank_lines])
138
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
139
8213.5.1 by Celso Providelo
First take for supporting DDEBs uploads in Soyuz. Bug #285205.
140
def extract_component_from_section(section, default_component="main"):
    """Split a "component/section" value into its two parts.

    :param section: Section name, optionally prefixed with a component
        and a slash.
    :param default_component: Component to report when `section` contains
        no slash.
    :return: A (section, component) tuple.
    """
    if "/" not in section:
        return (section, default_component)
    # Split on the first slash only: unpacking a plain split("/") raises
    # ValueError as soon as the value contains more than one slash.
    component, section = section.split("/", 1)
    return (section, component)
148
149
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
150
def force_to_utf8(s):
    """Return `s` encoded as UTF-8.

    A value that already decodes as UTF-8 is handed back unchanged.
    Anything else is assumed to be ISO-8859-1 and is re-encoded as UTF-8.
    """
    try:
        unicode(s, 'utf-8')
    except UnicodeError:
        # Not valid UTF-8: treat the bytes as ISO-8859-1 instead.
        return unicode(s, 'iso8859-1').encode('utf-8')
    return s
161
3804.1.23 by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests.
162
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
163
def rfc2047_encode(s):
    """Encode a (header) string per RFC2047 when that is needed.

    Empty input yields ''.  Pure ASCII input is returned untouched.  Other
    input is wrapped in an email Header declared as UTF-8 when it decodes
    as UTF-8, and as ISO-8859-1 otherwise.
    """
    if not s:
        return ''

    try:
        s.decode('us-ascii')
    except UnicodeError:
        pass
    else:
        # Plain ASCII needs no encoding at all.
        return s

    try:
        s.decode('utf8')
        return str(email.Header.Header(s, 'utf-8', 998))
    except UnicodeError:
        return str(email.Header.Header(s, 'iso-8859-1', 998))
184
185
186
class ParseMaintError(Exception):
    """Raised when a maintainer field cannot be parsed.

    Attributes:
       message -- explanation of the error
    """

    def __init__(self, message):
        # Passing the message up stores it as the sole item of `args`.
        super(ParseMaintError, self).__init__(message)
        self.message = message
197
198
11204.5.6 by William Grant
Fix lint.
199
def fix_maintainer(maintainer, field_name="Maintainer"):
    """Parse a Maintainer or Changed-By field.

    Returns a 4-tuple of:

    (1) an RFC822 compatible version,
    (2) an RFC2047 compatible version,
    (3) the name
    (4) the email

    The name is forced to UTF-8 for both (1) and (3).  If the name
    contains '.' or ',', (1) and (2) use the 'email (name)' format rather
    than 'name <email>'.  A blank field yields four empty strings.

    :raises ParseMaintError: if the field cannot be parsed, or if the
        email part contains no '@' and does not start with 'buildd_'.
    """
    maintainer = maintainer.strip()
    if not maintainer:
        return ('', '', '', '')

    if "<" not in maintainer:
        # No angle brackets at all: the whole field is the address.
        name, address = "", maintainer
    elif maintainer.startswith("<") and maintainer.endswith(">"):
        # Only a bracketed address, without a name.
        name, address = "", maintainer[1:-1]
    else:
        parsed = re_parse_maintainer.match(maintainer)
        if parsed is None:
            raise ParseMaintError(
                "%s: doesn't parse as a valid %s field."
                % (maintainer, field_name))
        name, address = parsed.groups()
        # Just in case the maintainer ended up with nested angles.
        address = address.lstrip("<")

    # The RFC2047 compliant name is derived from the name as parsed,
    # before the name itself is forced to UTF-8.
    encoded_name = rfc2047_encode(name)
    name = force_to_utf8(name)

    # If the maintainer's name contains a full stop then the whole field
    # will not work directly as an email address due to a misfeature in
    # the syntax specified in RFC822; see Debian policy 5.6.2 (Maintainer
    # field syntax) for details.
    if ',' in name or '.' in name:
        rfc822_maint = "%s (%s)" % (address, name)
        rfc2047_maint = "%s (%s)" % (address, encoded_name)
    else:
        rfc822_maint = "%s <%s>" % (name, address)
        rfc2047_maint = "%s <%s>" % (encoded_name, address)

    if "@" not in address and not address.startswith("buildd_"):
        raise ParseMaintError(
            "%s: no @ found in email address part." % maintainer)

    return (rfc822_maint, rfc2047_maint, name, address)
254
255
3147.2.19 by Celso Providelo
Fix bug # 32148, wrapping fix_maintainer function to allow both, unicode and string arguments.
256
def safe_fix_maintainer(content, fieldname):
    """Wrapper for fix_maintainer() accepting unicode or string content.

    Content whose type is not exactly `unicode` is first converted with
    guess_encoding().  The text then goes through ascii_smash() before
    being handed to fix_maintainer() together with `fieldname`.
    """
    text = content if type(content) is unicode else guess_encoding(content)
    return fix_maintainer(ascii_smash(text), fieldname)
11680.5.1 by Jelmer Vernooij
Add extract_dpkg_source function.
268
269
270
def extract_dpkg_source(dsc_filepath, target):
    """Extract a source package by running `dpkg-source -sn -x`.

    :param dsc_filepath: Path of the DSC file
    :param target: Target directory; used as the working directory of the
        dpkg-source process.
    :raises DpkgSourceError: if dpkg-source exits with a non-zero status.
        The error carries the captured standard output, with every line
        indented by two spaces.
    """

    def restore_default_sigpipe():
        # Python installs a SIGPIPE handler by default. This is usually not
        # what non-Python subprocesses expect.
        # http://www.chiark.greenend.org.uk/ucgi/~cjwatson/ \
        #   blosxom/2009-07-02-python-sigpipe.html
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    command = ["dpkg-source", "-sn", "-x", dsc_filepath]
    process = subprocess.Popen(
        command, cwd=target, preexec_fn=restore_default_sigpipe,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Standard error is captured as well, but only standard output is used.
    stdout, unused_stderr = process.communicate()
    status = process.wait()
    if status == 0:
        return
    raise DpkgSourceError(
        command=command, result=status,
        output=prefix_multi_line_string(stdout, "  "))