7675.916.98
by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes. |
1 |
# Copyright 2009-2010 Canonical Ltd. This software is licensed under the
|
8687.15.13
by Karl Fogel
Add the copyright header block to files under lib/lp/archiveuploader/. |
2 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
3 |
||
4197.1.1
by Christian Reis
Break apart upload code into canonical/archiveuploader |
4 |
"""Archive uploader utilities."""
|
3804.1.23
by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests. |
5 |
|
6 |
# Make every class defined in this module a new-style class (Python 2).
__metaclass__ = type

# Public names exported by ``from <this module> import *``.
__all__ = [
    'DpkgSourceError',
    'extract_dpkg_source',
    're_taint_free',
    're_isadeb',
    're_issource',
    're_is_component_orig_tar_ext',
    're_no_epoch',
    're_no_revision',
    're_valid_version',
    're_valid_pkg_name',
    're_changes_file_name',
    're_extract_src_version',
    'get_source_file_extension',
    'determine_binary_file_type',
    'determine_source_file_type',
    'prefix_multi_line_string',
    'safe_fix_maintainer',
    'ParseMaintError',
    ]
|
|
28 |
||
29 |
||
30 |
import email.Header |
|
31 |
import re |
|
11680.5.4
by Jelmer Vernooij
More tests. |
32 |
import signal |
11680.5.1
by Jelmer Vernooij
Add extract_dpkg_source function. |
33 |
import subprocess |
3496.1.44
by Celso Providelo
review comments from kiko (take I and II), missing publishing/pool redesign and tests |
34 |
|
12398.2.14
by Jonathan Lange
Move canonical.encoding to lp.services.encoding. |
35 |
from lp.services.encoding import ( |
11403.1.4
by Henning Eggers
Reformatted imports using format-imports script r32. |
36 |
ascii_smash, |
37 |
guess as guess_encoding, |
|
38 |
)
|
|
11411.6.6
by Julian Edwards
move BinaryPackageFormat and BinaryPackageFileType |
39 |
from lp.soyuz.enums import BinaryPackageFileType |
3147.2.19
by Celso Providelo
Fix bug # 32148, wrapping fix_maintainer function to allow both, unicode and string arguments. |
40 |
|
2501
by Canonical.com Patch Queue Manager
Bring failed binary imports down from 472 to 6 by finding sources more aggressively. r=spiv |
41 |
|
11680.5.1
by Jelmer Vernooij
Add extract_dpkg_source function. |
42 |
class DpkgSourceError(Exception):
    """Raised when unpacking a source package fails.

    The exception message is built from `_fmt` using the result and the
    output; the command, output and result are also kept as attributes.
    """

    _fmt = "Unable to unpack source package (%(result)s): %(output)s"

    def __init__(self, command, output, result):
        """Build the message and remember the failure details.

        :param command: The command that was run.
        :param output: The output produced by the command.
        :param result: The result (exit status) of the command.
        """
        details = dict(command=command, output=output, result=result)
        super(DpkgSourceError, self).__init__(self._fmt % details)
        self.command = command
        self.output = output
        self.result = result
11680.5.1
by Jelmer Vernooij
Add extract_dpkg_source function. |
53 |
|
54 |
||
3804.1.23
by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests. |
55 |
# Strings made only of word characters and "-", "+", "~", "/", ".".
re_taint_free = re.compile(r"^[-+~/\.\w]+$")

# Binary package file names: three "_"-separated parts and a deb-like
# extension, which is captured as the fourth group.
re_isadeb = re.compile(r"(.+?)_(.+?)_(.+)\.(u?d?deb)$")

# Regular expression fragments for the extensions of source package files.
# They are raw strings with every literal dot escaped, so that "." only
# matches an actual dot.
source_file_exts = [
    r'orig(?:-.+)?\.tar\.(?:gz|bz2|xz)', r'diff\.gz',
    r'(?:debian\.)?tar\.(?:gz|bz2|xz)', r'dsc']

# Source file names: group 1 is the part before the first underscore,
# group 3 is the extension (one of `source_file_exts`).
re_issource = re.compile(
    r"([^_]+)_(.+?)\.(%s)" % "|".join(source_file_exts))

# Classifiers for an extension as captured by `re_issource`.
re_is_component_orig_tar_ext = re.compile(
    r"^orig-(.+)\.tar\.(?:gz|bz2|xz)$")
re_is_orig_tar_ext = re.compile(r"^orig\.tar\.(?:gz|bz2|xz)$")
re_is_debian_tar_ext = re.compile(r"^debian\.tar\.(?:gz|bz2|xz)$")
re_is_native_tar_ext = re.compile(r"^tar\.(?:gz|bz2|xz)$")

# Leading "<digits>:" prefix.
re_no_epoch = re.compile(r"^\d+\:")
# Trailing "-<text without hyphens>" suffix.
re_no_revision = re.compile(r"-[^-]+$")

re_valid_version = re.compile(r"^([0-9]+:)?[0-9A-Za-z\.\-\+~:]+$")
re_valid_pkg_name = re.compile(r"^[\dA-Za-z][\dA-Za-z\+\-\.]+$")
# Changes file names: three "_"-separated parts followed by ".changes".
re_changes_file_name = re.compile(r"([^_]+)_([^_]+)_([^\.]+)\.changes")
# "<name> (<version>)" pairs.
re_extract_src_version = re.compile(r"(\S+)\s*\((.*)\)")

# "<name> <<email>>" maintainer fields: group 1 is the name, group 2 the
# text between the angle brackets.
re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>")
3804.1.23
by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests. |
78 |
|
79 |
||
7675.424.5
by William Grant
Add archiveuploader utility function to determine a source file's extension and type. |
80 |
def get_source_file_extension(filename):
    """Return the extension of a source file name.

    The extension is the third group of `re_issource`; None is returned
    when `filename` does not match that expression.
    """
    found = re_issource.match(filename)
    return found.group(3) if found is not None else None
|
7675.424.5
by William Grant
Add archiveuploader utility function to determine a source file's extension and type. |
86 |
|
87 |
||
88 |
def determine_source_file_type(filename):
    """Return the SourcePackageFileType matching `filename`, or None.

    None is returned both when no source extension can be extracted from
    the file name and when the extension is not one of the known kinds.
    """
    # Imported here to avoid circular imports.
    from lp.registry.interfaces.sourcepackage import SourcePackageFileType

    ext = get_source_file_extension(filename)
    if ext is None:
        return None

    # Fixed extensions first, then the tarball flavours in the same order
    # of precedence: orig, component orig, debian, native.
    if ext == "dsc":
        return SourcePackageFileType.DSC
    if ext == "diff.gz":
        return SourcePackageFileType.DIFF
    if re_is_orig_tar_ext.match(ext):
        return SourcePackageFileType.ORIG_TARBALL
    if re_is_component_orig_tar_ext.match(ext):
        return SourcePackageFileType.COMPONENT_ORIG_TARBALL
    if re_is_debian_tar_ext.match(ext):
        return SourcePackageFileType.DEBIAN_TARBALL
    if re_is_native_tar_ext.match(ext):
        return SourcePackageFileType.NATIVE_TARBALL
    return None
|
7675.424.5
by William Grant
Add archiveuploader utility function to determine a source file's extension and type. |
110 |
|
111 |
||
7675.432.5
by William Grant
Move determine_binary_file_type alongside determine_source_file_type in lp.archiveuploader.utils. |
112 |
def determine_binary_file_type(filename):
    """Return the BinaryPackageFileType matching `filename`, or None.

    The type is chosen from the file name suffix: ".deb", ".udeb" or
    ".ddeb". Any other suffix yields None.
    """
    suffix_types = (
        (".deb", BinaryPackageFileType.DEB),
        (".udeb", BinaryPackageFileType.UDEB),
        (".ddeb", BinaryPackageFileType.DDEB),
        )
    for suffix, file_type in suffix_types:
        if filename.endswith(suffix):
            return file_type
    return None
|
122 |
||
123 |
||
1102
by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs |
124 |
def prefix_multi_line_string(str, prefix, include_blank_lines=0):
    """Split a string into lines and prefix each one with a token or tag.

    Can be used for quoting text etc. Every line is stripped of leading
    and trailing whitespace before the prefix is added.

    :param str: The text to process (the name shadows the builtin, but is
        kept so that keyword callers continue to work).
    :param prefix: The token put in front of every emitted line.
    :param include_blank_lines: When true, lines that are empty after
        stripping are emitted too (as the bare prefix); otherwise they
        are dropped.
    :return: The prefixed lines joined with newlines, without a trailing
        newline; an empty string when no line is emitted.
    """
    stripped_lines = (line.strip() for line in str.split('\n'))
    # Join once rather than growing the result with repeated `+=` and
    # trimming the trailing newline afterwards.
    return "\n".join(
        "%s%s" % (prefix, line)
        for line in stripped_lines
        if line or include_blank_lines)
|
138 |
||
3804.1.23
by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests. |
139 |
|
8213.5.1
by Celso Providelo
First take for supporting DDEBs uploads in Soyuz. Bug #285205. |
140 |
def extract_component_from_section(section, default_component="main"):
    """Split a "component/section" value into its two parts.

    :param section: Either a bare section name or "component/section".
    :param default_component: The component used when `section` contains
        no "/".
    :return: A (section, component) tuple.
    """
    if "/" in section:
        # Split on the first slash only, so that a value with more than
        # one slash does not blow up while unpacking; everything after
        # the first slash is kept as the section.
        component, section = section.split("/", 1)
    else:
        component = default_component

    return (section, component)
148 |
||
149 |
||
1102
by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs |
150 |
def force_to_utf8(s):
    """Return `s` as a UTF-8 encoded string.

    A string that already decodes as UTF-8 is returned unchanged;
    anything else is assumed to be ISO-8859-1 and is re-encoded as UTF-8.
    """
    try:
        # Only used to check that `s` is valid UTF-8.
        unicode(s, 'utf-8')
    except UnicodeError:
        return unicode(s, 'iso8859-1').encode('utf-8')
    else:
        return s
161 |
||
3804.1.23
by Celso Providelo
Fix doc/nascentupload* tests. Added most of the nascentuploadfile tests. |
162 |
|
1102
by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs |
163 |
def rfc2047_encode(s):
    """Encode a (header) string per RFC2047 if necessary.

    Empty input gives an empty string and pure ASCII input is returned
    unchanged. Otherwise the string is wrapped in an `email.Header.Header`
    using UTF-8 when it decodes as UTF-8, and ISO-8859-1 when it does not.
    """
    if not s:
        return ''

    # Plain ASCII needs no encoding at all.
    try:
        s.decode('us-ascii')
    except UnicodeError:
        pass
    else:
        return s

    try:
        # The decode only checks that `s` is valid UTF-8.
        s.decode('utf8')
        return str(email.Header.Header(s, 'utf-8', 998))
    except UnicodeError:
        return str(email.Header.Header(s, 'iso-8859-1', 998))
|
184 |
||
185 |
||
186 |
class ParseMaintError(Exception):
    """Raised when a maintainer field cannot be parsed.

    Attributes:
       message -- explanation of the error
    """

    def __init__(self, message):
        """Store `message` both as the `message` attribute and in `args`."""
        super(ParseMaintError, self).__init__()
        self.message = message
        self.args = (message, )
197 |
||
198 |
||
11204.5.6
by William Grant
Fix lint. |
199 |
def fix_maintainer(maintainer, field_name="Maintainer"):
    """Parse a Maintainer or Changed-By field.

    Returns a tuple of:

     (1) an RFC822 compatible version,
     (2) an RFC2047 compatible version,
     (3) the name
     (4) the email

    The name is forced to UTF-8 for both (1) and (3). If the name field
    contains '.' or ',', (1) and (2) are switched to 'email (name)' format.

    A field that is empty after stripping gives four empty strings.

    :param maintainer: The raw field value.
    :param field_name: The field name used in the parse error message.
    :raises ParseMaintError: If the field contains "<" but does not parse
        as "name <email>", or if the email part has no "@" and does not
        start with "buildd_".
    """
    maintainer = maintainer.strip()
    if not maintainer:
        return ('', '', '', '')

    if "<" not in maintainer:
        # A bare address without a name.
        name = ""
        address = maintainer
    elif maintainer.startswith("<") and maintainer.endswith(">"):
        # Only an address wrapped in angle brackets.
        name = ""
        address = maintainer[1:-1]
    else:
        parsed = re_parse_maintainer.match(maintainer)
        if parsed is None:
            raise ParseMaintError(
                "%s: doesn't parse as a valid %s field."
                % (maintainer, field_name))
        name = parsed.group(1)
        # Just in case the maintainer ended up with nested angles; drop
        # any leading "<" left in the address.
        address = parsed.group(2).lstrip("<")

    # Get an RFC2047 compliant version of the name, then force the name
    # itself to be UTF-8.
    rfc2047_name = rfc2047_encode(name)
    name = force_to_utf8(name)

    # If the maintainer's name contains a full stop then the whole field will
    # not work directly as an email address due to a misfeature in the syntax
    # specified in RFC822; see Debian policy 5.6.2 (Maintainer field syntax)
    # for details.
    if ',' in name or '.' in name:
        template = "%s (%s)"
        rfc822_maint = template % (address, name)
        rfc2047_maint = template % (address, rfc2047_name)
    else:
        template = "%s <%s>"
        rfc822_maint = template % (name, address)
        rfc2047_maint = template % (rfc2047_name, address)

    if "@" not in address and not address.startswith("buildd_"):
        raise ParseMaintError(
            "%s: no @ found in email address part." % maintainer)

    return (rfc822_maint, rfc2047_maint, name, address)
|
254 |
||
255 |
||
3147.2.19
by Celso Providelo
Fix bug # 32148, wrapping fix_maintainer function to allow both, unicode and string arguments. |
256 |
def safe_fix_maintainer(content, fieldname):
    """Call fix_maintainer() on either a unicode or a string argument.

    Content that is not already unicode is first converted with
    guess_encoding(); the result is then passed through ascii_smash()
    so that fix_maintainer() can be called on it safely.

    :param content: The raw field value.
    :param fieldname: The field name, passed on to fix_maintainer().
    :return: Whatever fix_maintainer() returns for the smashed content.
    """
    if type(content) != unicode:
        content = guess_encoding(content)

    return fix_maintainer(ascii_smash(content), fieldname)
|
11680.5.1
by Jelmer Vernooij
Add extract_dpkg_source function. |
268 |
|
269 |
||
270 |
def extract_dpkg_source(dsc_filepath, target):
    """Extract a source package by dsc file path.

    Runs "dpkg-source -sn -x <dsc_filepath>" with `target` as the working
    directory.

    :param dsc_filepath: Path of the DSC file
    :param target: Target directory
    :raises DpkgSourceError: If dpkg-source exits with a non-zero status.
        The error carries the command's standard output; standard error
        is captured but not reported.
    """

    def restore_default_sigpipe():
        # Python installs a SIGPIPE handler by default. This is usually not
        # what non-Python subprocesses expect.
        # http://www.chiark.greenend.org.uk/ucgi/~cjwatson/ \
        #   blosxom/2009-07-02-python-sigpipe.html
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    command = ["dpkg-source", "-sn", "-x", dsc_filepath]
    process = subprocess.Popen(
        command, cwd=target, preexec_fn=restore_default_sigpipe,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout_data, unused_stderr = process.communicate()
    exit_status = process.wait()
    if exit_status == 0:
        return

    raise DpkgSourceError(
        command=command,
        output=prefix_multi_line_string(stdout_data, " "),
        result=exit_status)