1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
|
# Copyright 2009 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
from cStringIO import StringIO
from datetime import datetime
import httplib
import unittest
from urllib2 import (
HTTPError,
urlopen,
)
from urlparse import urlparse
from lazr.uri import URI
import pytz
from storm.expr import SQL
import transaction
from zope.component import getUtility
from lp.services.config import config
from lp.services.database.lpstorm import IMasterStore
from lp.services.database.sqlbase import (
cursor,
flush_database_updates,
session_store,
)
from lp.services.librarian.client import (
get_libraryfilealias_download_path,
LibrarianClient,
)
from lp.services.librarian.interfaces import ILibraryFileAliasSet
from lp.services.librarian.interfaces.client import DownloadFailed
from lp.services.librarian.model import (
LibraryFileAlias,
TimeLimitedToken,
)
from lp.testing.layers import (
LaunchpadFunctionalLayer,
LaunchpadZopelessLayer,
)
def uri_path_replace(url, old, new):
    """Return `url` with `old` swapped for `new` in its path component.

    Only the path portion of the URL is rewritten; the scheme, host and
    any query/fragment are left untouched.
    """
    uri = URI(url)
    new_path = uri.path.replace(old, new)
    return str(uri.replace(path=new_path))
class LibrarianWebTestCase(unittest.TestCase):
    """Test the librarian's web interface."""
    layer = LaunchpadFunctionalLayer
    # Database user the test layer connects as.
    dbuser = 'librarian'

    # Add stuff to a librarian via the upload port, then check that it's
    # immediately visible on the web interface. (in an attempt to test ddaa's
    # 500-error issue).

    def commit(self):
        """Synchronize database state."""
        flush_database_updates()
        transaction.commit()

    def test_uploadThenDownload(self):
        # Upload via the client API, then verify the content is served
        # both through the API and over plain HTTP once committed.
        client = LibrarianClient()

        # Do this 10 times, to try to make sure we get all the threads in the
        # thread pool involved more than once, in case handling the second
        # request is an issue...
        for count in range(10):
            # Upload a file. This should work without any exceptions being
            # thrown.  Vary the payload size per iteration so each alias
            # has distinct content.
            sampleData = 'x' + ('blah' * (count%5))
            fileAlias = client.addFile('sample', len(sampleData),
                                       StringIO(sampleData),
                                       contentType='text/plain')

            # Make sure we can get its URL
            url = client.getURLForAlias(fileAlias)

            # However, we can't access it until we have committed,
            # because the server has no idea what mime-type to send it as
            # (NB. This could be worked around if necessary by having the
            # librarian allow access to files that don't exist in the DB
            # and spitting them out with an 'unknown' mime-type
            # -- StuartBishop)
            self.require404(url)
            self.commit()

            # Make sure we can download it using the API
            fileObj = client.getFileByAlias(fileAlias)
            self.assertEqual(sampleData, fileObj.read())
            fileObj.close()

            # And make sure the URL works too
            fileObj = urlopen(url)
            self.assertEqual(sampleData, fileObj.read())
            fileObj.close()

    def test_checkGzipEncoding(self):
        # Files that end in ".txt.gz" are treated special and are returned
        # with an encoding of "gzip" or "x-gzip" to accommodate requirements
        # of displaying Ubuntu build logs in the browser.  The mimetype
        # should be "text/plain" for these files.
        client = LibrarianClient()
        contents = 'Build log...'
        build_log = StringIO(contents)
        alias_id = client.addFile(name="build_log.txt.gz",
                                  size=len(contents),
                                  file=build_log,
                                  contentType="text/plain")
        self.commit()

        url = client.getURLForAlias(alias_id)
        fileObj = urlopen(url)
        mimetype = fileObj.headers['content-type']
        encoding = fileObj.headers['content-encoding']
        self.failUnless(mimetype == "text/plain",
                        "Wrong mimetype. %s != 'text/plain'." % mimetype)
        self.failUnless(encoding == "gzip",
                        "Wrong encoding. %s != 'gzip'." % encoding)

    def test_checkNoEncoding(self):
        # Other files should have no encoding.
        client = LibrarianClient()
        contents = 'Build log...'
        build_log = StringIO(contents)
        alias_id = client.addFile(name="build_log.tgz",
                                  size=len(contents),
                                  file=build_log,
                                  contentType="application/x-tar")
        self.commit()

        url = client.getURLForAlias(alias_id)
        fileObj = urlopen(url)
        mimetype = fileObj.headers['content-type']
        # Absence of the header means KeyError on lookup.
        self.assertRaises(KeyError, fileObj.headers.__getitem__,
                          'content-encoding')
        self.failUnless(
            mimetype == "application/x-tar",
            "Wrong mimetype. %s != 'application/x-tar'." % mimetype)

    def test_aliasNotFound(self):
        # Requesting a URL for an alias that does not exist fails.
        client = LibrarianClient()
        self.assertRaises(DownloadFailed, client.getURLForAlias, 99)

    def test_oldurl(self):
        # 'old' urls are in the form of http://server:port/cid/aid/fname
        # which we want to continue supporting. The content id is simply
        # ignored.
        client = LibrarianClient()
        filename = 'sample.txt'
        aid = client.addFile(filename, 6, StringIO('sample'), 'text/plain')
        self.commit()
        url = client.getURLForAlias(aid)
        self.assertEqual(urlopen(url).read(), 'sample')

        # Prepend an arbitrary (ignored) content id to the path.
        old_url = uri_path_replace(url, str(aid), '42/%d' % aid)
        self.assertEqual(urlopen(old_url).read(), 'sample')

        # If the content and alias IDs are not integers, a 404 is raised
        old_url = uri_path_replace(url, str(aid), 'foo/%d' % aid)
        self.require404(old_url)
        old_url = uri_path_replace(url, str(aid), '%d/foo' % aid)
        self.require404(old_url)

    def test_404(self):
        # Mangled alias ids or filenames in the URL give a 404.
        client = LibrarianClient()
        filename = 'sample.txt'
        aid = client.addFile(filename, 6, StringIO('sample'), 'text/plain')
        self.commit()
        url = client.getURLForAlias(aid)
        self.assertEqual(urlopen(url).read(), 'sample')

        # Change the aliasid and assert we get a 404
        self.failUnless(str(aid) in url)
        bad_id_url = uri_path_replace(url, str(aid), str(aid+1))
        self.require404(bad_id_url)

        # Change the filename and assert we get a 404
        self.failUnless(filename in url)
        bad_name_url = uri_path_replace(url, filename, 'different.txt')
        self.require404(bad_name_url)

    def test_duplicateuploads(self):
        # Uploading identical content twice yields two distinct aliases,
        # both of which serve the same bytes.
        client = LibrarianClient()
        filename = 'sample.txt'
        id1 = client.addFile(filename, 6, StringIO('sample'), 'text/plain')
        id2 = client.addFile(filename, 6, StringIO('sample'), 'text/plain')

        self.failIfEqual(id1, id2, 'Got allocated the same id!')

        self.commit()

        self.failUnlessEqual(client.getFileByAlias(id1).read(), 'sample')
        self.failUnlessEqual(client.getFileByAlias(id2).read(), 'sample')

    def test_robotsTxt(self):
        # The librarian serves a robots.txt that disallows all crawling.
        url = 'http://%s:%d/robots.txt' % (
            config.librarian.download_host, config.librarian.download_port)
        f = urlopen(url)
        self.failUnless('Disallow: /' in f.read())

    def test_headers(self):
        client = LibrarianClient()

        # Upload a file so we can retrieve it.
        sample_data = 'blah'
        file_alias_id = client.addFile(
            'sample', len(sample_data), StringIO(sample_data),
            contentType='text/plain')
        url = client.getURLForAlias(file_alias_id)

        # Change the date_created to a known value that doesn't match
        # the disk timestamp. The timestamp on disk cannot be trusted.
        file_alias = IMasterStore(LibraryFileAlias).get(
            LibraryFileAlias, file_alias_id)
        file_alias.date_created = datetime(
            2001, 01, 30, 13, 45, 59, tzinfo=pytz.utc)

        # Commit so the file is available from the Librarian.
        self.commit()

        # Fetch the file via HTTP, recording the interesting headers
        result = urlopen(url)
        last_modified_header = result.info()['Last-Modified']
        cache_control_header = result.info()['Cache-Control']

        # URLs point to the same content for ever, so we have a hardcoded
        # 1 year max-age cache policy.
        self.failUnlessEqual(cache_control_header, 'max-age=31536000, public')

        # And we should have a correct Last-Modified header too.
        self.failUnlessEqual(
            last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')

    def get_restricted_file_and_public_url(self):
        """Create a restricted file; return its alias id and public URL."""
        # Use a regular LibrarianClient to ensure we speak to the
        # nonrestricted port on the librarian which is where secured
        # restricted files are served from.
        client = LibrarianClient()
        fileAlias = client.addFile(
            'sample', 12, StringIO('a'*12), contentType='text/plain')
        # Note: We're deliberately using the wrong url here: we should be
        # passing secure=True to getURLForAlias, but to use the returned URL
        # we would need a wildcard DNS facility patched into urlopen; instead
        # we use the *deliberate* choice of having the path of secure and
        # insecure urls be the same, so that we can test it: the server code
        # doesn't need to know about the fancy wildcard domains.
        url = client.getURLForAlias(fileAlias)
        # Now that we have a url which talks to the public librarian, make
        # the file restricted.
        IMasterStore(LibraryFileAlias).find(LibraryFileAlias,
            LibraryFileAlias.id==fileAlias).set(
            LibraryFileAlias.restricted==True)
        self.commit()
        return fileAlias, url

    def test_restricted_subdomain_must_match_file_alias(self):
        # IFF there is a .restricted. in the host, then the library file
        # alias in the subdomain must match that in the path.
        client = LibrarianClient()
        fileAlias = client.addFile('sample', 12, StringIO('a'*12),
            contentType='text/plain')
        fileAlias2 = client.addFile('sample', 12, StringIO('b'*12),
            contentType='text/plain')
        self.commit()
        url = client.getURLForAlias(fileAlias)
        # Strip any port suffix from the download host.
        download_host = urlparse(config.librarian.download_url)[1]
        if ':' in download_host:
            download_host = download_host[:download_host.find(':')]
        template_host = 'i%%d.restricted.%s' % download_host
        path = get_libraryfilealias_download_path(fileAlias, 'sample')
        # The basic URL must work.
        urlopen(url)
        # Use the network level protocol because DNS resolution won't work
        # here (no wildcard support)
        connection = httplib.HTTPConnection(
            config.librarian.download_host,
            config.librarian.download_port)
        # A valid subdomain based URL must work.
        good_host = template_host % fileAlias
        connection.request("GET", path, headers={'Host': good_host})
        response = connection.getresponse()
        response.read()
        self.assertEqual(200, response.status, response)
        # A subdomain based URL trying to put fileAlias into the restricted
        # domain of fileAlias2 must not work.
        hostile_host = template_host % fileAlias2
        connection.request("GET", path, headers={'Host': hostile_host})
        response = connection.getresponse()
        response.read()
        self.assertEqual(404, response.status)
        # A subdomain which matches the LFA but is nested under one that
        # doesn't is also treated as hostile.
        nested_host = 'i%d.restricted.i%d.restricted.%s' % (
            fileAlias, fileAlias2, download_host)
        connection.request("GET", path, headers={'Host': nested_host})
        response = connection.getresponse()
        response.read()
        self.assertEqual(404, response.status)

    def test_restricted_no_token(self):
        fileAlias, url = self.get_restricted_file_and_public_url()
        # The file should not be able to be opened - we haven't allocated a
        # token. When the token is wrong or stale a 404 is given (to avoid
        # disclosure about what content we hold. Alternatively a 401 could be
        # given (as long as we give a 401 when the file is missing as well -
        # but that requires some more complex changes in the deployment
        # infrastructure to permit more backend knowledge of the frontend
        # request.
        self.require404(url)

    def test_restricted_made_up_token(self):
        fileAlias, url = self.get_restricted_file_and_public_url()
        # The file should not be able to be opened - the token supplied
        # is not one we issued.
        self.require404(url + '?token=haxx0r')

    def test_restricted_with_token(self):
        fileAlias, url = self.get_restricted_file_and_public_url()
        # We have the base url for a restricted file; grant access to it
        # for a short time.
        token = TimeLimitedToken.allocate(url)
        url = url + "?token=%s" % token
        # Now we should be able to access the file.
        fileObj = urlopen(url)
        try:
            self.assertEqual("a"*12, fileObj.read())
        finally:
            fileObj.close()

    def test_restricted_with_expired_token(self):
        fileAlias, url = self.get_restricted_file_and_public_url()
        # We have the base url for a restricted file; grant access to it
        # for a short time.
        token = TimeLimitedToken.allocate(url)
        # But time has passed: age the token a week directly in the
        # session store so it is stale.
        store = session_store()
        tokens = store.find(TimeLimitedToken, TimeLimitedToken.token==token)
        tokens.set(
            TimeLimitedToken.created==SQL("created - interval '1 week'"))
        url = url + "?token=%s" % token
        # Now, as per test_restricted_no_token we should get a 404.
        self.require404(url)

    def test_restricted_file_headers(self):
        fileAlias, url = self.get_restricted_file_and_public_url()
        token = TimeLimitedToken.allocate(url)
        url = url + "?token=%s" % token
        # Change the date_created to a known value for testing.
        file_alias = IMasterStore(LibraryFileAlias).get(
            LibraryFileAlias, fileAlias)
        file_alias.date_created = datetime(
            2001, 01, 30, 13, 45, 59, tzinfo=pytz.utc)
        # Commit the update.
        self.commit()
        # Fetch the file via HTTP, recording the interesting headers
        result = urlopen(url)
        last_modified_header = result.info()['Last-Modified']
        cache_control_header = result.info()['Cache-Control']
        # No caching for restricted files.
        self.failUnlessEqual(cache_control_header, 'max-age=0, private')
        # And we should have a correct Last-Modified header too.
        self.failUnlessEqual(
            last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
        # Perhaps we should also set Expires to the Last-Modified.

    def require404(self, url):
        """Assert that opening `url` raises a 404."""
        try:
            urlopen(url)
            self.fail('404 not raised')
        except HTTPError, e:
            self.failUnlessEqual(e.code, 404)
class LibrarianZopelessWebTestCase(LibrarianWebTestCase):
    """Rerun the web tests under the Zopeless layer.

    Also adds tests that need to connect as a non-standard database user.
    """
    layer = LaunchpadZopelessLayer

    def setUp(self):
        # Connect as the librarian's own database user.
        LaunchpadZopelessLayer.switchDbUser(config.librarian.dbuser)

    def commit(self):
        # Zopeless commits go through the layer rather than calling
        # transaction.commit() directly.
        LaunchpadZopelessLayer.commit()

    def test_accessTime(self):
        # Test to ensure the Librarian updates last_accessed as specced
        # when files are retrieved via the web.
        #
        # We only test this under Zopeless because we need to connect as
        # a non-standard database user, and because there doesn't seem
        # any point running this test under both environments.

        # XXX: Stuart Bishop 2007-04-11 bug=4613: Disabled due to Bug #4613.
        return

        # Add a file.
        client = LibrarianClient()
        filename = 'sample.txt'
        id1 = client.addFile(filename, 6, StringIO('sample'), 'text/plain')
        self.commit()

        # Manually force last accessed time to be some time way in the
        # past, so that it'll be very clear if it's updated or not
        # (otherwise, depending on the resolution of clocks and things,
        # an immediate access might not look any newer).
        LibraryFileAlias.get(id1).last_accessed = datetime(
            2004, 1, 1, 12, 0, 0, tzinfo=pytz.timezone('Australia/Sydney'))
        self.commit()

        # Check that last_accessed is updated when the file is accessed
        # over the web.
        access_time_1 = LibraryFileAlias.get(id1).last_accessed
        client = LibrarianClient()
        url = client.getURLForAlias(id1)
        urlopen(url).close()
        self.commit()
        access_time_2 = LibraryFileAlias.get(id1).last_accessed
        self.failUnless(access_time_1 < access_time_2)

    def test_getURLForAliasObject(self):
        # getURLForAliasObject returns the same URL as getURLForAlias.
        client = LibrarianClient()
        content = "Test content"
        alias_id = client.addFile(
            'test.txt', len(content), StringIO(content),
            contentType='text/plain')
        self.commit()
        alias = getUtility(ILibraryFileAliasSet)[alias_id]
        self.assertEqual(
            client.getURLForAlias(alias_id),
            client.getURLForAliasObject(alias))
class DeletedContentTestCase(unittest.TestCase):
    """Aliases whose content has been deleted must 404 everywhere."""
    layer = LaunchpadZopelessLayer

    def setUp(self):
        # Connect as the librarian's own database user.
        LaunchpadZopelessLayer.switchDbUser(config.librarian.dbuser)

    def test_deletedContentNotFound(self):
        # Use a user with rights to change the deleted flag in the db.
        # This currently means a superuser.
        LaunchpadZopelessLayer.switchDbUser('testadmin')

        alias = getUtility(ILibraryFileAliasSet).create(
            'whatever', 8, StringIO('xxx\nxxx\n'), 'text/plain')
        alias_id = alias.id
        transaction.commit()

        client = LibrarianClient()

        # This works
        alias = getUtility(ILibraryFileAliasSet)[alias_id]
        alias.open()
        alias.read()
        alias.close()

        # And it can be retrieved via the web
        url = alias.http_url
        retrieved_content = urlopen(url).read()
        self.failUnlessEqual(retrieved_content, 'xxx\nxxx\n')

        # But when we flag the content as deleted
        cur = cursor()
        cur.execute("""
            UPDATE LibraryFileAlias SET content=NULL WHERE id=%s
            """, (alias.id, ))
        transaction.commit()

        # Things become not found
        alias = getUtility(ILibraryFileAliasSet)[alias_id]
        self.failUnlessRaises(DownloadFailed, alias.open)

        # And people see a 404 page
        try:
            urlopen(url)
            self.fail('404 not raised')
        except HTTPError, x:
            self.failUnlessEqual(x.code, 404)
|