~launchpad-pqm/launchpad/devel

12000.2.2 by Jonathan Lange
Bump
1
# Copyright 2009-2010 Canonical Ltd.  This software is licensed under the
8687.15.17 by Karl Fogel
Add the copyright header block to the rest of the files under lib/lp/.
2
# GNU Affero General Public License version 3 (see the file LICENSE).
2908.4.9 by Guilherme Salgado
Implementation of the mirror prober and a bunch other things
3
12000.2.18 by Jonathan Lange
Move metaclass declaration to a better place.
4
__metaclass__ = type
12000.2.15 by Jonathan Lange
Document the public names
5
__all__ = [
6
    'DistroMirrorProber',
7
    ]
8
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
9
from datetime import datetime
2908.4.12 by Guilherme Salgado
Lots of fixes Andrew suggested, fixed DB patch from Stuart and some tests to the mirror prober.
10
import httplib
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
11
import itertools
2908.4.19 by Guilherme Salgado
Fixed last issues, improved tests and make it possible for mirror owners to download the log file.
12
import logging
3424.1.10 by Guilherme Salgado
Make the mirror prober honour the http_proxy env var, change it to issue batches of 50 requests instead of all at once and some other small things
13
import os
12000.2.13 by Jonathan Lange
Move a bunch of the logic out of the cronscript and into the module
14
from StringIO import StringIO
3691.51.37 by Guilherme Salgado
Some fixes and tests requested by kiko
15
import urllib2
2908.4.9 by Guilherme Salgado
Implementation of the mirror prober and a bunch other things
16
import urlparse
17
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
18
from twisted.internet import (
19
    defer,
20
    protocol,
21
    reactor,
22
    )
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
23
from twisted.internet.defer import DeferredSemaphore
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
24
from twisted.python.failure import Failure
2908.4.9 by Guilherme Salgado
Implementation of the mirror prober and a bunch other things
25
from twisted.web.http import HTTPClient
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
26
from zope.component import getUtility
2908.4.9 by Guilherme Salgado
Implementation of the mirror prober and a bunch other things
27
14605.1.1 by Curtis Hovey
Moved canonical.config to lp.services.
28
from lp.services.config import config
14600.2.2 by Curtis Hovey
Moved webapp to lp.services.
29
from lp.services.webapp import canonical_url
13130.1.12 by Curtis Hovey
Sorted imports.
30
from lp.app.interfaces.launchpad import ILaunchpadCelebrities
8322.4.1 by Curtis Hovey
Moved the distributionmirror objects to lp.registry.
31
from lp.registry.interfaces.distributionmirror import (
12000.2.13 by Jonathan Lange
Move a bunch of the logic out of the cronscript and into the module
32
    IDistributionMirrorSet,
33
    MirrorContent,
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
34
    MirrorFreshness,
35
    UnableToFetchCDImageFileList,
36
    )
8322.4.1 by Curtis Hovey
Moved the distributionmirror objects to lp.registry.
37
from lp.registry.interfaces.distroseries import IDistroSeries
14578.2.1 by William Grant
Move librarian stuff from canonical.launchpad to lp.services.librarian. canonical.librarian remains untouched.
38
from lp.services.librarian.interfaces import ILibraryFileAliasSet
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
39
from lp.soyuz.interfaces.distroarchseries import IDistroArchSeries
40
3691.214.12 by Guilherme Salgado
Fix https://launchpad.net/launchpad/+bug/71132 (Stop probing a given mirror after we get a certain number of time outs on that mirror)
41
# The requests/timeouts ratio has to be at least 3 for us to keep issuing
# requests on a given host. (This ratio is per run, rather than held long
# term.)
# IMPORTANT: Changing these values can cause lots of false negatives when
# probing mirrors, so please don't change them unless you know what you're
# doing.
MIN_REQUEST_TIMEOUT_RATIO = 3
MIN_REQUESTS_TO_CONSIDER_RATIO = 30

# XXX Guilherme Salgado 2007-01-30 bug=82201:
# We need to get rid of these global dicts in this module.
# Both are keyed by host: host_requests counts the connections started on
# that host and host_timeouts counts the probes on it that timed out.
host_requests = {}
host_timeouts = {}

# Number of redirects a single probe follows before it is failed as an
# infinite loop.
MAX_REDIRECTS = 3

# Number of simultaneous requests we issue on a given host.
# IMPORTANT: Don't change this unless you really know what you're doing. Using
# too large a value can cause spurious failures on lots of mirrors and too
# small a value can cause the prober to run for hours.
PER_HOST_REQUESTS = 2

# We limit the overall number of simultaneous requests as well to prevent
# them from stalling and timing out before they even get a chance to
# start connecting.
OVERALL_REQUESTS = 100
4606.2.3 by Guilherme Salgado
Get rid of some globals in scripts/distributionmirror_prober.py, make the code a bit more readable and split tests for the new MultiLock
67
10074.1.3 by Jonathan Davies
Created a LoggingMixin class that has our common logMessage function and made
68
69
class LoggingMixin:
    """Common logging helpers for archive and releases mirror messages.

    Classes using this mixin must provide a `log_file` attribute with a
    `write` method; the mixin itself never sets it.
    """

    def _getTime(self):
        """Return the current UTC time.

        This is a separate method so that a fake time can be supplied in
        its place.
        """
        return datetime.utcnow()

    def logMessage(self, message):
        """Write `message` to `self.log_file`, prefixed with a UTC timestamp.

        The timestamp comes from `self._getTime()`, is rendered in `ctime`
        format and is separated from the message by ": ".  Nothing is added
        after the message, so the caller supplies any trailing newline.
        """
        timestamp = self._getTime().ctime()
        self.log_file.write(timestamp + ": " + message)
82
4606.2.3 by Guilherme Salgado
Get rid of some globals in scripts/distributionmirror_prober.py, make the code a bit more readable and split tests for the new MultiLock
83
84
class RequestManager:
    """Run probe functions while limiting simultaneous requests.

    Two limits apply at once: OVERALL_REQUESTS across every host and
    PER_HOST_REQUESTS on each individual host.
    """

    # A single semaphore handed to every MultiLock, so it limits requests
    # across all hosts.
    overall_semaphore = DeferredSemaphore(OVERALL_REQUESTS)

    # Yes, I want a mutable class attribute because I want changes done in an
    # instance to be visible in other instances as well.
    # Maps each host to the MultiLock created for it.
    host_locks = {}

    def run(self, host, probe_func):
        """Run `probe_func` under the locks for `host`.

        :param host: the key under which the per-host MultiLock is stored;
            one is created the first time a host is seen.
        :param probe_func: the callable passed to the MultiLock's `run`.
        :return: the result of `multi_lock.run(probe_func)`.
        """
        # Use a MultiLock with one semaphore limiting the overall
        # connections and another limiting the per-host connections.
        multi_lock = self.host_locks.get(host)
        if multi_lock is None:
            multi_lock = MultiLock(
                self.overall_semaphore, DeferredSemaphore(PER_HOST_REQUESTS))
            self.host_locks[host] = multi_lock
        return multi_lock.run(probe_func)
4606.2.1 by Guilherme Salgado
Limit the number of overall simultaneous connections when running the mirror prober.
102
103
104
class MultiLock(defer._ConcurrencyPrimitive):
    """Lock that acquires multiple underlying locks before it is acquired."""

    def __init__(self, overall_lock, host_lock):
        """Wrap the two given locks.

        :param overall_lock: the lock limiting requests overall.
        :param host_lock: the lock limiting requests on a single host.
        """
        defer._ConcurrencyPrimitive.__init__(self)
        self.overall_lock = overall_lock
        self.host_lock = host_lock
        # host_lock will always be the scarcer resource, so it should be the
        # first to be acquired.
        self._locks = [host_lock, overall_lock]

    def acquire(self):
        """Ask for every underlying lock, the host lock first.

        :return: the Deferred that `defer.gatherResults` builds from the
            results of each underlying lock's `acquire`.
        """
        return defer.gatherResults([lock.acquire() for lock in self._locks])

    def release(self):
        """Release every underlying lock, in the order they were requested."""
        for lock in self._locks:
            lock.release()
121
2908.4.12 by Guilherme Salgado
Lots of fixes Andrew suggested, fixed DB patch from Stuart and some tests to the mirror prober.
122
2908.4.9 by Guilherme Salgado
Implementation of the mirror prober and a bunch other things
123
class ProberProtocol(HTTPClient):
    """Simple HTTP client to probe path existence via HEAD."""

    def connectionMade(self):
        """Send the HEAD request and start with an empty header store."""
        self.makeRequest()
        self.headers = {}

    def makeRequest(self):
        """Request path presence with an HTTP HEAD request.

        Uses factory.connect_host and factory.connect_path
        """
        self.sendCommand('HEAD', self.factory.connect_path)
        self.sendHeader('HOST', self.factory.connect_host)
        self.sendHeader('User-Agent',
            'Launchpad Mirror Prober ( https://launchpad.net/ )')
        self.endHeaders()

    def handleStatus(self, version, status, message):
        """Report the outcome to the factory and drop the connection.

        The factory's `succeeded` is called with `status` when it is the
        '200' status; anything else goes to the factory's `failed` wrapped
        in a BadResponseCode failure.
        """
        # According to http://lists.debian.org/deity/2001/10/msg00046.html,
        # apt intentionally handles only '200 OK' responses, so we do the
        # same here.
        if status == str(httplib.OK):
            self.factory.succeeded(status)
        else:
            self.factory.failed(Failure(BadResponseCode(status)))
        self.transport.loseConnection()

    def handleResponse(self, response):
        """Ignore the response body."""
        # The status is all we need, so we don't need to do anything with
        # the response
        pass
156
157
3691.51.2 by Guilherme Salgado
New ProberFactory/ProberProtocol that knows how to follow redirects
158
class RedirectAwareProberProtocol(ProberProtocol):
    """A specialized version of ProberProtocol that follows HTTP redirects."""

    # Set once a status we treat as a redirect has been received.
    redirected_to_location = False

    # The status string of that redirect response, kept so a redirect that
    # carries no Location header can be reported as a bad response.
    redirect_status = None

    # The different redirect statuses that I handle.
    handled_redirect_statuses = (
        httplib.MOVED_PERMANENTLY, httplib.FOUND, httplib.SEE_OTHER)

    def handleHeader(self, key, value):
        """Store `value` in the list kept for the lower-cased header name."""
        self.headers.setdefault(key.lower(), []).append(value)

    def handleStatus(self, version, status, message):
        """Note a redirect, or report any other status right away."""
        if int(status) in self.handled_redirect_statuses:
            # We need to redirect to the location specified in the headers.
            self.redirected_to_location = True
            self.redirect_status = status
        else:
            # We have the result immediately.
            ProberProtocol.handleStatus(self, version, status, message)

    def handleEndHeaders(self):
        """Hand the redirect target to the factory and drop the connection.

        The first Location header received is used.  A redirect response
        with no Location header cannot be followed, so the probe is failed
        with BadResponseCode instead.
        """
        assert self.redirected_to_location, (
            'All headers received but failed to find a result.')

        # Server responded redirecting us to another location.
        locations = self.headers.get('location')
        if locations:
            self.factory.redirect(locations[0])
        else:
            # Without this branch the lookup result would be None and
            # indexing it would raise TypeError inside the protocol.
            self.factory.failed(
                Failure(BadResponseCode(self.redirect_status)))
        self.transport.loseConnection()
2908.4.19 by Guilherme Salgado
Fixed last issues, improved tests and make it possible for mirror owners to download the log file.
189
2908.4.9 by Guilherme Salgado
Implementation of the mirror prober and a bunch other things
190
191
class ProberFactory(protocol.ClientFactory):
    """Factory using ProberProtocol to probe single URL existence."""

    protocol = ProberProtocol

    # Details of the URL of the host in which we actually want to request the
    # confirmation from.
    request_scheme = None
    request_host = None
    request_port = None
    request_path = None

    # Details of the URL of the host in which we'll connect, which will only
    # be different from request_* in case we have an http_proxy environment
    # variable --in that case the scheme, host and port will be the ones
    # extracted from http_proxy and the path will be self.url
    connect_scheme = None
    connect_host = None
    connect_port = None
    connect_path = None

    # The connector passed to startedConnecting(); stays None until a
    # connection attempt has started.
    connector = None

    def __init__(self, url, timeout=config.distributionmirrorprober.timeout):
        """Prepare a probe of `url`.

        :param url: the URL to probe; it is encoded as ASCII before use.
        :param timeout: the delay passed to `reactor.callLater` after which
            an unanswered probe is failed with ProberTimeout.
        :raises UnknownURLScheme: from setURL(), for unsupported schemes.
        """
        # We want the deferred to be a private attribute (_deferred) to make
        # sure our clients will only use the deferred returned by the probe()
        # method; this is to ensure self._cancelTimeout is always the first
        # callback in the chain.
        self._deferred = defer.Deferred()
        self.timeout = timeout
        self.timeoutCall = None
        self.setURL(url.encode('ascii'))

    def probe(self):
        """Start the probe and return the Deferred carrying its outcome.

        The Deferred is called back with the status on success and errbacked
        with the failure reason otherwise.  Two cases are answered without
        connecting: a forged success when only local URLs may be probed, and
        a ConnectionSkipped failure when should_skip_host() says so.
        """
        logger = logging.getLogger('distributionmirror-prober')
        # NOTE: We don't want to issue connections to any outside host when
        # running the mirror prober in a development machine, so we do this
        # hack here.
        if (self.connect_host != 'localhost'
            and config.distributionmirrorprober.localhost_only):
            reactor.callLater(0, self.succeeded, '200')
            logger.debug(
                "Forging a successful response on %s as we've been "
                "told to probe only local URLs.", self.url)
            return self._deferred

        if should_skip_host(self.request_host):
            reactor.callLater(0, self.failed, ConnectionSkipped(self.url))
            logger.debug(
                "Skipping %s as we've had too many timeouts on this "
                "host already.", self.url)
            return self._deferred

        self.connect()
        logger.debug('Probing %s', self.url)
        return self._deferred

    def connect(self):
        """Open the connection and (re)start the timeout clock."""
        host_requests[self.request_host] += 1
        reactor.connectTCP(self.connect_host, self.connect_port, self)
        # A timeout call left over from a previous connect() must not fire
        # for this new attempt.
        if self.timeoutCall is not None and self.timeoutCall.active():
            self._cancelTimeout(None)
        self.timeoutCall = reactor.callLater(
            self.timeout, self.failWithTimeoutError)
        self._deferred.addBoth(self._cancelTimeout)

    def failWithTimeoutError(self):
        """Count a timeout on the host, fail the probe and disconnect."""
        host_timeouts[self.request_host] += 1
        self.failed(ProberTimeout(self.url, self.timeout))
        if self.connector is not None:
            self.connector.disconnect()

    def startedConnecting(self, connector):
        """Remember the connector so a timeout can disconnect it."""
        self.connector = connector

    def succeeded(self, status):
        """Fire the probe's Deferred with `status`."""
        self._deferred.callback(status)

    def failed(self, reason):
        """Errback the probe's Deferred with `reason`."""
        self._deferred.errback(reason)

    def _cancelTimeout(self, result):
        """Cancel the pending timeout call, if any, and pass `result` on."""
        if self.timeoutCall is not None and self.timeoutCall.active():
            self.timeoutCall.cancel()
        return result

    def setURL(self, url):
        """Point this factory at `url`.

        Sets self.url and the request_* and connect_* attributes, and makes
        sure the request host has an entry in the host_requests and
        host_timeouts counters.

        :raises UnknownURLScheme: if the scheme is neither http nor ftp.
        """
        self.url = url
        scheme, host, port, path = _parse(url)
        # XXX Guilherme Salgado 2006-09-19:
        # We don't actually know how to handle FTP responses, but we
        # expect to be behind a squid HTTP proxy with the patch at
        # http://www.squid-cache.org/bugs/show_bug.cgi?id=1758 applied. So, if
        # you encounter any problems with FTP URLs you'll probably have to nag
        # the sysadmins to fix squid for you.
        if scheme not in ('http', 'ftp'):
            raise UnknownURLScheme(url)

        if scheme and host:
            self.request_scheme = scheme
            self.request_host = host
            self.request_port = port
            self.request_path = path

        host_requests.setdefault(self.request_host, 0)
        host_timeouts.setdefault(self.request_host, 0)

        # If the http_proxy variable is set, we want to use it as the host
        # we're going to connect to.
        proxy = os.getenv('http_proxy')
        if proxy:
            scheme, host, port, dummy = _parse(proxy)
            path = url

        self.connect_scheme = scheme
        self.connect_host = host
        self.connect_port = port
        self.connect_path = path
3691.214.20 by Guilherme Salgado
merge from rf
309
310
3691.51.2 by Guilherme Salgado
New ProberFactory/ProberProtocol that knows how to follow redirects
311
class RedirectAwareProberFactory(ProberFactory):
    """A ProberFactory whose protocol follows HTTP redirects."""

    protocol = RedirectAwareProberProtocol

    # Number of redirects followed so far by this factory.
    redirection_count = 0

    def redirect(self, url):
        """Follow a redirect to `url`, or fail the probe.

        The timeout clock is restarted first.  The probe is failed, rather
        than followed, when:

        - the last path segment of `url` differs from the one originally
          requested (RedirectToDifferentFile);
        - MAX_REDIRECTS redirects have already been followed
          (InfiniteLoopDetected);
        - setURL() rejects the scheme of `url`
          (UnknownURLSchemeAfterRedirect).
        """
        self.timeoutCall.reset(self.timeout)

        # Only the paths are compared; the other components are unused here.
        _scheme, _host, _port, orig_path = _parse(self.url)
        _scheme, _host, _port, new_path = _parse(url)
        if orig_path.split('/')[-1] != new_path.split('/')[-1]:
            # Server redirected us to a file which doesn't seem to be what we
            # requested.  It's likely to be a stupid server which redirects
            # instead of 404ing (https://launchpad.net/bugs/204460).
            self.failed(Failure(RedirectToDifferentFile(orig_path, new_path)))
            return

        try:
            if self.redirection_count >= MAX_REDIRECTS:
                raise InfiniteLoopDetected()
            self.redirection_count += 1

            logger = logging.getLogger('distributionmirror-prober')
            logger.debug('Got redirected from %s to %s', self.url, url)
            # XXX Guilherme Salgado 2007-04-23 bug=109223:
            # We can't assume url to be absolute here.
            self.setURL(url)
        except UnknownURLScheme:
            # Since we've got the UnknownURLScheme after a redirect, we need
            # to raise it in a form that can be ignored in the layer above.
            self.failed(UnknownURLSchemeAfterRedirect(url))
        except InfiniteLoopDetected as e:
            self.failed(e)
        else:
            self.connect()
3691.51.4 by Guilherme Salgado
Some suggestions made by Andrew on his review and some other things
347
348
349
class ProberError(Exception):
    """A generic prober error.

    This class should be used as a base for more specific prober errors.
    """
354
355
356
class ProberTimeout(ProberError):
    """Error for a URL whose request did not complete in time.

    `url` is the URL that was requested and `timeout` the number of
    seconds reported in the error message.
    """

    def __init__(self, url, timeout, *args):
        # Any extra positional arguments are handed to the base exception.
        super(ProberTimeout, self).__init__(*args)
        self.timeout = timeout
        self.url = url

    def __str__(self):
        template = "HEAD request on %s took longer than %s seconds"
        return template % (self.url, self.timeout)
367
368
369
class BadResponseCode(ProberError):
    """Error carrying the bad response code (`status`) that was received."""

    def __init__(self, status, *args):
        # Any extra positional arguments are handed to the base exception.
        super(BadResponseCode, self).__init__(*args)
        self.status = status

    def __str__(self):
        return "Bad response code: %s" % (self.status,)
377
2908.4.12 by Guilherme Salgado
Lots of fixes Andrew suggested, fixed DB patch from Stuart and some tests to the mirror prober.
378
7222.1.1 by Guilherme Salgado
Fix the bug? maybe.
379
class RedirectToDifferentFile(ProberError):
    """Error for a redirect whose target path differs from the original.

    `orig_path` and `new_path` are the two paths shown in the message.
    """

    def __init__(self, orig_path, new_path, *args):
        # Any extra positional arguments are handed to the base exception.
        super(RedirectToDifferentFile, self).__init__(*args)
        self.new_path = new_path
        self.orig_path = orig_path

    def __str__(self):
        template = "Attempt to redirect to a different file; from %s to %s"
        return template % (self.orig_path, self.new_path)
389
390
3691.51.4 by Guilherme Salgado
Some suggestions made by Andrew on his review and some other things
391
class InfiniteLoopDetected(ProberError):
    """Error signalling that the prober detected an infinite loop."""

    def __str__(self):
        message = "Infinite loop detected"
        return message
395
396
3691.214.13 by Guilherme Salgado
Properly fix the bug this time and add more tests
397
class ConnectionSkipped(ProberError):
    """Error for a connection that was not attempted.

    The message explains that the host had too many timeouts.
    """

    def __str__(self):
        reason = "Connection skipped because of too many timeouts on this "
        follow_up = "host. It will be retried on the next probing run."
        return reason + follow_up
3691.214.13 by Guilherme Salgado
Properly fix the bug this time and add more tests
402
403
3691.51.4 by Guilherme Salgado
Some suggestions made by Andrew on his review and some other things
404
class UnknownURLScheme(ProberError):
    """Error for a URL the prober does not know how to check.

    `url` is the offending URL, which is included in the message.
    """

    def __init__(self, url, *args):
        # Any extra positional arguments are handed to the base exception.
        super(UnknownURLScheme, self).__init__(*args)
        self.url = url

    def __str__(self):
        prefix = "The mirror prober doesn't know how to check this kind of "
        return prefix + "URLs: %s" % (self.url,)
3691.51.3 by Guilherme Salgado
Change the prober to use the new redirect-aware factory/protocol, add checks to detect infinite loops and schemes that we don't know how to probe as well as more tests
413
414
12021.2.5 by j.c.sackett
Updated ignored exceptions on the CD Image Mirror class and tests.
415
class UnknownURLSchemeAfterRedirect(UnknownURLScheme):
    """An UnknownURLScheme error that happened after following a redirect.

    It is a distinct type so that callers can list it among the failures
    they are prepared to ignore; `self.url` is the redirect target.
    """

    def __str__(self):
        # The trailing space after "how" matters: the two literals are
        # concatenated, and without it the message read "howto check".
        return ("The mirror prober was redirected to: %s. It doesn't know how "
                "to check this kind of URL." % self.url)
420
12021.2.6 by j.c.sackett
Lint fixes.
421
10074.1.3 by Jonathan Davies
Created a LoggingMixin class that has our common logMessage function and made
422
class ArchiveMirrorProberCallbacks(LoggingMixin):
    """Callbacks and errbacks used while probing one archive mirror URL.

    An instance is bound to one mirror, series, pocket and component, and to
    the URL that is probed for that combination.
    """

    # Failures that deleteMirrorSeries traps and that logError writes to the
    # mirror's own log instead of the cronscript's logger.
    expected_failures = (BadResponseCode, ProberTimeout, ConnectionSkipped)

    def __init__(self, mirror, series, pocket, component, url, log_file):
        """Store the probing context and pick the delete/ensure methods.

        The methods used to delete or ensure the mirror record depend on
        whether `series` provides IDistroArchSeries or IDistroSeries; an
        AssertionError is raised if it provides neither.
        """
        self.mirror = mirror
        self.series = series
        self.pocket = pocket
        self.component = component
        self.url = url
        self.log_file = log_file

        if IDistroArchSeries.providedBy(series):
            class_name = 'MirrorDistroArchSeries'
            delete_method = mirror.deleteMirrorDistroArchSeries
            ensure_method = mirror.ensureMirrorDistroArchSeries
        elif IDistroSeries.providedBy(series):
            class_name = 'MirrorDistroSeries'
            delete_method = mirror.deleteMirrorDistroSeriesSource
            ensure_method = mirror.ensureMirrorDistroSeriesSource
        else:
            raise AssertionError('series must provide either '
                                 'IDistroArchSeries or IDistroSeries.')

        self.mirror_class_name = class_name
        self.deleteMethod = delete_method
        self.ensureMethod = ensure_method

    def deleteMirrorSeries(self, failure):
        """Errback: delete the mirror record for this series/pocket/component.

        The deletion is logged together with the failure's error message.
        Failures that are not one of self.expected_failures are re-raised by
        failure.trap() and therefore keep propagating.
        """
        self.deleteMethod(self.series, self.pocket, self.component)
        description = self._getSeriesPocketAndComponentDescription()
        reason = failure.getErrorMessage()
        self.logMessage(
            'Deleted %s of %s with url %s because: %s.\n'
            % (self.mirror_class_name, description, self.url, reason))
        # Must come last: the record is deleted and logged even when the
        # failure turns out to be an unexpected one.
        failure.trap(*self.expected_failures)

    def ensureMirrorSeries(self, http_status):
        """Callback: make sure the mirror record exists and return it.

        `http_status` is the result of the previous callback; it is not used.
        """
        description = self._getSeriesPocketAndComponentDescription()
        msg = ('Ensuring %s of %s with url %s exists in the database.\n'
               % (self.mirror_class_name, description, self.url))
        ensured = self.ensureMethod(
            self.series, self.pocket, self.component)
        self.logMessage(msg)
        return ensured

    def updateMirrorFreshness(self, arch_or_source_mirror):
        """Update the freshness of this MirrorDistro{ArchSeries,SeriesSource}.

        One probe is started for every URL returned by the mirror record's
        getURLsToCheckUpdateness(); each successful probe may improve the
        recorded freshness through setMirrorFreshness. A DeferredList of all
        those probes is returned, or None when nothing was probed.
        """
        # The errback that precedes this callback in the chain returns None
        # when it handled one of self.expected_failures, so there may be no
        # mirror record to work with.
        if arch_or_source_mirror is None:
            return

        _scheme, host, _port, _path = _parse(self.url)
        urls_by_freshness = arch_or_source_mirror.getURLsToCheckUpdateness()
        if not urls_by_freshness or should_skip_host(host):
            # Either there are no publishing records for self.series,
            # self.pocket and self.component, or this host had too many
            # timeouts and should be skipped. Deleting the record is better
            # than keeping it with an UNKNOWN freshness.
            self.deleteMethod(self.series, self.pocket, self.component)
            return

        request_manager = RequestManager()
        pending = []
        # Start from UNKNOWN and let the probes below improve on it when
        # they find recently published packages on the mirror.
        arch_or_source_mirror.freshness = MirrorFreshness.UNKNOWN
        for freshness, url in urls_by_freshness.items():
            prober = ProberFactory(url)
            probe_deferred = request_manager.run(
                prober.request_host, prober.probe)
            probe_deferred.addCallback(
                self.setMirrorFreshness, arch_or_source_mirror, freshness,
                url)
            probe_deferred.addErrback(self.logError, url)
            pending.append(probe_deferred)
        return defer.DeferredList(pending)

    def setMirrorFreshness(
            self, http_status, arch_or_source_mirror, freshness, url):
        """Callback: record `freshness` on the given arch or source mirror.

        The stored value is replaced only when `freshness` compares lower
        than the current one, which the log message treats as fresher.
        `http_status` is not used.
        """
        if not (freshness < arch_or_source_mirror.freshness):
            return
        description = self._getSeriesPocketAndComponentDescription()
        self.logMessage(
            'Found that %s exists. Updating %s of %s freshness to '
            '%s.\n' % (url, self.mirror_class_name, description,
                       freshness.title))
        arch_or_source_mirror.freshness = freshness

    def _getSeriesPocketAndComponentDescription(self):
        """Return a string naming the series, pocket and component.

        It is used in log messages, where it helps to tell a
        MirrorDistroSeriesSource from a MirrorDistroArchSeries.
        """
        series = self.series
        if IDistroArchSeries.providedBy(series):
            series_part = ("Series %s, Architecture %s" %
                           (series.distroseries.title,
                            series.architecturetag))
        else:
            series_part = "Series %s" % series.title
        rest = (", Component %s and Pocket %s" %
                (self.component.name, self.pocket.title))
        return series_part + rest

    def logError(self, failure, url):
        """Errback: log `failure` for `url` and swallow it (returns None)."""
        msg = ("%s on %s of %s\n"
               % (failure.getErrorMessage(), url,
                  self._getSeriesPocketAndComponentDescription()))
        if failure.check(*self.expected_failures) is None:
            # Not an error we expect from an HTTP server, so it goes to the
            # cronscript's logger where somebody will notice it.
            logging.getLogger('distributionmirror-prober').error(msg)
        else:
            self.logMessage(msg)
        return None
559
10074.1.3 by Jonathan Davies
Created a LoggingMixin class that has our common logMessage function and made
560
561
class MirrorCDImageProberCallbacks(LoggingMixin):
    """Callbacks and errbacks used while probing a CD image mirror.

    An instance is bound to one mirror, distroseries and flavour.
    """

    # Failures that are not reported to the cronscript's logger when they
    # cause the MirrorCDImageSeries to be deleted.
    expected_failures = (
        BadResponseCode,
        ConnectionSkipped,
        ProberTimeout,
        RedirectToDifferentFile,
        UnknownURLSchemeAfterRedirect,
        )

    def __init__(self, mirror, distroseries, flavour, log_file):
        """Store the probing context."""
        self.log_file = log_file
        self.flavour = flavour
        self.distroseries = distroseries
        self.mirror = mirror

    def ensureOrDeleteMirrorCDImageSeries(self, result):
        """Ensure or delete the MirrorCDImageSeries according to `result`.

        `result` is the DeferredList outcome: a sequence of
        (success flag, response) pairs. On the first failed entry the
        MirrorCDImageSeries for self.distroseries and self.flavour is
        deleted and None is returned. When every entry succeeded the record
        is ensured and returned.
        """
        for status, response in result:
            if status != defer.FAILURE:
                continue
            self.mirror.deleteMirrorCDImageSeries(
                self.distroseries, self.flavour)
            if response.check(*self.expected_failures) is None:
                # Not an error we expect from an HTTP server, so it goes to
                # the cronscript's logger where somebody will notice it.
                msg = ("%s on mirror %s. Check its logfile for more "
                       "details.\n"
                       % (response.getErrorMessage(), self.mirror.name))
                logging.getLogger('distributionmirror-prober').error(msg)
            return None

        cdimage_series = self.mirror.ensureMirrorCDImageSeries(
            self.distroseries, self.flavour)
        found_msg = (
            "Found all ISO images for series %s and flavour %s.\n"
            % (self.distroseries.title, self.flavour))
        self.logMessage(found_msg)
        return cdimage_series

    def logMissingURL(self, failure, url):
        """Errback: log the failure for `url` and pass the failure along."""
        error_text = failure.getErrorMessage()
        self.logMessage("Failed %s: %s\n" % (url, error_text))
        return failure
3691.51.37 by Guilherme Salgado
Some fixes and tests requested by kiko
611
612
3691.214.40 by Guilherme Salgado
Fix a bunch of trivial mirror prober bugs: 46662, 68395 and 107473
613
def _build_request_for_cdimage_file_list(url):
    """Return a urllib2.Request for `url` carrying no-cache headers."""
    no_cache_headers = {
        'Pragma': 'no-cache',
        'Cache-control': 'no-cache',
        }
    return urllib2.Request(url, headers=no_cache_headers)
616
617
3691.51.37 by Guilherme Salgado
Some fixes and tests requested by kiko
618
def _get_cdimage_file_list():
    """Open and return the configured CD image file list.

    The URL is read from config.distributionmirrorprober and fetched with
    the request built by _build_request_for_cdimage_file_list. The response
    object from urllib2.urlopen is returned unread.

    :raises UnableToFetchCDImageFileList: if urllib2 raises a URLError
        while opening the URL.
    """
    url = config.distributionmirrorprober.cdimage_file_list_url
    try:
        return urllib2.urlopen(_build_request_for_cdimage_file_list(url))
    except urllib2.URLError as e:
        raise UnableToFetchCDImageFileList(
            'Unable to fetch %s: %s' % (url, e))
625
626
3691.214.40 by Guilherme Salgado
Fix a bunch of trivial mirror prober bugs: 46662, 68395 and 107473
627
def restore_http_proxy(http_proxy):
    """Put the http_proxy environment variable back to `http_proxy`.

    A value of None means the variable should not be set at all; it is
    removed if present, and nothing happens if it is already absent.
    """
    if http_proxy is not None:
        os.environ['http_proxy'] = http_proxy
        return
    try:
        del os.environ['http_proxy']
    except KeyError:
        # The variable was not set, which is the state we want.
        pass
636
637
3691.51.37 by Guilherme Salgado
Some fixes and tests requested by kiko
638
def get_expected_cdimage_paths():
    """Get all paths where we can find CD image files on a cdimage mirror.

    Return a list of (series, flavour, paths) tuples, one for each
    flavour and series name found in the file list, ordered by
    (flavour, series name). `series` is looked up on the Ubuntu
    distribution and `paths` is the list of CD image file paths for it.

    The file list is obtained from _get_cdimage_file_list(), so if
    something goes wrong while fetching it an UnableToFetchCDImageFileList
    exception will be raised. Each non-blank line must hold four
    tab-separated fields: flavour, series name, path and size.
    """
    paths_by_key = {}
    file_list = _get_cdimage_file_list()
    try:
        for line in file_list.readlines():
            if not line.strip():
                # A blank (e.g. trailing) line carries no entry.
                continue
            flavour, seriesname, path, size = line.split('\t')
            paths_by_key.setdefault((flavour, seriesname), []).append(path)
    finally:
        # Release the underlying connection/file as soon as it is read.
        file_list.close()

    ubuntu = getUtility(ILaunchpadCelebrities).ubuntu
    paths = []
    for (flavour, seriesname), file_paths in sorted(paths_by_key.items()):
        series = ubuntu.getSeries(seriesname)
        paths.append((series, flavour, file_paths))
    return paths
661
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
662
663
def checkComplete(result, key, unchecked_keys):
    """Mark `key` as checked and stop the reactor once nothing is pending.

    `key` is removed from `unchecked_keys`; when that leaves the list
    empty, reactor.stop() is scheduled to run on the next reactor
    iteration.

    The incoming `result` is handed back untouched: this function may be
    attached with addBoth(), so it can receive a failure from earlier in
    the callback chain, and that failure must not be swallowed here.
    """
    unchecked_keys.remove(key)
    if not unchecked_keys:
        reactor.callLater(0, reactor.stop)
    return result
672
673
4606.2.3 by Guilherme Salgado
Get rid of some globals in scripts/distributionmirror_prober.py, make the code a bit more readable and split tests for the new MultiLock
674
def probe_archive_mirror(mirror, logfile, unchecked_keys, logger):
    """Probe an archive mirror for its contents and freshness.

    First we issue a set of HTTP HEAD requests on some key files to find out
    what is mirrored there, then we check if some packages that we know the
    publishing time are available on that mirror, giving us an idea of when it
    was last synced to the main archive.

    One probe is queued per expected Packages/Sources path. The URL of each
    probe is appended to `unchecked_keys` and removed again by checkComplete
    once that probe's callback chain has finished, whether it succeeded or
    failed.
    """
    expected_paths = itertools.chain(
        mirror.getExpectedPackagesPaths(), mirror.getExpectedSourcesPaths())
    manager = RequestManager()
    for series, pocket, component, path in expected_paths:
        probe_url = "%s/%s" % (mirror.base_url, path)
        prober_callbacks = ArchiveMirrorProberCallbacks(
            mirror, series, pocket, component, probe_url, logfile)
        unchecked_keys.append(probe_url)
        factory = ProberFactory(probe_url)

        d = manager.run(factory.request_host, factory.probe)
        # Success of the probe ensures the mirror series record, failure
        # deletes it.
        d.addCallbacks(
            prober_callbacks.ensureMirrorSeries,
            prober_callbacks.deleteMirrorSeries)

        d.addCallback(prober_callbacks.updateMirrorFreshness)
        # Anything that went wrong in the callbacks above is logged here.
        d.addErrback(logger.error)

        # Always account for this URL, no matter how the chain ended.
        d.addBoth(checkComplete, probe_url, unchecked_keys)
701
702
4606.2.3 by Guilherme Salgado
Get rid of some globals in scripts/distributionmirror_prober.py, make the code a bit more readable and split tests for the new MultiLock
703
def probe_cdimage_mirror(mirror, logfile, unchecked_keys, logger):
4285.2.1 by Mark Shuttleworth
Massive renaming of distrorelease to distroseries
704
    """Probe a cdimage mirror for its contents.
4785.3.7 by Jeroen Vermeulen
Removed whitespace at ends of lines
705
4285.2.1 by Mark Shuttleworth
Massive renaming of distrorelease to distroseries
706
    This is done by checking the list of files for each flavour and series
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
707
    returned by get_expected_cdimage_paths(). If a mirror contains all
4285.2.1 by Mark Shuttleworth
Massive renaming of distrorelease to distroseries
708
    files for a given series and flavour, then we consider that mirror is
709
    actually mirroring that series and flavour.
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
710
    """
3691.214.40 by Guilherme Salgado
Fix a bunch of trivial mirror prober bugs: 46662, 68395 and 107473
711
    # The list of files a mirror should contain will change over time and we
712
    # don't want to keep records for files a mirror doesn't need to have
713
    # anymore, so we delete all records before start probing. This also fixes
714
    # https://launchpad.net/bugs/46662
9760.8.1 by Brad Crittenden
Change the non-English 'serieses' to 'series' throughout our codebase.
715
    mirror.deleteAllMirrorCDImageSeries()
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
716
    try:
717
        cdimage_paths = get_expected_cdimage_paths()
718
    except UnableToFetchCDImageFileList, e:
719
        logger.error(e)
720
        return
721
4285.2.1 by Mark Shuttleworth
Massive renaming of distrorelease to distroseries
722
    for series, flavour, paths in cdimage_paths:
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
723
        callbacks = MirrorCDImageProberCallbacks(
4285.2.1 by Mark Shuttleworth
Massive renaming of distrorelease to distroseries
724
            mirror, series, flavour, logfile)
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
725
4285.2.1 by Mark Shuttleworth
Massive renaming of distrorelease to distroseries
726
        mirror_key = (series, flavour)
3691.214.13 by Guilherme Salgado
Properly fix the bug this time and add more tests
727
        unchecked_keys.append(mirror_key)
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
728
        deferredList = []
4606.2.3 by Guilherme Salgado
Get rid of some globals in scripts/distributionmirror_prober.py, make the code a bit more readable and split tests for the new MultiLock
729
        request_manager = RequestManager()
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
730
        for path in paths:
731
            url = '%s/%s' % (mirror.base_url, path)
732
            # Use a RedirectAwareProberFactory because CD mirrors are allowed
733
            # to redirect, and we need to cope with that.
734
            prober = RedirectAwareProberFactory(url)
4606.2.3 by Guilherme Salgado
Get rid of some globals in scripts/distributionmirror_prober.py, make the code a bit more readable and split tests for the new MultiLock
735
            deferred = request_manager.run(prober.request_host, prober.probe)
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
736
            deferred.addErrback(callbacks.logMissingURL, url)
737
            deferredList.append(deferred)
738
739
        deferredList = defer.DeferredList(deferredList, consumeErrors=True)
4285.2.1 by Mark Shuttleworth
Massive renaming of distrorelease to distroseries
740
        deferredList.addCallback(callbacks.ensureOrDeleteMirrorCDImageSeries)
3691.214.13 by Guilherme Salgado
Properly fix the bug this time and add more tests
741
        deferredList.addCallback(checkComplete, mirror_key, unchecked_keys)
3691.217.1 by Guilherme Salgado
Fix https://launchpad.net/products/launchpad/+bug/54791 (The mirror prober should check a few files from each mirror in paralel instead of a lot of files from a single mirror)
742
3691.214.21 by Guilherme Salgado
Some more tests and comments as requested by Andrew
743
744
def should_skip_host(host):
    """Return True if the requests/timeouts ratio on this host is too low.

    A host is never skipped while it has no recorded timeouts or has seen
    fewer than MIN_REQUESTS_TO_CONSIDER_RATIO requests. Otherwise it is
    skipped when requests divided by timeouts falls below
    MIN_REQUEST_TIMEOUT_RATIO.
    """
    request_count = host_requests[host]
    timeout_count = host_timeouts[host]
    if timeout_count == 0 or request_count < MIN_REQUESTS_TO_CONSIDER_RATIO:
        return False
    # float() keeps this a true division even for integer counters.
    return float(request_count) / timeout_count < MIN_REQUEST_TIMEOUT_RATIO
753
754
755
def _parse(url, defaultPort=80):
756
    """Parse the given URL returning the scheme, host, port and path."""
757
    scheme, host, path, dummy, dummy, dummy = urlparse.urlparse(url)
758
    port = defaultPort
759
    if ':' in host:
760
        host, port = host.split(':')
761
        assert port.isdigit()
762
        port = int(port)
763
    return scheme, host, port, path
12000.2.13 by Jonathan Lange
Move a bunch of the logic out of the cronscript and into the module
764
765
766
class DistroMirrorProber:
    """Main entry point for the distribution mirror prober.

    An instance holds the transaction manager (`txn`) whose begin() and
    commit() bracket a probe run, and the `logger` used for all progress
    and diagnostic messages.
    """

    def __init__(self, txn, logger):
        self.txn = txn
        self.logger = logger

    def _sanity_check_mirror(self, mirror):
        """Check that the given mirror is official and has a base URL.

        Return True if the mirror can be probed. A mirror without a base
        URL is reported with a warning and False is returned. Being asked
        to probe a non-official mirror is a programming error and trips an
        assertion.
        """
        assert mirror.isOfficial(), (
            'Non-official mirrors should not be probed')
        if mirror.base_url is None:
            self.logger.warning(
                "Mirror '%s' of distribution '%s' doesn't have a base URL; "
                "we can't probe it." % (
                    mirror.name, mirror.distribution.name))
            return False
        return True

    def _create_probe_record(self, mirror, logfile):
        """Create a probe record for the given mirror with the given logfile.

        The whole content of `logfile` is stored as a text/plain library
        file named after the mirror, and that file is attached to a new
        probe record on the mirror.
        """
        # Rewind so the library reads the log from its beginning.
        logfile.seek(0)
        filename = '%s-probe-logfile.txt' % mirror.name
        log_file = getUtility(ILibraryFileAliasSet).create(
            name=filename, size=len(logfile.getvalue()),
            file=logfile, contentType='text/plain')
        mirror.newProbeRecord(log_file)

    def _update_mirror_statuses(self, probed_mirrors, logfiles,
                                notify_owner):
        """Record probe results and disable/re-enable mirrors as needed.

        For each probed mirror a probe record is created from its log.
        Enabled mirrors that should be disabled are disabled (passing
        `notify_owner` and the log text on), and disabled mirrors that
        should not be disabled are enabled again.

        :param probed_mirrors: The mirrors that were actually probed.
        :param logfiles: Mapping of mirror id to that mirror's probe log.
        :param notify_owner: Passed to mirror.disable().
        :return: A (disabled, reenabled) tuple of lists holding the
            canonical URLs of the mirrors whose status changed.
        """
        disabled_mirrors = []
        reenabled_mirrors = []
        if not probed_mirrors:
            # Nothing was probed, so there is no reason to fetch the list
            # of expected CD images (which may fail).
            return disabled_mirrors, reenabled_mirrors

        expected_iso_images_count = len(get_expected_cdimage_paths())
        for mirror in probed_mirrors:
            log = logfiles[mirror.id]
            self._create_probe_record(mirror, log)
            if mirror.shouldDisable(expected_iso_images_count):
                if mirror.enabled:
                    log.seek(0)
                    mirror.disable(notify_owner, log.getvalue())
                    disabled_mirrors.append(canonical_url(mirror))
            else:
                # Ensure the mirror is enabled, so that it shows up on public
                # mirror listings.
                if not mirror.enabled:
                    mirror.enabled = True
                    reenabled_mirrors.append(canonical_url(mirror))
        return disabled_mirrors, reenabled_mirrors

    def probe(self, content_type, no_remote_hosts, ignore_last_probe,
              max_mirrors, notify_owner):
        """Probe distribution mirrors.

        :param content_type: The type of mirrored content, as a
            `MirrorContent`.
        :param no_remote_hosts: If True, restrict access to localhost.
        :param ignore_last_probe: If True, ignore the results of the last
            probe and probe again anyway.
        :param max_mirrors: The maximum number of mirrors to probe. If None,
            no maximum.
        :param notify_owner: Send failure notification to the owners of the
            mirrors.
        :raises ValueError: If `content_type` is neither
            `MirrorContent.ARCHIVE` nor `MirrorContent.RELEASE`.
        """
        if content_type == MirrorContent.ARCHIVE:
            probe_function = probe_archive_mirror
        elif content_type == MirrorContent.RELEASE:
            probe_function = probe_cdimage_mirror
        else:
            raise ValueError(
                "Unrecognized content_type: %s" % (content_type,))

        self.txn.begin()

        # To me this seems better than passing the no_remote_hosts value
        # through a lot of method/function calls, until it reaches the probe()
        # method. (salgado)
        # Note that the pushed config overlay is not popped here.
        if no_remote_hosts:
            localhost_only_conf = """
                [distributionmirrorprober]
                localhost_only: True
                """
            config.push('localhost_only_conf', localhost_only_conf)

        self.logger.info('Probing %s Mirrors' % content_type.title)

        mirror_set = getUtility(IDistributionMirrorSet)
        results = mirror_set.getMirrorsToProbe(
            content_type, ignore_last_probe=ignore_last_probe,
            limit=max_mirrors)
        mirror_ids = [mirror.id for mirror in results]
        unchecked_keys = []
        logfiles = {}
        probed_mirrors = []

        for mirror_id in mirror_ids:
            mirror = mirror_set[mirror_id]
            if not self._sanity_check_mirror(mirror):
                continue

            # XXX: salgado 2006-05-26:
            # Some people registered mirrors on distros other than Ubuntu back
            # in the old times, so now we need to do this small hack here.
            if not mirror.distribution.full_functionality:
                self.logger.debug(
                    "Mirror '%s' of distribution '%s' can't be probed --we "
                    "only probe Ubuntu mirrors."
                    % (mirror.name, mirror.distribution.name))
                continue

            probed_mirrors.append(mirror)
            logfile = StringIO()
            logfiles[mirror_id] = logfile
            probe_function(mirror, logfile, unchecked_keys, self.logger)

        if probed_mirrors:
            # The reactor is only stopped by checkComplete() when the last
            # key is removed from unchecked_keys. If the probe functions
            # registered no keys (e.g. the list of expected CD images could
            # not be fetched), nothing would ever stop the reactor, so
            # don't start it at all.
            if unchecked_keys:
                reactor.run()
            self.logger.info('Probed %d mirrors.' % len(probed_mirrors))
        else:
            self.logger.info('No mirrors to probe.')

        # Now that we finished probing all mirrors, we check if any of these
        # mirrors appear to have no content mirrored, and, if so, mark them as
        # disabled and notify their owners.
        disabled_mirrors, reenabled_mirrors = self._update_mirror_statuses(
            probed_mirrors, logfiles, notify_owner)

        if disabled_mirrors:
            self.logger.info(
                'Disabling %s mirror(s): %s'
                % (len(disabled_mirrors), ", ".join(disabled_mirrors)))
        if reenabled_mirrors:
            self.logger.info(
                'Re-enabling %s mirror(s): %s'
                % (len(reenabled_mirrors), ", ".join(reenabled_mirrors)))
        # XXX: salgado 2007-04-03:
        # This should be done in LaunchpadScript.lock_and_run() when
        # the isolation used is ISOLATION_LEVEL_AUTOCOMMIT. Also note
        # that replacing this with a flush_database_updates() doesn't
        # have the same effect, it seems.
        self.txn.commit()

        self.logger.info('Done.')