~launchpad-pqm/launchpad/devel

13811.1.1 by Jeroen Vermeulen
More lint.
1
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
8687.15.17 by Karl Fogel
Add the copyright header block to the rest of the files under lib/lp/.
2
# GNU Affero General Public License version 3 (see the file LICENSE).
2770.1.56 by Guilherme Salgado
Cleanup loads of code, improved some tests and got all of them to work.
3
3550.1.11 by Andrew Bennetts
Better fix for 41414.
4
__metaclass__ = type
5
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
6
import httplib
3450.1.2 by Guilherme Salgado
Refactor BranchToMirror.mirror() into smaller methods and add tests for BranchToMirror's error handling code.
7
import socket
3691.396.4 by David Allouche
simple bugfixes, duh!
8
import sys
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
9
import urllib2
2770.1.55 by Guilherme Salgado
Copy jblack's supermirror pull script files into lib/canonical/launchpad/scripts/supermirror
10
14612.2.10 by William Grant
Fix some lp.codehosting imports to be first again. It loads bzr plugins :(
11
# FIRST Ensure correct plugins are loaded. Do not delete this comment or the
12
# line below this comment.
13
import lp.codehosting
13261.7.1 by Jelmer Vernooij
Update to a newer version of bzr, 2.4b4.
14
13604.1.8 by Jelmer Vernooij
BranchPolicy -> BranchOpenPolicy.
15
from bzrlib import (
16
    errors,
13604.1.10 by Jelmer Vernooij
More moving things around.
17
    urlutils,
13604.1.8 by Jelmer Vernooij
BranchPolicy -> BranchOpenPolicy.
18
    )
13261.7.11 by Jelmer Vernooij
Format imports.
19
from bzrlib.branch import Branch
13261.7.1 by Jelmer Vernooij
Update to a newer version of bzr, 2.4b4.
20
from bzrlib.bzrdir import BzrDir
13811.1.1 by Jeroen Vermeulen
More lint.
21
from bzrlib.plugins.loom.branch import LoomSupport
13261.7.11 by Jelmer Vernooij
Format imports.
22
from bzrlib.plugins.weave_fmt.branch import BzrBranchFormat4
13261.7.1 by Jelmer Vernooij
Update to a newer version of bzr, 2.4b4.
23
from bzrlib.plugins.weave_fmt.repository import (
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
24
    RepositoryFormat4,
25
    RepositoryFormat5,
26
    RepositoryFormat6,
27
    )
13811.1.1 by Jeroen Vermeulen
More lint.
28
from bzrlib.transport import get_transport
13604.1.10 by Jelmer Vernooij
More moving things around.
29
import bzrlib.ui
7755.2.1 by Michael Hudson
test passes
30
from bzrlib.ui import SilentUIFactory
13604.1.10 by Jelmer Vernooij
More moving things around.
31
from lazr.uri import (
32
    InvalidURIError,
33
    URI,
34
    )
2770.1.55 by Guilherme Salgado
Copy jblack's supermirror pull script files into lib/canonical/launchpad/scripts/supermirror
35
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
36
from lp.code.bzr import (
37
    BranchFormat,
38
    RepositoryFormat,
39
    )
40
from lp.code.enums import BranchType
9478.1.4 by Tim Penhey
Move identical_formats and get_vfs_format_classes into bzrutils.
41
from lp.codehosting.bzrutils import identical_formats
42
from lp.codehosting.puller import get_lock_id_for_branch_id
13604.1.6 by Jelmer Vernooij
Move SafeBranchOpener to its own file.
43
from lp.codehosting.safe_open import (
13604.1.10 by Jelmer Vernooij
More moving things around.
44
    BadUrl,
13604.1.6 by Jelmer Vernooij
Move SafeBranchOpener to its own file.
45
    BranchLoopError,
13604.1.9 by Jelmer Vernooij
Make BranchMirrorPolicy derive from BranchOpenPolicy.
46
    BranchOpenPolicy,
13604.1.6 by Jelmer Vernooij
Move SafeBranchOpener to its own file.
47
    BranchReferenceForbidden,
48
    SafeBranchOpener,
49
    )
14612.2.10 by William Grant
Fix some lp.codehosting imports to be first again. It loads bzr plugins :(
50
from lp.services.config import config
51
from lp.services.webapp import errorlog
3691.429.5 by David Allouche
branch puller fails early for ssh, bzr+ssh and launchpad.net urls
52
53
4792.2.10 by David Allouche
_checkBranchReference raises if a branch reference is found and the branch_type does not allow them.
54
# The names this module exports for ``from ... import *``.
__all__ = [
    'BadUrlLaunchpad',
    'BadUrlScheme',
    'BadUrlSsh',
    'BranchMirrorer',
    'BranchMirrorerPolicy',
    'get_canonical_url_for_branch_name',
    'install_worker_ui_factory',
    'PullerWorker',
    'PullerWorkerProtocol',
    ]
3691.429.5 by David Allouche
branch puller fails early for ssh, bzr+ssh and launchpad.net urls
65
66
13604.1.10 by Jelmer Vernooij
More moving things around.
67
class BadUrlSsh(BadUrl):
    """Tried to access a branch from sftp or bzr+ssh.

    `PullerWorker.mirror` reports this to the user as a request to
    register an HTTP location for the branch instead.
    """
69
70
71
class BadUrlLaunchpad(BadUrl):
    """Tried to access a branch from launchpad.net.

    `PullerWorker.mirror` reports this to the user as "Launchpad does not
    mirror branches from Launchpad."
    """
73
74
75
class BadUrlScheme(BadUrl):
    """Found a URL with an untrusted scheme.

    :ivar scheme: The scheme of the rejected URL.  `PullerWorker.mirror`
        reads it to name the scheme in its failure message.
    """

    def __init__(self, scheme, url):
        """Construct the error for `url`, whose scheme is `scheme`.

        :param scheme: The scheme of the rejected URL.
        :param url: The URL that was rejected.
        """
        # Both values go to the base class; the scheme is additionally
        # kept as an attribute so callers can get at it on its own.
        BadUrl.__init__(self, scheme, url)
        self.scheme = scheme
81
82
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
83
def get_canonical_url_for_branch_name(unique_name):
    """Build the URL of the branch called `unique_name`, for error reports.

    This stands in for `canonical_url(branch)`: the real function cannot
    be used here because we do not have access to real content objects.

    :param unique_name: The unique name of the branch.  It becomes the
        path of the returned URL.
    :return: A URL on the configured code vhost.  The scheme is 'https'
        when the vhosts configuration says to use https, 'http' otherwise.
    """
    scheme = 'https' if config.vhosts.use_https else 'http'
    code_hostname = config.vhost.code.hostname
    # Joining (rather than %-formatting) keeps the original's behaviour of
    # rejecting a non-string unique_name instead of silently stringifying.
    return ''.join([scheme, '://', code_hostname, '/', unique_name])
4898.2.11 by Jonathan Lange
Make it so that the protocol is used for logging and talking to the authserver.
95
96
97
class PullerWorkerProtocol:
    """Tells the puller scheduler what the worker is doing.

    Each event is written to the output stream as a run of netstrings:
    first the event name, then the number of arguments, then every
    argument in turn.  The events sent are startMirroring, branchChanged,
    mirrorFailed, progressMade and log.
    """

    def __init__(self, output):
        """Construct a protocol that writes its events to `output`.

        :param output: An object with a `write` method.
        """
        self.out_stream = output

    def sendNetstring(self, string):
        """Write `string` to the output as '<length>:<string>,'."""
        length = len(string)
        self.out_stream.write('%d:%s,' % (length, string))

    def sendEvent(self, command, *args):
        """Send the event `command` followed by `args`.

        The command name goes first, then the number of arguments, then
        the `str()` of each argument, each as a separate netstring.
        """
        send = self.sendNetstring
        send(command)
        send(str(len(args)))
        # Arguments are converted and written one at a time, so whatever
        # precedes a failing conversion has already been sent.
        for value in args:
            send(str(value))

    def startMirroring(self):
        """Send a 'startMirroring' event, which takes no arguments."""
        self.sendEvent('startMirroring')

    def branchChanged(self, stacked_on_url, revid_before, revid_after,
                      control_string, branch_string, repository_string):
        """Send a 'branchChanged' event carrying the six given values."""
        details = (
            stacked_on_url, revid_before, revid_after,
            control_string, branch_string, repository_string)
        self.sendEvent('branchChanged', *details)

    def mirrorFailed(self, message, oops_id):
        """Send a 'mirrorFailed' event with `message` and `oops_id`."""
        self.sendEvent('mirrorFailed', message, oops_id)

    def progressMade(self, type):
        """Send a 'progressMade' event, which takes no arguments."""
        # 'type' is deliberately not sent: the kind of progress made only
        # matters to the tests of the progress reporting.
        self.sendEvent('progressMade')

    def log(self, fmt, *args):
        """Send a 'log' event whose single argument is `fmt % args`."""
        message = fmt % args
        self.sendEvent('log', message)
135
4898.2.9 by Jonathan Lange
Make it so that BranchToMirror takes a logger in its constructor, not in mirror()
136
13604.1.10 by Jelmer Vernooij
More moving things around.
137
class BranchMirrorerPolicy(BranchOpenPolicy):
    """A branch-opening policy that also says how mirrors are made.

    On top of what `BranchOpenPolicy` provides, it decides how the
    destination branch is created and what it should be stacked on.
    """

    def createDestinationBranch(self, source_branch, destination_url):
        """Create a destination branch for 'source_branch'.

        The branch made at 'destination_url' has the same format as
        'source_branch'.  Anything already at 'destination_url' is deleted
        first.  Generally the new branch will have no revisions, but they
        will be copied for import branches, because this can be done
        safely and efficiently with a vfs-level copy (see
        `ImportedBranchPolicy`).

        :param source_branch: The Bazaar branch that will be mirrored.
        :param destination_url: The place to make the destination branch.
            This URL must point to a writable location.
        :return: The destination branch.
        """
        transport = get_transport(destination_url)
        if transport.has('.'):
            # Clear out whatever is there so the clone starts afresh.
            transport.delete_tree('.')
        # Loom branches are cloned without naming a revision; every other
        # branch is cloned at the 'null:' revision.
        is_loom = isinstance(source_branch, LoomSupport)
        revision_id = None if is_loom else 'null:'
        source_branch.bzrdir.clone_on_transport(
            transport, revision_id=revision_id)
        return Branch.open(destination_url)

    def getStackedOnURLForDestinationBranch(self, source_branch,
                                            destination_url):
        """Say what the mirror of `source_branch` should be stacked on.

        That is, the URL the branch should be stacked on once it has been
        mirrored to `destination_url`.  This implementation always answers
        None.
        """
        return None
174
175
13604.1.1 by Jelmer Vernooij
Initial work on SafeBranchOpener.
176
class BranchMirrorer(object):
    """A `BranchMirrorer` safely makes mirrors of branches.

    It is given a `BranchOpenPolicy`, which tells it which URLs are safe
    to access and whether or not to follow branch references.

    The mirrorer knows how to follow branch references, create new
    mirrors, update existing mirrors, determine stacked-on branches and
    the like.

    Public methods are `open` and `mirror`.
    """

    def __init__(self, policy, protocol=None, log=None):
        """Construct a branch mirrorer that obeys 'policy'.

        :param policy: A `BranchOpenPolicy` that tells us what URLs are
            valid and similar things.
        :param protocol: Stored as `self.protocol`; this class does not
            otherwise use it.
        :param log: A callable which can be called with a format string
            and arguments to log messages in the scheduler, or None, in
            which case log messages are discarded.
        """
        self.policy = policy
        self.protocol = protocol
        self.opener = SafeBranchOpener(policy)
        if log is None:
            # No logger supplied: accept any arguments and drop them.
            log = lambda *args: None
        self.log = log

    def createDestinationBranch(self, source_branch, destination_url):
        """Create a destination branch for 'source_branch'.

        The branch made at 'destination_url' is a mirror of
        'source_branch'.  Any content already at 'destination_url' will be
        deleted.  The policy does the actual work; it is run through the
        opener's `runWithTransformFallbackLocationHookInstalled`.

        :param source_branch: The Bazaar branch that will be mirrored.
        :param destination_url: The place to make the destination branch.
            This URL must point to a writable location.
        :return: The destination branch.
        """
        run_with_hook = (
            self.opener.runWithTransformFallbackLocationHookInstalled)
        return run_with_hook(
            self.policy.createDestinationBranch, source_branch,
            destination_url)

    def openDestinationBranch(self, source_branch, destination_url):
        """Open or create the destination branch at 'destination_url'.

        An existing branch is reused only when its format is identical to
        that of 'source_branch'; otherwise a new one is created.

        :param source_branch: The Bazaar branch that will be mirrored.
        :param destination_url: The place to make the destination branch.
            This URL must point to a writable location.
        :return: The opened or created branch.
        """
        try:
            existing = Branch.open(destination_url)
        except (errors.NotBranchError, errors.IncompatibleRepositories):
            # Nothing usable is there yet: make a new branch in the same
            # format as the source branch.
            return self.createDestinationBranch(
                source_branch, destination_url)
        if not identical_formats(source_branch, existing):
            # The destination must be in the same format as the source,
            # so replace it.
            self.log('Formats differ.')
            return self.createDestinationBranch(
                source_branch, destination_url)
        return existing

    def updateBranch(self, source_branch, dest_branch):
        """Bring 'dest_branch' up-to-date with 'source_branch'.

        The stacked-on URL of 'dest_branch' is set to what the policy
        asks for, and then 'source_branch' is pulled into 'dest_branch',
        overwriting whatever was there.

        This method assumes that 'source_branch' and 'dest_branch' both
        have the same format.

        :return: The stacked-on URL that was requested, or '' if there is
            none or it could not be set.
        """
        stacked_on_url = self.policy.getStackedOnURLForDestinationBranch(
            source_branch, dest_branch.base)
        try:
            dest_branch.set_stacked_on_url(stacked_on_url)
        except (errors.UnstackableRepositoryFormat,
                errors.UnstackableBranchFormat,
                errors.IncompatibleRepositories):
            # The destination cannot be stacked like that; carry on as if
            # no stacking had been asked for.
            stacked_on_url = None
        dest_branch.pull(source_branch, overwrite=True)
        if stacked_on_url is None:
            # '' stands for "no stacked on location" because XML-RPC
            # doesn't support None.
            return ''
        return stacked_on_url

    def mirror(self, source_branch, destination_url):
        """Mirror 'source_branch' to 'destination_url'.

        :return: ``(branch, revid_before, stacked_on_url)``: the
            destination branch, its tip revision before it was updated,
            and the value returned by `updateBranch`.
        """
        dest_branch = self.openDestinationBranch(
            source_branch, destination_url)
        revid_before = dest_branch.last_revision()
        # A locked branch gets its lock broken.  Our special UI factory
        # allows the breaking of locks that look like they were left over
        # from previous puller worker runs.  We block on other locks and
        # fail if they are not broken before the timeout expires
        # (currently 5 minutes).
        if dest_branch.get_physical_lock_status():
            dest_branch.break_lock()
        stacked_on_url = self.updateBranch(source_branch, dest_branch)
        return dest_branch, revid_before, stacked_on_url

    def open(self, url):
        """Open the branch at 'url' using this mirrorer's safe opener."""
        return self.opener.open(url)
281
6832.1.6 by Michael Hudson
progress towards separating the url checking from the worker object
282
4898.2.43 by Jonathan Lange
worker.BranchToMirror -> worker.PullerWorker.
283
class PullerWorker:
2976.8.4 by Robert Collins
Reviewer requested updates.
284
    """This class represents a single branch that needs mirroring.
285
3850.1.7 by jml at canonical
BranchToMirror now takes a unique_name, and we use it to generate the
286
    It has a source URL, a destination URL, a database id, a unique name and a
2976.8.4 by Robert Collins
Reviewer requested updates.
287
    status client which is used to report on the mirror progress.
288
    """
2770.1.62 by Guilherme Salgado
Huge cleanup done on a pair programming session with Rob.
289
6832.3.1 by Michael Hudson
review fixes
290
    def _checkerForBranchType(self, branch_type):
        """Make the `BranchMirrorer` to use for branches of `branch_type`.

        The mirrorer comes from `make_branch_mirrorer`, which is handed
        this worker's protocol and its default stacked-on URL.

        :param branch_type: A `BranchType`. The policy of the mirrorer
            will be based on this.
        :return: A `BranchMirrorer`.
        """
        mirrorer = make_branch_mirrorer(
            branch_type, protocol=self.protocol,
            mirror_stacked_on_url=self.default_stacked_on_url)
        return mirrorer
6832.3.1 by Michael Hudson
review fixes
300
4898.2.15 by Jonathan Lange
Make the protocol a parameter of BranchToMirror
301
    def __init__(self, src, dest, branch_id, unique_name, branch_type,
14104.6.23 by Robert Collins
Nuke setOopsToken unneeded in a concurrency safe world.
302
                 default_stacked_on_url, protocol, branch_mirrorer=None):
6832.1.12 by Michael Hudson
moar docstrings
303
        """Construct a `PullerWorker`.
304
305
        :param src: The URL to pull from.
306
        :param dest: The URL to pull into.
307
        :param branch_id: The database ID of the branch we're pulling.
308
        :param unique_name: The unique_name of the branch we're pulling
309
            (without the tilde).
310
        :param branch_type: A member of the BranchType enum.  It is expected
311
            that tests that do not depend on its value will pass None.
6999.4.38 by Jonathan Lange
Call it default_stacked_on_url, because we're including the leading
312
        :param default_stacked_on_url: The unique name of the default
6999.4.16 by Jonathan Lange
Always call it default_stacked_on_branch rather than
313
            stacked-on branch for the product of the branch we are mirroring.
314
            None or '' if there is no such branch.
6832.1.12 by Michael Hudson
moar docstrings
315
        :param protocol: An instance of `PullerWorkerProtocol`.
10100.1.22 by Jonathan Lange
flakes
316
        :param branch_mirrorer: An instance of `BranchMirrorer`.  If not
317
            passed, one will be chosen based on the value of `branch_type`.
6832.1.12 by Michael Hudson
moar docstrings
318
        """
2770.1.62 by Guilherme Salgado
Huge cleanup done on a pair programming session with Rob.
319
        self.source = src
320
        self.dest = dest
2770.1.65 by Guilherme Salgado
lots of fixes
321
        self.branch_id = branch_id
4792.2.38 by David Allouche
Rename branch_unique_name to unique_name in BranchToMirror.
322
        self.unique_name = unique_name
4792.2.5 by David Allouche
Move getTraverseReferences into BranchToMirror.
323
        self.branch_type = branch_type
6999.4.42 by Jonathan Lange
Correctly handle the case where the default stacked-on branch is not set.
324
        if default_stacked_on_url == '':
325
            default_stacked_on_url = None
6999.4.38 by Jonathan Lange
Call it default_stacked_on_url, because we're including the leading
326
        self.default_stacked_on_url = default_stacked_on_url
7109.2.1 by Michael Hudson
add the ability to log from the puller worker
327
        self.protocol = protocol
328
        if protocol is not None:
329
            self.protocol.branch_id = branch_id
6999.4.26 by Jonathan Lange
Opener is now a Mirrorer.
330
        if branch_mirrorer is None:
331
            branch_mirrorer = self._checkerForBranchType(branch_type)
332
        self.branch_mirrorer = branch_mirrorer
6125.21.6 by Jonathan Lange
Pull branches via the lp-internal:/// URL protocol
333
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
334
    def _record_oops(self, message=None):
        """Record an oops for the exception currently being handled.

        This must only be called while handling an exception, because the
        exception to report is taken from `sys.exc_info()`.

        :param message: custom explanatory error message. Do not use
            str(exception) to fill in this parameter, it should only be
            set when a human readable error has been explicitly
            generated.  It is recorded as `str(message)`, so the default
            of None is recorded as 'None'.
        :return: The `oopsid` of the request the oops was recorded
            against.
        """
        details = [
            ('branch_id', self.branch_id),
            ('source', self.source),
            ('dest', self.dest),
            ('error-explanation', str(message)),
            ]
        request = errorlog.ScriptRequest(details)
        request.URL = get_canonical_url_for_branch_name(self.unique_name)
        errorlog.globalErrorUtility.raising(sys.exc_info(), request)
        return request.oopsid
349
350
    def _mirrorFailed(self, error):
351
        oops_id = self._record_oops(error)
6832.1.19 by Michael Hudson
remove pointless arguments to startMirroring etc
352
        self.protocol.mirrorFailed(error, oops_id)
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
353
6125.21.5 by Jonathan Lange
Extract a 'mirrorWithoutChecks' function to make testing easier.
354
    def mirrorWithoutChecks(self):
        """Mirror the source branch to the destination branch.

        No errors are handled and nothing is sent via the reporting
        protocol here -- a "naked mirror", if you will. This is
        particularly useful for tests that want to mirror a branch and be
        informed immediately of any errors.

        The server from `get_rw_server` is started for the duration of
        the mirroring and is always stopped afterwards.

        :return: Whatever the branch mirrorer's `mirror` returns.  For a
            `BranchMirrorer` that is ``(branch, revid_before,
            stacked_on_url)``, where ``branch`` is the destination branch
            and ``revid_before`` was the tip revision *before* the
            mirroring process ran.
        """
        # Imported here to avoid a circular import.
        from lp.codehosting.vfs import get_rw_server

        rw_server = get_rw_server()
        rw_server.start_server()
        try:
            mirrorer = self.branch_mirrorer
            opened_source = mirrorer.open(self.source)
            return self.branch_mirrorer.mirror(opened_source, self.dest)
        finally:
            rw_server.stop_server()
6125.21.5 by Jonathan Lange
Extract a 'mirrorWithoutChecks' function to make testing easier.
376
4898.2.9 by Jonathan Lange
Make it so that BranchToMirror takes a logger in its constructor, not in mirror()
377
    def mirror(self):
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
378
        """Open source and destination branches and pull source into
379
        destination.
380
        """
6832.1.19 by Michael Hudson
remove pointless arguments to startMirroring etc
381
        self.protocol.startMirroring()
3691.396.6 by David Allouche
record oopses for all errors
382
        try:
9590.1.71 by Michael Hudson
merge the actual changes to the puller from the next pipe
383
            dest_branch, revid_before, stacked_on_url = \
384
                self.mirrorWithoutChecks()
2770.1.65 by Guilherme Salgado
lots of fixes
385
        # add further encountered errors from the production runs here
386
        # ------ HERE ---------
387
        #
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
388
        except urllib2.HTTPError, e:
389
            msg = str(e)
390
            if int(e.code) == httplib.UNAUTHORIZED:
3550.1.12 by Andrew Bennetts
Use bzrdir.sprout rather than manually assembling a branch with all the right formats and contents.
391
                # Maybe this will be caught in bzrlib one day, and then we'll
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
392
                # be able to get rid of this.
393
                # https://launchpad.net/products/bzr/+bug/42383
4792.2.17 by David Allouche
Fix a bug with the NotBranchError message, simplify the UNAUTHORIZED error message, test suite cleanups.
394
                msg = "Authentication required."
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
395
            self._mirrorFailed(msg)
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
396
397
        except socket.error, e:
398
            msg = 'A socket error occurred: %s' % str(e)
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
399
            self._mirrorFailed(msg)
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
400
6889.2.6 by Michael Hudson
just import bzrlib.errors, not some random sampling of names from it
401
        except errors.UnsupportedFormatError, e:
3691.429.3 by David Allouche
fix error messages that used the term "supermirror"
402
            msg = ("Launchpad does not support branches from before "
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
403
                   "bzr 0.7. Please upgrade the branch using bzr upgrade.")
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
404
            self._mirrorFailed(msg)
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
405
6889.2.6 by Michael Hudson
just import bzrlib.errors, not some random sampling of names from it
406
        except errors.UnknownFormatError, e:
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
407
            self._mirrorFailed(e)
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
408
6889.2.6 by Michael Hudson
just import bzrlib.errors, not some random sampling of names from it
409
        except (errors.ParamikoNotPresent, BadUrlSsh), e:
3691.429.3 by David Allouche
fix error messages that used the term "supermirror"
410
            msg = ("Launchpad cannot mirror branches from SFTP and SSH URLs."
411
                   " Please register a HTTP location for this branch.")
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
412
            self._mirrorFailed(msg)
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
413
3691.429.5 by David Allouche
branch puller fails early for ssh, bzr+ssh and launchpad.net urls
414
        except BadUrlLaunchpad:
415
            msg = "Launchpad does not mirror branches from Launchpad."
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
416
            self._mirrorFailed(msg)
3691.429.5 by David Allouche
branch puller fails early for ssh, bzr+ssh and launchpad.net urls
417
6832.1.27 by Michael Hudson
fix the bug!
418
        except BadUrlScheme, e:
419
            msg = "Launchpad does not mirror %s:// URLs." % e.scheme
6832.1.15 by Michael Hudson
some error tests begin to pass
420
            self._mirrorFailed(msg)
421
6889.2.6 by Michael Hudson
just import bzrlib.errors, not some random sampling of names from it
422
        except errors.NotBranchError, e:
423
            hosted_branch_error = errors.NotBranchError(
6999.4.34 by Jonathan Lange
Only one tilde needed.
424
                "lp:%s" % self.unique_name)
4792.2.17 by David Allouche
Fix a bug with the NotBranchError message, simplify the UNAUTHORIZED error message, test suite cleanups.
425
            message_by_type = {
4792.2.42 by David Allouche
Improved code layout in NotBranchError handler in BranchToMirror.
426
                BranchType.HOSTED: str(hosted_branch_error),
4792.2.17 by David Allouche
Fix a bug with the NotBranchError message, simplify the UNAUTHORIZED error message, test suite cleanups.
427
                BranchType.IMPORTED: "Not a branch.",
428
                }
429
            msg = message_by_type.get(self.branch_type, str(e))
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
430
            self._mirrorFailed(msg)
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
431
4792.2.16 by David Allouche
Hook in _checkBranchReference and corresponding exception handling.
432
        except BranchReferenceForbidden, e:
5294.2.5 by Michael Hudson
fix lint
433
            msg = ("Branch references are not allowed for branches of type "
434
                   "%s." % (self.branch_type.title,))
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
435
            self._mirrorFailed(msg)
4792.2.16 by David Allouche
Hook in _checkBranchReference and corresponding exception handling.
436
7167.9.3 by Michael Hudson
branch reference tests pass again
437
        except BranchLoopError, e:
4792.2.16 by David Allouche
Hook in _checkBranchReference and corresponding exception handling.
438
            msg = "Circular branch reference."
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
439
            self._mirrorFailed(msg)
4792.2.16 by David Allouche
Hook in _checkBranchReference and corresponding exception handling.
440
6889.2.6 by Michael Hudson
just import bzrlib.errors, not some random sampling of names from it
441
        except errors.BzrError, e:
4966.2.5 by jml at canonical
Reply to most of Andrew's review comments.
442
            self._mirrorFailed(e)
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
443
5138.3.2 by jml at canonical
Make the test pass.
444
        except InvalidURIError, e:
445
            self._mirrorFailed(e)
446
3691.396.12 by David Allouche
review fixes
447
        except (KeyboardInterrupt, SystemExit):
448
            # Do not record OOPS for those exceptions.
449
            raise
450
3450.1.3 by Guilherme Salgado
Add lots of specific except clauses for some of the common errors we identified.
451
        else:
10067.2.1 by Michael Hudson
log revid from before the puller runs
452
            revid_after = dest_branch.last_revision()
9590.1.71 by Michael Hudson
merge the actual changes to the puller from the next pipe
453
            # XXX: Aaron Bentley 2008-06-13
454
            # Bazaar does not provide a public API for learning about
455
            # format markers.  Fix this in Bazaar, then here.
456
            control_string = dest_branch.bzrdir._format.get_format_string()
457
            if dest_branch._format.__class__ is BzrBranchFormat4:
458
                branch_string = BranchFormat.BZR_BRANCH_4.title
459
            else:
460
                branch_string = dest_branch._format.get_format_string()
461
            repository_format = dest_branch.repository._format
462
            if repository_format.__class__ is RepositoryFormat6:
463
                repository_string = RepositoryFormat.BZR_REPOSITORY_6.title
464
            elif repository_format.__class__ is RepositoryFormat5:
465
                repository_string = RepositoryFormat.BZR_REPOSITORY_5.title
466
            elif repository_format.__class__ is RepositoryFormat4:
467
                repository_string = RepositoryFormat.BZR_REPOSITORY_4.title
468
            else:
469
                repository_string = repository_format.get_format_string()
470
            self.protocol.branchChanged(
471
                stacked_on_url, revid_before, revid_after, control_string,
472
                branch_string, repository_string)
2770.1.55 by Guilherme Salgado
Copy jblack's supermirror pull script files into lib/canonical/launchpad/scripts/supermirror
473
2770.1.62 by Guilherme Salgado
Huge cleanup done on a pair programming session with Rob.
474
    def __eq__(self, other):
        """Compare two workers by the locations they mirror between.

        :param other: The object to compare against.
        :return: True when `other` has the same `source` and `dest` as this
            worker and False when either differs.  `NotImplemented` is
            returned when `other` lacks a `source` or `dest` attribute, so
            that Python falls back to its default comparison instead of
            failing with an AttributeError.
        """
        try:
            other_source = other.source
            other_dest = other.dest
        except AttributeError:
            # Comparing against something that is not worker-like (None, a
            # string, ...) is not an error; let Python decide the outcome.
            return NotImplemented
        return self.source == other_source and self.dest == other_dest
2770.1.55 by Guilherme Salgado
Copy jblack's supermirror pull script files into lib/canonical/launchpad/scripts/supermirror
476
2770.1.62 by Guilherme Salgado
Huge cleanup done on a pair programming session with Rob.
477
    def __repr__(self):
        """Return a debugging string naming the source, dest and identity.

        The identity is the object's `id()` rendered in hexadecimal.
        """
        details = (self.source, self.dest, id(self))
        return "<PullerWorker source=%s dest=%s at %x>" % details
5138.7.13 by Michael Hudson
install an appropriate progress bar.
480
481
8322.5.2 by Michael Hudson
use CONSTANTs rather than string literals
482
# Activity names handed to the worker protocol's `progressMade`: one for
# network reads and writes, one for progress-bar updates.
WORKER_ACTIVITY_NETWORK = 'network'
WORKER_ACTIVITY_PROGRESS_BAR = 'progress bar'
484
10100.1.22 by Jonathan Lange
flakes
485
7755.2.1 by Michael Hudson
test passes
486
class PullerWorkerUIFactory(SilentUIFactory):
    """A UIFactory that reports activity and breaks our own stale locks.

    Progress updates and network reads/writes are reported to the puller
    worker protocol through its `progressMade` method.  Confirmation
    requests are answered with yes only when the prompt mentions the lock id
    of the branch this worker is handling.
    """

    def __init__(self, puller_worker_protocol):
        """Create the factory.

        :param puller_worker_protocol: The protocol object that is told
            about progress and whose `branch_id` is used to recognise our
            own locks.
        """
        SilentUIFactory.__init__(self)
        self.puller_worker_protocol = puller_worker_protocol

    def confirm_action(self, prompt, confirmation_id, args):
        """If we're asked to break a lock like a stale lock of ours, say yes.

        :param prompt: A %-format string describing the action.
        :param confirmation_id: Identifier of the kind of action being
            confirmed; only 'bzrlib.lockdir.break' is expected here.
        :param args: The values interpolated into `prompt`.
        :return: True if the interpolated prompt contains the lock id for
            this worker's branch, False otherwise.
        :raise AssertionError: if `confirmation_id` is anything other than
            'bzrlib.lockdir.break'.
        """
        # Raise explicitly rather than using an `assert` statement: asserts
        # are stripped under `python -O`, which would let this method answer
        # confirmation requests it was never meant to handle.
        if confirmation_id != 'bzrlib.lockdir.break':
            raise AssertionError(
                "Didn't expect confirmation id %r" % (confirmation_id,))
        branch_id = self.puller_worker_protocol.branch_id
        prompt = prompt % args
        return get_lock_id_for_branch_id(branch_id) in prompt

    def _progress_updated(self, task):
        """Tell the protocol that progress-bar activity happened.

        :param task: The task whose progress changed; not used here.
        """
        self.puller_worker_protocol.progressMade(WORKER_ACTIVITY_PROGRESS_BAR)

    def report_transport_activity(self, transport, byte_count, direction):
        """Tell the protocol about network reads and writes.

        :param transport: The transport that saw the activity; not used.
        :param byte_count: Number of bytes involved; not used.
        :param direction: Kind of activity.  Only 'read' and 'write' are
            reported; anything else is ignored.
        """
        # Only 'read' and 'write' count as network activity.  The direction
        # is checked explicitly, as suggested by a bzr developer, in case
        # bzrlib later reports other kinds of transport events (a soft
        # "no I/O for a while" timeout was the example given).
        if direction in ('read', 'write'):
            self.puller_worker_protocol.progressMade(WORKER_ACTIVITY_NETWORK)
7755.2.1 by Michael Hudson
test passes
515
5294.2.8 by Michael Hudson
OOMPH!
516
517
def install_worker_ui_factory(puller_worker_protocol):
    """Make bzrlib use a `PullerWorkerUIFactory` as its global UI factory.

    The installed factory has two jobs:

    1) Inform the given PullerWorkerProtocol whenever progress is made.
    2) Break locks if and only if they appear to be stale locks
       created by another puller worker process.

    :param puller_worker_protocol: The protocol the new factory reports to.
    """
    worker_ui_factory = PullerWorkerUIFactory(puller_worker_protocol)
    bzrlib.ui.ui_factory = worker_ui_factory
13604.1.10 by Jelmer Vernooij
More moving things around.
527
528
529
class MirroredBranchPolicy(BranchMirrorerPolicy):
    """Mirroring policy for MIRRORED branches.

    In summary:

     - branch references are followed,
     - only http: and https: URLs outside Launchpad may be opened.
    """

    def __init__(self, stacked_on_url=None):
        """Remember the default stacked-on URL.

        :param stacked_on_url: The URL destination branches should be
            stacked on, or None if there is none.
        """
        self.stacked_on_url = stacked_on_url

    def getStackedOnURLForDestinationBranch(self, source_branch,
                                            destination_url):
        """Return the stacked on URL for the destination branch.

        Mirrored branches are stacked on the default stacked-on branch of
        their product, except when we're mirroring the default stacked-on
        branch itself.

        :param source_branch: The branch being mirrored; not consulted.
        :param destination_url: The URL the mirror is written to.
        :return: The configured stacked-on URL, or None when none is
            configured or when it resolves to `destination_url` itself.
        """
        default_url = self.stacked_on_url
        if default_url is not None:
            # The configured URL may be relative, so resolve it against the
            # destination before checking for self-stacking.
            resolved_url = urlutils.join(destination_url, default_url)
            if resolved_url != destination_url:
                return default_url
        return None

    def shouldFollowReferences(self):
        """See `BranchOpenPolicy.shouldFollowReferences`.

        References are traversed for MIRRORED branches: they are a useful
        redirection mechanism, and following them keeps us consistent with
        the bzr command line.
        """
        return True

    def transformFallbackLocation(self, branch, url):
        """See `BranchOpenPolicy.transformFallbackLocation`.

        A mirrored branch stacks on whatever the remote branch claims to
        stack on, but the resulting URL still needs to be checked; the
        second element of the returned pair is therefore True.
        """
        absolute_url = urlutils.join(branch.base, url)
        return absolute_url, True

    def checkOneURL(self, url):
        """See `BranchOpenPolicy.checkOneURL`.

        We refuse to mirror from Launchpad or a ssh-like or file URL.

        :raise BadUrlLaunchpad: if `url` is under the Launchpad main site
            domain.
        :raise BadUrl: if `url` is under a blacklisted hostname.
        :raise BadUrlSsh: if the scheme is sftp or bzr+ssh.
        :raise BadUrlScheme: if the scheme is anything else other than
            http or https.
        """
        # Avoid circular import
        from lp.code.interfaces.branch import get_blacklisted_hostnames
        uri = URI(url)
        launchpad_domain = config.vhost.mainsite.hostname
        if uri.underDomain(launchpad_domain):
            raise BadUrlLaunchpad(url)
        blacklisted = any(
            uri.underDomain(hostname)
            for hostname in get_blacklisted_hostnames())
        if blacklisted:
            raise BadUrl(url)
        scheme = uri.scheme
        if scheme in ('sftp', 'bzr+ssh'):
            raise BadUrlSsh(url)
        if scheme not in ('http', 'https'):
            raise BadUrlScheme(scheme, url)
591
592
593
class ImportedBranchPolicy(BranchMirrorerPolicy):
    """Mirroring policy for IMPORTED branches.

    In summary:

     - branch references are not followed,
     - URLs must start with the prefix we expect for imported branches.
    """

    def createDestinationBranch(self, source_branch, destination_url):
        """See `BranchOpenPolicy.createDestinationBranch`.

        Because we control the process that creates import branches, a
        vfs-level copy is safe and more efficient than a bzr fetch.

        :param source_branch: The import branch to copy.
        :param destination_url: Where the copy is written; anything already
            there is deleted first.
        :return: The branch opened from the destination transport.
        """
        source_transport = source_branch.bzrdir.root_transport
        dest_transport = get_transport(destination_url)
        copy_is_consistent = False
        while not copy_is_consistent:
            # Repeat the copy until the remote file list is the same before
            # and after it, to catch the case where the remote side is
            # being mutated while we copy.
            if dest_transport.has('.'):
                dest_transport.delete_tree('.')
            listing_before = set(source_transport.iter_files_recursive())
            source_transport.copy_tree_to_transport(dest_transport)
            listing_after = set(source_transport.iter_files_recursive())
            copy_is_consistent = listing_before == listing_after
        return Branch.open_from_transport(dest_transport)

    def shouldFollowReferences(self):
        """See `BranchOpenerPolicy.shouldFollowReferences`.

        References are not traversed for IMPORTED branches because the
        code-import system should never produce branch references.
        """
        return False

    def transformFallbackLocation(self, branch, url):
        """See `BranchOpenerPolicy.transformFallbackLocation`.

        Import branches should not be stacked, ever, so being asked about a
        fallback location is always an error.

        :raise AssertionError: always.
        """
        raise AssertionError("Import branch unexpectedly stacked!")

    def checkOneURL(self, url):
        """See `BranchOpenerPolicy.checkOneURL`.

        If the URL we are mirroring from does not start how we expect the
        pull URLs of import branches to start, something has gone badly
        wrong.

        :raise AssertionError: if `url` does not start with the configured
            import root URL.
        """
        expected_prefix = config.launchpad.bzr_imports_root_url
        if url.startswith(expected_prefix):
            return
        raise AssertionError("Bogus URL for imported branch: %r" % url)
648
649
650
def make_branch_mirrorer(branch_type, protocol=None,
                         mirror_stacked_on_url=None):
    """Build a `BranchMirrorer` whose policy matches the branch type.

    :param branch_type: A `BranchType` to select a policy by.  MIRRORED and
        IMPORTED are the only types accepted.
    :param protocol: Optional protocol for the mirrorer to work with.
        If given, its `log` is passed to the mirrorer as well.
    :param mirror_stacked_on_url: For mirrored branches, the default URL
        to stack on.  Ignored for other branch types.
    :return: A `BranchMirrorer`.
    :raise AssertionError: if `branch_type` is not one of the accepted
        types.
    """
    if branch_type == BranchType.MIRRORED:
        opener_policy = MirroredBranchPolicy(mirror_stacked_on_url)
    elif branch_type == BranchType.IMPORTED:
        opener_policy = ImportedBranchPolicy()
    else:
        raise AssertionError("Unexpected branch type: %r" % branch_type)

    # Without a protocol there is nothing to log through.
    log_function = None if protocol is None else protocol.log
    return BranchMirrorer(opener_policy, protocol, log_function)