~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
# Copyright 2009 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""The code import worker. This imports code from foreign repositories."""

__metaclass__ = type
__all__ = [
    'BazaarBranchStore',
    'BzrImportWorker',
    'BzrSvnImportWorker',
    'CSCVSImportWorker',
    'CodeImportBranchOpenPolicy',
    'CodeImportSourceDetails',
    'CodeImportWorkerExitCode',
    'ForeignTreeStore',
    'GitImportWorker',
    'HgImportWorker',
    'ImportWorker',
    'get_default_bazaar_branch_store',
    ]


import os
import shutil

from bzrlib.branch import (
    Branch,
    InterBranch,
    )
from bzrlib.bzrdir import (
    BzrDir,
    BzrDirFormat,
    )
from bzrlib.errors import (
    ConnectionError,
    InvalidEntryName,
    NoRepositoryPresent,
    NoSuchFile,
    NotBranchError,
    )
from bzrlib.transport import get_transport
import bzrlib.ui
from bzrlib.upgrade import upgrade
from bzrlib.urlutils import (
    join as urljoin,
    local_path_from_url,
    )
import cscvs
from cscvs.cmds import totla
import CVS
import SCM

from canonical.config import config

from lazr.uri import (
    InvalidURIError,
    URI,
    )

from lp.code.enums import RevisionControlSystems
from lp.code.interfaces.branch import get_blacklisted_hostnames
from lp.codehosting.codeimport.foreigntree import (
    CVSWorkingTree,
    SubversionWorkingTree,
    )
from lp.codehosting.safe_open import (
    BadUrl,
    BranchOpenPolicy,
    SafeBranchOpener,
    )
from lp.codehosting.codeimport.tarball import (
    create_tarball,
    extract_tarball,
    )
from lp.codehosting.codeimport.uifactory import LoggingUIFactory
from lp.services.propertycache import cachedproperty


class CodeImportBranchOpenPolicy(BranchOpenPolicy):
    """The rules deciding which branches a code import may open.

    The policy:
     - follows branch references,
     - rejects URLs under the Launchpad domain or a blacklisted hostname,
     - rejects URLs whose scheme is not listed in `allowed_schemes`.
    """

    allowed_schemes = ['http', 'https', 'svn', 'git', 'ftp']

    def shouldFollowReferences(self):
        """See `BranchOpenPolicy.shouldFollowReferences`.

        Branch references are always followed.
        """
        return True

    def transformFallbackLocation(self, branch, url):
        """See `BranchOpenPolicy.transformFallbackLocation`.

        The fallback location is resolved against the base of `branch`; the
        True in the result marks the resolved URL as still needing to be
        checked.
        """
        fallback_url = urljoin(branch.base, url)
        return fallback_url, True

    def checkOneURL(self, url):
        """See `BranchOpenPolicy.checkOneURL`.

        :raises BadUrl: if `url` cannot be parsed, is under the Launchpad
            main site domain, is under a blacklisted hostname, or uses a
            scheme outside `allowed_schemes`.
        """
        try:
            parsed = URI(url)
        except InvalidURIError:
            raise BadUrl(url)
        if parsed.underDomain(config.vhost.mainsite.hostname):
            raise BadUrl(url)
        blacklisted = any(
            parsed.underDomain(hostname)
            for hostname in get_blacklisted_hostnames())
        if blacklisted:
            raise BadUrl(url)
        if parsed.scheme not in self.allowed_schemes:
            raise BadUrl(url)


class CodeImportWorkerExitCode:
    """The exit statuses the code import worker script can report."""

    # Successful outcomes.
    SUCCESS = 0
    SUCCESS_NOCHANGE = 2
    SUCCESS_PARTIAL = 3

    # Failure outcomes.
    FAILURE = 1
    FAILURE_INVALID = 4
    FAILURE_UNSUPPORTED_FEATURE = 5
    FAILURE_FORBIDDEN = 6


class BazaarBranchStore:
    """The location holding the Bazaar branches produced by code imports."""

    def __init__(self, transport):
        """Create a store rooted at `transport`."""
        self.transport = transport

    def _getMirrorURL(self, db_branch_id):
        """Return the URL under the store where branch `db_branch_id` lives.

        The final path segment is the branch id as eight zero-padded hex
        digits.
        """
        branch_dirname = '%08x' % db_branch_id
        return urljoin(self.transport.base, branch_dirname)

    def pull(self, db_branch_id, target_path, required_format,
             needs_tree=False):
        """Fetch the stored branch of an import into `target_path`.

        If the store has no branch for `db_branch_id` yet, a new empty
        branch in `required_format` is created at `target_path` instead.

        :param db_branch_id: The id of the database branch of the import.
        :param target_path: Where the local copy should be placed.
        :param required_format: The format the local branch must have; a
            copied branch in another format is upgraded.
        :param needs_tree: If true, also create a working tree.
        :return: The Bazaar branch at `target_path`.
        """
        stored_url = self._getMirrorURL(db_branch_id)
        try:
            stored_bzr_dir = BzrDir.open(stored_url)
        except NotBranchError:
            # Nothing stored yet: start from an empty branch.
            new_branch = BzrDir.create_branch_and_repo(
                target_path, format=required_format)
            if needs_tree:
                new_branch.bzrdir.create_workingtree()
            return new_branch
        # Calling "stored_bzr_dir.sprout()" would be the proper approach.
        # However, a 2a fetch slowly works out which revisions are in the
        # ancestry of the stored branch's tip, which is of no interest
        # here, so the whole control directory is copied at the vfs level.
        control_relpath = stored_bzr_dir.root_transport.relpath(
            stored_bzr_dir.transport.abspath('.'))
        local_root = get_transport(target_path)
        local_control = local_root.clone(control_relpath)
        local_control.create_prefix()
        stored_bzr_dir.transport.copy_tree_to_transport(local_control)
        copied_bzr_dir = BzrDir.open_from_transport(local_root)
        if copied_bzr_dir.needs_format_conversion(format=required_format):
            # Clear out any stale backup before upgrading.
            try:
                copied_bzr_dir.root_transport.delete_tree('backup.bzr')
            except NoSuchFile:
                pass
            upgrade(target_path, required_format, clean_up=True)
        if needs_tree:
            copied_bzr_dir.create_workingtree()
        return copied_bzr_dir.open_branch()

    def push(self, db_branch_id, bzr_branch, required_format):
        """Store `bzr_branch` as the Bazaar branch of an import.

        :param db_branch_id: The id of the database branch of the import.
        :param bzr_branch: The local branch to push.
        :param required_format: The format the stored branch must have.
        :return: True if the push changed the tip of the stored branch,
            False otherwise.
        """
        self.transport.create_prefix()
        target_url = self._getMirrorURL(db_branch_id)
        # Only set when the stored branch has to change format.
        superseded_branch = None
        try:
            destination = Branch.open(target_url)
        except NotBranchError:
            destination = BzrDir.create_branch_and_repo(
                target_url, format=required_format)
        else:
            if destination.bzrdir.needs_format_conversion(required_format):
                # Upgrades are done by pushing into a brand new branch in
                # the required format; once the push is finished the old
                # .bzr directory is retired and the new one is renamed
                # into its place.
                superseded_branch = destination
                staging_url = urljoin(target_url, "backup.bzr")
                old_root = superseded_branch.bzrdir.root_transport
                try:
                    old_root.delete_tree('backup.bzr')
                except NoSuchFile:
                    pass
                destination = BzrDir.create_branch_and_repo(
                    staging_url, format=required_format)
        pull_result = destination.pull(bzr_branch, overwrite=True)
        # Incremental imports can leave revisions in the branch's repository
        # that are outside the ancestry of the branch tip; those have to be
        # transferred as well.
        destination.repository.fetch(bzr_branch.repository)
        if superseded_branch is not None:
            # Swap the freshly pushed new-format branch into place.
            store_root = superseded_branch.bzrdir.root_transport
            store_root.delete_tree('.bzr')
            store_root.rename("backup.bzr/.bzr", ".bzr")
            store_root.rmdir("backup.bzr")
        return pull_result.old_revid != pull_result.new_revid


def get_default_bazaar_branch_store():
    """Build the `BazaarBranchStore` named by the codeimport configuration.

    The store is rooted at a transport for
    `config.codeimport.bazaar_branch_store`.
    """
    store_transport = get_transport(config.codeimport.bazaar_branch_store)
    return BazaarBranchStore(store_transport)


class CodeImportSourceDetails:
    """The information needed to process an import.

    As the worker doesn't talk to the database, we don't use
    `CodeImport` objects for this.

    The 'fromArguments' and 'asArguments' methods convert to and from a form
    of the information suitable for passing around on executables' command
    lines.

    :ivar branch_id: The id of the branch associated to this code import, used
        for locating the existing import and the foreign tree.
    :ivar rcstype: 'svn', 'cvs', 'hg', 'git', 'bzr-svn', 'bzr' as appropriate.
    :ivar url: The branch URL if rcstype in ['svn', 'bzr-svn',
        'git', 'hg', 'bzr'], None otherwise.
    :ivar cvs_root: The $CVSROOT if rcstype == 'cvs', None otherwise.
    :ivar cvs_module: The CVS module if rcstype == 'cvs', None otherwise.
    """

    # The rcstype values whose source is described by a single URL.
    _URL_RCSTYPES = ('svn', 'bzr-svn', 'git', 'hg', 'bzr')

    def __init__(self, branch_id, rcstype, url=None, cvs_root=None,
                 cvs_module=None):
        """Store the details of an import; see the class docstring."""
        self.branch_id = branch_id
        self.rcstype = rcstype
        self.url = url
        self.cvs_root = cvs_root
        self.cvs_module = cvs_module

    @classmethod
    def fromArguments(cls, arguments):
        """Convert command line-style arguments to an instance.

        :param arguments: A list of strings: the branch id, the rcstype and
            then either the URL or, for 'cvs', the CVS root and module.
            The list is not modified.
        :raises AssertionError: if the rcstype is not recognised.
        :raises ValueError: if the branch id is not an integer or the number
            of trailing arguments does not fit the rcstype.
        """
        # Consume a private copy so the caller's list is left untouched.
        remaining = list(arguments)
        branch_id = int(remaining.pop(0))
        rcstype = remaining.pop(0)
        if rcstype in cls._URL_RCSTYPES:
            [url] = remaining
            cvs_root = cvs_module = None
        elif rcstype == 'cvs':
            url = None
            [cvs_root, cvs_module] = remaining
        else:
            raise AssertionError("Unknown rcstype %r." % rcstype)
        return cls(branch_id, rcstype, url, cvs_root, cvs_module)

    @classmethod
    def fromCodeImport(cls, code_import):
        """Convert a `CodeImport` to an instance.

        :raises AssertionError: if the rcs_type of `code_import` is not one
            of the handled `RevisionControlSystems` items.
        """
        branch_id = code_import.branch.id
        if code_import.rcs_type == RevisionControlSystems.SVN:
            return cls(branch_id, 'svn', str(code_import.url))
        elif code_import.rcs_type == RevisionControlSystems.BZR_SVN:
            return cls(branch_id, 'bzr-svn', str(code_import.url))
        elif code_import.rcs_type == RevisionControlSystems.CVS:
            return cls(
                branch_id, 'cvs',
                cvs_root=str(code_import.cvs_root),
                cvs_module=str(code_import.cvs_module))
        elif code_import.rcs_type == RevisionControlSystems.GIT:
            return cls(branch_id, 'git', str(code_import.url))
        elif code_import.rcs_type == RevisionControlSystems.HG:
            return cls(branch_id, 'hg', str(code_import.url))
        elif code_import.rcs_type == RevisionControlSystems.BZR:
            return cls(branch_id, 'bzr', str(code_import.url))
        else:
            raise AssertionError("Unknown rcstype %r." % code_import.rcs_type)

    def asArguments(self):
        """Return a list of arguments suitable for passing to a child process.

        The result can be turned back into an equivalent instance with
        `fromArguments`.

        :raises AssertionError: if the rcstype is not recognised.
        """
        result = [str(self.branch_id), self.rcstype]
        if self.rcstype in self._URL_RCSTYPES:
            result.append(self.url)
        elif self.rcstype == 'cvs':
            result.append(self.cvs_root)
            result.append(self.cvs_module)
        else:
            raise AssertionError("Unknown rcstype %r." % self.rcstype)
        return result


class ImportDataStore:
    """A store for data associated with an import.

    Import workers can store and retreive files into and from the store using
    `put()` and `fetch()`.

    So this store can find files stored by previous versions of this code, the
    files are stored at ``<BRANCH ID IN HEX>.<EXT>`` where BRANCH ID comes
    from the CodeImportSourceDetails used to construct the instance and EXT
    comes from the local name passed to `put` or `fetch`.
    """

    def __init__(self, transport, source_details):
        """Initialize an `ImportDataStore`.

        :param transport: The transport files will be stored on.
        :param source_details: The `CodeImportSourceDetails` object, used to
            know where to store files on the remote transport.
        """
        self.source_details = source_details
        self._transport = transport
        self._branch_id = source_details.branch_id

    def _getRemoteName(self, local_name):
        """Convert `local_name` to the name used to store a file.

        The algorithm is a little stupid for historical reasons: we chop off
        the extension and stick that on the end of the branch id from the
        source_details we were constructed with, in hex padded to 8
        characters.  For example 'tree.tar.gz' might become '0000a23d.tar.gz'
        or 'git.db' might become '00003e4.db'.

        :param local_name: The local name of the file to be stored.
        :return: The name to store the file as on the remote transport.
        """
        if '/' in local_name:
            raise AssertionError("local_name must be a name, not a path")
        dot_index = local_name.index('.')
        if dot_index < 0:
            raise AssertionError("local_name must have an extension.")
        ext = local_name[dot_index:]
        return '%08x%s' % (self._branch_id, ext)

    def fetch(self, filename, dest_transport=None):
        """Retrieve `filename` from the store.

        :param filename: The name of the file to retrieve (must be a filename,
            not a path).
        :param dest_transport: The transport to retrieve the file to,
            defaulting to ``get_transport('.')``.
        :return: A boolean, true if the file was found and retrieved, false
            otherwise.
        """
        if dest_transport is None:
            dest_transport = get_transport('.')
        remote_name = self._getRemoteName(filename)
        if self._transport.has(remote_name):
            dest_transport.put_file(
                filename, self._transport.get(remote_name))
            return True
        else:
            return False

    def put(self, filename, source_transport=None):
        """Put `filename` into the store.

        :param filename: The name of the file to store (must be a filename,
            not a path).
        :param source_transport: The transport to look for the file on,
            defaulting to ``get_transport('.')``.
        """
        if source_transport is None:
            source_transport = get_transport('.')
        remote_name = self._getRemoteName(filename)
        local_file = source_transport.get(filename)
        self._transport.create_prefix()
        try:
            self._transport.put_file(remote_name, local_file)
        finally:
            local_file.close()


class ForeignTreeStore:
    """Retrieves and stores foreign (CVS and SVN) working trees.

    Working trees are archived as a tarball named 'foreign_tree.tar.gz',
    which is handed to the `ImportDataStore` this object was constructed
    with; that store decides where the tarball is actually kept.
    """

    def __init__(self, import_data_store):
        """Construct a `ForeignTreeStore`.

        :param import_data_store: The `ImportDataStore` used to archive and
            retrieve the tarballs.  Its `source_details` say which foreign
            tree this store deals with.
        """
        self.import_data_store = import_data_store

    def _getForeignTree(self, target_path):
        """Build the working tree object for `target_path`.

        :return: A `SubversionWorkingTree` for 'svn' imports or a
            `CVSWorkingTree` for 'cvs' imports.
        :raises AssertionError: for any other rcstype.
        """
        details = self.import_data_store.source_details
        rcstype = details.rcstype
        if rcstype == 'svn':
            return SubversionWorkingTree(details.url, str(target_path))
        if rcstype == 'cvs':
            return CVSWorkingTree(
                details.cvs_root, details.cvs_module, target_path)
        raise AssertionError("unknown RCS type: %r" % details.rcstype)

    def archive(self, foreign_tree):
        """Tar up `foreign_tree` and put the tarball in the data store."""
        tarball_name = 'foreign_tree.tar.gz'
        create_tarball(foreign_tree.local_path, tarball_name)
        self.import_data_store.put(tarball_name)

    def fetch(self, target_path):
        """Get the foreign tree into `target_path`.

        The archived tarball is preferred.  When there is none, the tree is
        checked out from its source repository instead.

        :return: The foreign working tree object.
        """
        try:
            tree = self.fetchFromArchive(target_path)
        except NoSuchFile:
            tree = self.fetchFromSource(target_path)
        return tree

    def fetchFromSource(self, target_path):
        """Check the foreign tree out from its source into `target_path`."""
        tree = self._getForeignTree(target_path)
        tree.checkout()
        return tree

    def fetchFromArchive(self, target_path):
        """Unpack the archived foreign tree into `target_path` and update it.

        :raises NoSuchFile: if the data store holds no tarball.
        """
        tarball_name = 'foreign_tree.tar.gz'
        found = self.import_data_store.fetch(tarball_name)
        if not found:
            raise NoSuchFile(tarball_name)
        extract_tarball(tarball_name, target_path)
        foreign_tree = self._getForeignTree(target_path)
        foreign_tree.update()
        return foreign_tree


class ImportWorker:
    """Oversees the actual work of a code import.

    Subclasses provide the import itself by implementing `_doImport`.
    """

    # Where the Bazaar working tree will be stored.
    BZR_BRANCH_PATH = 'bzr_branch'

    # Should `getBazaarBranch` create a working tree?
    needs_bzr_tree = True

    required_format = BzrDirFormat.get_default_format()

    def __init__(self, source_details, import_data_transport,
                 bazaar_branch_store, logger, opener_policy):
        """Construct an `ImportWorker`.

        :param source_details: A `CodeImportSourceDetails` object.
        :param import_data_transport: The transport on which the
            `ImportDataStore` for this import keeps its files.
        :param bazaar_branch_store: A `BazaarBranchStore`. The import worker
            uses this to fetch and store the Bazaar branches that are created
            and updated during the import process.
        :param logger: A `Logger` to pass to cscvs.
        :param opener_policy: Policy object that decides what branches can
             be imported
        """
        self.source_details = source_details
        self.bazaar_branch_store = bazaar_branch_store
        self.import_data_store = ImportDataStore(
            import_data_transport, self.source_details)
        self._logger = logger
        self._opener_policy = opener_policy

    def getBazaarBranch(self):
        """Return the Bazaar `Branch` that we are importing into.

        Any existing directory at `BZR_BRANCH_PATH` is removed first, then
        the branch is pulled from the branch store.
        """
        if os.path.isdir(self.BZR_BRANCH_PATH):
            shutil.rmtree(self.BZR_BRANCH_PATH)
        return self.bazaar_branch_store.pull(
            self.source_details.branch_id, self.BZR_BRANCH_PATH,
            self.required_format, self.needs_bzr_tree)

    def pushBazaarBranch(self, bazaar_branch):
        """Push the updated Bazaar branch to the server.

        :return: True if revisions were transferred.
        """
        return self.bazaar_branch_store.push(
            self.source_details.branch_id, bazaar_branch,
            self.required_format)

    def getWorkingDirectory(self):
        """The directory we should change to and store all scratch files in.
        """
        base = config.codeimportworker.working_directory_root
        dirname = 'worker-for-branch-%s' % self.source_details.branch_id
        return os.path.join(base, dirname)

    def run(self):
        """Run the code import job.

        This is the primary public interface to the `ImportWorker`. This
        method:

         1. Creates a fresh, empty working directory (see
            `getWorkingDirectory`), removing any leftover one.
         2. Changes into that directory and calls `_doImport`.
         3. Changes back to the original directory and removes the working
            directory, whether or not the import succeeded.

        :return: Whatever `_doImport` returns.
        """
        working_directory = self.getWorkingDirectory()
        if os.path.exists(working_directory):
            shutil.rmtree(working_directory)
        os.makedirs(working_directory)
        saved_pwd = os.getcwd()
        os.chdir(working_directory)
        try:
            return self._doImport()
        finally:
            # Step out of the scratch directory before deleting it.  That
            # way a failing rmtree cannot leave the process stranded in it,
            # and a relative `working_directory` resolves against the same
            # directory it was created from.
            try:
                os.chdir(saved_pwd)
            finally:
                shutil.rmtree(working_directory)

    def _doImport(self):
        """Perform the import.

        :return: A CodeImportWorkerExitCode
        """
        raise NotImplementedError()


class CSCVSImportWorker(ImportWorker):
    """The ImportWorker for imports performed with CSCVS.

    Besides driving cscvs, this worker looks after the foreign working tree
    that the import reads from.
    """

    # Where the foreign working tree will be stored.
    FOREIGN_WORKING_TREE_PATH = 'foreign_working_tree'

    @cachedproperty
    def foreign_tree_store(self):
        """The `ForeignTreeStore` built on this worker's import data store.
        """
        return ForeignTreeStore(self.import_data_store)

    def getForeignTree(self):
        """Fetch the foreign tree we import from into a fresh directory.

        :return: A `SubversionWorkingTree` or a `CVSWorkingTree`.
        """
        tree_path = self.FOREIGN_WORKING_TREE_PATH
        if os.path.isdir(tree_path):
            shutil.rmtree(tree_path)
        os.mkdir(tree_path)
        return self.foreign_tree_store.fetch(tree_path)

    def importToBazaar(self, foreign_tree, bazaar_branch):
        """Import the revisions of `foreign_tree` into `bazaar_branch`.

        :param foreign_tree: A `SubversionWorkingTree` or a `CVSWorkingTree`.
        :param bazaar_branch: A `bzrlib.branch.Branch`, which must have a
            colocated working tree.
        """
        source_directory = foreign_tree.local_path
        working_tree = bazaar_branch.bzrdir.open_workingtree()
        target_directory = str(working_tree.basedir)

        scm_branch = SCM.branch(target_directory)

        # A branch without any identifiable CSCVS revision has to be
        # "initialized" by CSCVS first.
        if cscvs.findLastCscvsCommit(scm_branch) is None:
            self._runToBaz(
                source_directory, "-SI", "MAIN.1", target_directory)

        # Then synchronise: import every revision the foreign branch has
        # beyond the last one already in the Bazaar branch.  Straight after
        # an initialization that amounts to importing *all* revisions.
        newest_imported = cscvs.findLastCscvsCommit(scm_branch)
        self._runToBaz(
            source_directory, "-SC", "%s::" % newest_imported,
            target_directory)

    def _runToBaz(self, source_dir, flags, revisions, bazpath):
        """Invoke the CSCVS utility that imports revisions.

        :param source_dir: The directory containing the foreign working tree
            that we are importing from.
        :param flags: Flags to pass to `totla.totla`.
        :param revisions: The revisions to import.
        :param bazpath: The directory containing the Bazaar working tree that
            we are importing into.
        """
        # XXX: JonathanLange 2008-02-08: We need better documentation for
        # `flags` and `revisions`.
        cvs_config = CVS.Config(source_dir)
        cvs_config.args = [
            "--strict", "-b", bazpath, flags, revisions, bazpath]
        source_tree = SCM.tree(source_dir)
        totla.totla(cvs_config, self._logger, cvs_config.args, source_tree)

    def _doImport(self):
        """See `ImportWorker._doImport`.

        Fetches the foreign tree and the Bazaar branch, imports, pushes the
        branch and finally archives the foreign tree.

        :return: `CodeImportWorkerExitCode.SUCCESS` if the push was
            non-trivial, `CodeImportWorkerExitCode.SUCCESS_NOCHANGE`
            otherwise.
        """
        foreign_tree = self.getForeignTree()
        bazaar_branch = self.getBazaarBranch()
        self.importToBazaar(foreign_tree, bazaar_branch)
        pushed_changes = self.pushBazaarBranch(bazaar_branch)
        self.foreign_tree_store.archive(foreign_tree)
        if pushed_changes:
            return CodeImportWorkerExitCode.SUCCESS
        return CodeImportWorkerExitCode.SUCCESS_NOCHANGE


class PullingImportWorker(ImportWorker):
    """An import worker for imports that can be done by a bzr plugin.

    Subclasses need to implement `probers`.
    """

    needs_bzr_tree = False

    @property
    def invalid_branch_exceptions(self):
        """Exceptions that indicate no (valid) remote branch is present."""
        raise NotImplementedError

    @property
    def unsupported_feature_exceptions(self):
        """The exceptions to consider for unsupported features."""
        raise NotImplementedError

    @property
    def probers(self):
        """The probers that should be tried for this import."""
        raise NotImplementedError

    def getRevisionLimit(self):
        """Return maximum number of revisions to fetch (None for no limit).
        """
        return None

    def _open_dir(self, url):
        """Simple BzrDir.open clone that only uses self.probers.

        :param url: URL to open
        :return: ControlDir instance
        """
        transport = get_transport(url)
        for prober_kls in self.probers:
            prober = prober_kls()
            try:
                format = prober.probe_transport(transport)
            except NotBranchError:
                pass
            else:
                return format.open(transport)
        else:
            raise NotBranchError("Not a valid branch")

    def _doImport(self):
        self._logger.info("Starting job.")
        saved_factory = bzrlib.ui.ui_factory
        opener = SafeBranchOpener(self._opener_policy)
        bzrlib.ui.ui_factory = LoggingUIFactory(logger=self._logger)
        try:
            self._logger.info(
                "Getting exising bzr branch from central store.")
            bazaar_branch = self.getBazaarBranch()
            try:
                remote_branch = opener.open(
                    self.source_details.url, self._open_dir)
            except NotBranchError:
                self._logger.info("No branch found at remote location.")
                return CodeImportWorkerExitCode.FAILURE_INVALID
            except BadUrl, e:
                self._logger.info("Invalid URL: %s" % e)
                return CodeImportWorkerExitCode.FAILURE_FORBIDDEN
            remote_branch_tip = remote_branch.last_revision()
            inter_branch = InterBranch.get(remote_branch, bazaar_branch)
            self._logger.info("Importing branch.")
            try:
                revision_limit = self.getRevisionLimit()
                if revision_limit is None:
                    # bzr < 2.4 does not support InterBranch.fetch()
                    bazaar_branch.fetch(remote_branch)
                else:
                    inter_branch.fetch(limit=revision_limit)
                if bazaar_branch.repository.has_revision(remote_branch_tip):
                    pull_result = inter_branch.pull(overwrite=True)
                    if pull_result.old_revid != pull_result.new_revid:
                        result = CodeImportWorkerExitCode.SUCCESS
                    else:
                        result = CodeImportWorkerExitCode.SUCCESS_NOCHANGE
                else:
                    result = CodeImportWorkerExitCode.SUCCESS_PARTIAL
            except Exception, e:
                if e.__class__ in self.unsupported_feature_exceptions:
                    self._logger.info(
                        "Unable to import branch because of limitations in "
                        "Bazaar.")
                    self._logger.info(str(e))
                    return (
                        CodeImportWorkerExitCode.FAILURE_UNSUPPORTED_FEATURE)
                elif e.__class__ in self.invalid_branch_exceptions:
                    self._logger.info("Branch invalid: %s", e(str))
                    return CodeImportWorkerExitCode.FAILURE_INVALID
                else:
                    raise
            self._logger.info("Pushing local import branch to central store.")
            self.pushBazaarBranch(bazaar_branch)
            self._logger.info("Job complete.")
            return result
        finally:
            bzrlib.ui.ui_factory = saved_factory


class GitImportWorker(PullingImportWorker):
    """An import worker for Git imports.

    Beyond what `PullingImportWorker` does, this worker keeps bzr-git's
    caches (the legacy 'git.db' shamap and the newer 'git' cache directory)
    in the import data store so that they are available again on the next
    run.
    """

    @property
    def invalid_branch_exceptions(self):
        """Exception classes reported as an invalid remote branch."""
        return [NoRepositoryPresent, NotBranchError, ConnectionError]

    @property
    def unsupported_feature_exceptions(self):
        """Exception classes reported as an unsupported feature."""
        # bzr-git specific exception, imported at the point of use.
        from bzrlib.plugins.git.fetch import SubmodulesRequireSubtrees
        return [InvalidEntryName, SubmodulesRequireSubtrees]

    @property
    def probers(self):
        """See `PullingImportWorker.probers`."""
        from bzrlib.plugins.git import LocalGitProber, RemoteGitProber
        return [LocalGitProber, RemoteGitProber]

    def getRevisionLimit(self):
        """See `PullingImportWorker.getRevisionLimit`.

        The limit is read from the `codeimport` configuration section.
        """
        return config.codeimport.git_revisions_import_limit

    def getBazaarBranch(self):
        """See `ImportWorker.getBazaarBranch`.

        Once the superclass has produced the branch, bzr-git's caches are
        restored from the import data store to the places where bzr-git
        looks for them in the Bazaar tree: the legacy cache at
        '.bzr/repository/git.db' and the modern one at
        '.bzr/repository/git'.
        """
        bzr_branch = PullingImportWorker.getBazaarBranch(self)
        # Legacy cache: a single file, fetched straight onto the repository
        # transport if the store has it.
        self.import_data_store.fetch(
            'git.db', bzr_branch.repository._transport)
        # Modern cache: a directory, which newer bzr-gits keep in the store
        # as a tarball.  Nothing more to do unless fetch() returns a true
        # value for it.
        tarball_name = 'git-cache.tar.gz'
        if not self.import_data_store.fetch(tarball_name):
            return bzr_branch
        transport = bzr_branch.repository._transport
        transport.mkdir('git')
        repo_path = local_path_from_url(transport.base)
        extract_tarball(tarball_name, os.path.join(repo_path, 'git'))
        return bzr_branch

    def pushBazaarBranch(self, bazaar_branch):
        """See `ImportWorker.pushBazaarBranch`.

        After the superclass has pushed the branch, bzr-git's cache
        directory at .bzr/repository/git is packed into a tarball and put
        into the import data store.  The superclass' return value is passed
        through unchanged.
        """
        pushed = PullingImportWorker.pushBazaarBranch(self, bazaar_branch)
        repo_path = local_path_from_url(
            bazaar_branch.repository._transport.base)
        tarball_name = 'git-cache.tar.gz'
        create_tarball(os.path.join(repo_path, 'git'), tarball_name)
        self.import_data_store.put(tarball_name)
        return pushed


class HgImportWorker(PullingImportWorker):
    """An import worker for Mercurial imports.

    Beyond what `PullingImportWorker` does, this worker keeps bzr-hg's
    caches (the legacy 'hg-v2.db' id-sha map and the newer 'hg' cache
    directory) in the import data store so that they are available again on
    the next run.
    """

    @property
    def invalid_branch_exceptions(self):
        """Exception classes reported as an invalid remote branch."""
        return [NoRepositoryPresent, NotBranchError, ConnectionError]

    @property
    def unsupported_feature_exceptions(self):
        """Exception classes reported as an unsupported feature."""
        return [InvalidEntryName]

    @property
    def probers(self):
        """See `PullingImportWorker.probers`."""
        from bzrlib.plugins.hg import HgProber
        return [HgProber]

    def getRevisionLimit(self):
        """See `PullingImportWorker.getRevisionLimit`.

        The limit is read from the `codeimport` configuration section.
        """
        return config.codeimport.hg_revisions_import_limit

    def getBazaarBranch(self):
        """See `ImportWorker.getBazaarBranch`.

        Once the superclass has produced the branch, bzr-hg's caches are
        restored from the import data store to the places where bzr-hg
        looks for them in the Bazaar tree: the legacy cache at
        '.bzr/repository/hg-v2.db' and the current one at
        '.bzr/repository/hg'.
        """
        bzr_branch = PullingImportWorker.getBazaarBranch(self)
        # Legacy cache: a single file, fetched straight onto the repository
        # transport if the store has it.
        self.import_data_store.fetch(
            'hg-v2.db', bzr_branch.repository._transport)
        # Current cache: a directory, which newer bzr-hgs keep in the store
        # as a tarball.  Nothing more to do unless fetch() returns a true
        # value for it.
        tarball_name = 'hg-cache.tar.gz'
        if not self.import_data_store.fetch(tarball_name):
            return bzr_branch
        transport = bzr_branch.repository._transport
        transport.mkdir('hg')
        repo_path = local_path_from_url(transport.base)
        extract_tarball(tarball_name, os.path.join(repo_path, 'hg'))
        return bzr_branch

    def pushBazaarBranch(self, bazaar_branch):
        """See `ImportWorker.pushBazaarBranch`.

        After the superclass has pushed the branch, the hg cache that
        bzr-hg will have created at .bzr/repository/hg is packed into a
        tarball and put into the import data store.  The superclass' return
        value is passed through unchanged.
        """
        pushed = PullingImportWorker.pushBazaarBranch(self, bazaar_branch)
        repo_path = local_path_from_url(
            bazaar_branch.repository._transport.base)
        tarball_name = 'hg-cache.tar.gz'
        create_tarball(os.path.join(repo_path, 'hg'), tarball_name)
        self.import_data_store.put(tarball_name)
        return pushed


class BzrSvnImportWorker(PullingImportWorker):
    """An import worker that imports Subversion branches using bzr-svn."""

    @property
    def invalid_branch_exceptions(self):
        """Exception classes reported as an invalid remote branch."""
        return [NoRepositoryPresent, NotBranchError, ConnectionError]

    @property
    def unsupported_feature_exceptions(self):
        """Exception classes reported as an unsupported feature."""
        # bzr-svn specific exception, imported at the point of use.
        from bzrlib.plugins.svn.errors import InvalidFileName
        return [InvalidEntryName, InvalidFileName]

    @property
    def probers(self):
        """See `PullingImportWorker.probers`."""
        from bzrlib.plugins.svn import SvnRemoteProber
        return [SvnRemoteProber]

    def getRevisionLimit(self):
        """See `PullingImportWorker.getRevisionLimit`.

        The limit is read from the `codeimport` configuration section.
        """
        return config.codeimport.svn_revisions_import_limit


class BzrImportWorker(PullingImportWorker):
    """An import worker that imports native Bazaar branches."""

    # Exception classes reported as an invalid remote branch.
    invalid_branch_exceptions = [NotBranchError, ConnectionError]
    # No exception classes are reported as an unsupported feature.
    unsupported_feature_exceptions = []

    def getRevisionLimit(self):
        """See `PullingImportWorker.getRevisionLimit`.

        Always returns None, i.e. no revision limit.
        """
        # bzr can do fetch(limit=), but not very efficiently at the moment,
        # so for now the whole branch is grabbed in one go.
        return None

    @property
    def probers(self):
        """See `PullingImportWorker.probers`."""
        from bzrlib.bzrdir import BzrProber, RemoteBzrProber
        return [BzrProber, RemoteBzrProber]