19
19
from bzrlib.graph import DictParentsProvider
20
20
from bzrlib.revision import NULL_REVISION
22
23
from storm.locals import Store
24
24
from zope.component import getUtility
25
25
from zope.event import notify
27
27
from canonical.config import config
28
29
from lp.code.interfaces.branchjob import IRosettaUploadJobSource
29
30
from lp.code.interfaces.revision import IRevisionSet
30
from lp.code.model.branchrevision import BranchRevision
31
from lp.code.model.branchrevision import (BranchRevision)
31
32
from lp.code.model.revision import Revision
32
33
from lp.codehosting.scanner import events
33
34
from lp.services.utils import iter_list_chunks
84
85
# Get the history and ancestry from the branch first, to fail early
85
86
# if something is wrong with the branch.
86
87
self.logger.info("Retrieving history from bzrlib.")
87
bzr_history = bzr_branch.revision_history()
88
last_revision_info = bzr_branch.last_revision_info()
88
89
# The BranchRevision, Revision and RevisionParent tables are only
89
90
# written to by the branch-scanner, so they are not subject to
90
91
# write-lock contention. Update them all in a single transaction to
94
95
(new_ancestry, branchrevisions_to_delete,
95
96
revids_to_insert) = self.planDatabaseChanges(
96
bzr_branch, bzr_history, db_ancestry, db_history)
97
bzr_branch, last_revision_info, db_ancestry, db_history)
98
99
new_ancestry - getUtility(IRevisionSet).onlyPresent(new_ancestry))
99
100
self.logger.info("Adding %s new revisions.", len(new_db_revs))
125
126
# not been updated. Since this has no ill-effect, and can only err on
126
127
# the pessimistic side (tell the user the data has not yet been
127
128
# updated although it has), the race is acceptable.
128
self.updateBranchStatus(bzr_history)
129
self.updateBranchStatus(last_revision_info)
130
131
events.ScanCompleted(
131
132
self.db_branch, bzr_branch, self.logger, new_ancestry))
156
157
return bzr_branch.repository.get_graph(PPSource)
158
def getAncestryDelta(self, bzr_branch):
159
bzr_last = bzr_branch.last_revision()
159
def getAncestryDelta(self, bzr_branch, bzr_last_revinfo):
160
bzr_last = bzr_last_revinfo[1]
160
161
db_last = self.db_branch.last_scanned_id
161
162
if db_last is None:
162
added_ancestry = set(bzr_branch.repository.get_ancestry(bzr_last))
163
added_ancestry.discard(None)
164
removed_ancestry = set()
166
graph = self._getRevisionGraph(bzr_branch, db_last)
163
db_last = NULL_REVISION
164
graph = self._getRevisionGraph(bzr_branch, db_last)
165
bzr_branch.lock_read()
167
167
added_ancestry, removed_ancestry = (
168
168
graph.find_difference(bzr_last, db_last))
169
added_ancestry.discard(NULL_REVISION)
171
added_ancestry.discard(NULL_REVISION)
170
172
return added_ancestry, removed_ancestry
172
def getHistoryDelta(self, bzr_history, db_history):
174
def getHistoryDelta(self, bzr_branch, bzr_last_revinfo, db_history):
173
175
self.logger.info("Calculating history delta.")
174
common_len = min(len(bzr_history), len(db_history))
176
common_len = min(bzr_last_revinfo[0], len(db_history))
177
common_revid = NULL_REVISION
175
178
while common_len > 0:
176
# The outer conditional improves efficiency. Without it, the
177
# algorithm is O(history-size * change-size), which can be
178
# excessive if a long branch is replaced by another long branch
179
# with a distant (or no) common mainline parent. The inner
180
# conditional is needed for correctness with branches where the
181
# history does not follow the line of leftmost parents.
182
if db_history[common_len - 1] == bzr_history[common_len - 1]:
183
if db_history[:common_len] == bzr_history[:common_len]:
179
if db_history[common_len - 1] == bzr_branch.get_rev_id(common_len - 1):
180
common_revid = db_history[common_len - 1]
186
183
# Revision added or removed from the branch's history. These lists may
187
184
# include revisions whose history position has merely changed.
188
185
removed_history = db_history[common_len:]
189
added_history = bzr_history[common_len:]
186
bzr_graph = bzr_branch.repository.get_graph()
187
added_history = list(bzr_graph.iter_lefthand_ancestry(bzr_last_revinfo[1],
189
added_history.reverse()
190
190
return added_history, removed_history
192
def planDatabaseChanges(self, bzr_branch, bzr_history, db_ancestry,
192
def planDatabaseChanges(self, bzr_branch, bzr_last_revinfo, db_ancestry,
194
194
"""Plan database changes to synchronize with bzrlib data.
199
199
self.logger.info("Planning changes.")
200
200
# Find the length of the common history.
201
201
added_history, removed_history = self.getHistoryDelta(
202
bzr_history, db_history)
203
added_ancestry, removed_ancestry = self.getAncestryDelta(bzr_branch)
202
bzr_branch, bzr_last_revinfo, db_history)
203
added_ancestry, removed_ancestry = self.getAncestryDelta(
204
bzr_branch, bzr_last_revinfo)
206
207
events.RevisionsRemoved(
216
217
# We must insert BranchRevision rows for all revisions which were
217
218
# added to the ancestry or whose sequence value has changed.
218
last_revno = len(bzr_history)
219
219
revids_to_insert = dict(
220
220
self.revisionsToInsert(
221
added_history, last_revno, added_ancestry))
221
added_history, bzr_last_revinfo[0], added_ancestry))
222
222
# We must remove any stray BranchRevisions that happen to already be
224
224
existing_branchrevisions = Store.of(self.db_branch).find(
296
296
for revid_seq_pair_chunk in iter_list_chunks(revid_seq_pairs, 1000):
297
297
self.db_branch.createBranchRevisionFromIDs(revid_seq_pair_chunk)
299
def updateBranchStatus(self, bzr_history):
299
def updateBranchStatus(self, (revision_count, last_revision)):
300
300
"""Update the branch-scanner status in the database Branch table."""
301
301
# Record that the branch has been updated.
302
revision_count = len(bzr_history)
303
302
if revision_count > 0:
304
last_revision = bzr_history[-1]
305
303
revision = getUtility(IRevisionSet).getByRevisionId(last_revision)