class History (object):

    def __init__(self):
        self._file_change_cache = None
        self._lock = threading.RLock()

    @classmethod
    def from_branch(cls, branch):
        z = time.time()
        self = cls()
        self._branch = branch
        self._last_revid = self._branch.last_revision()

        self.log = logging.getLogger('loggerhead.%s' % (self._branch.nick,))

        graph = branch.repository.get_graph()
        parent_map = dict(((key, value) for key, value in
                           graph.iter_ancestry([self._last_revid])
                           if value is not None))

        self._revision_graph = self._strip_NULL_ghosts(parent_map)
        self._full_history = []
        self._revision_info = {}
        self._revno_revid = {}
        if bzrlib.revision.is_null(self._last_revid):
            self._merge_sort = []
        else:
            self._merge_sort = bzrlib.tsort.merge_sort(
                self._revision_graph, self._last_revid, generate_revno=True)

        for (seq, revid, merge_depth, revno, end_of_merge) in self._merge_sort:
            self._full_history.append(revid)
            revno_str = '.'.join(str(n) for n in revno)
            self._revno_revid[revno_str] = revid
            self._revision_info[revid] = (
                seq, revid, merge_depth, revno_str, end_of_merge)

        # cache merge info
        self._where_merged = {}

        for revid in self._revision_graph.keys():
            if self._revision_info[revid][2] == 0:
                continue
            for parent in self._revision_graph[revid]:
                self._where_merged.setdefault(parent, set()).add(revid)

        self.log.info('built revision graph cache: %r secs' % (time.time() - z,))
        return self

    @staticmethod
    def _strip_NULL_ghosts(revision_graph):
        """
        Copied over from bzrlib as a temporary workaround for deprecated
        methods.
        """
        # Filter ghosts, and null:
        if bzrlib.revision.NULL_REVISION in revision_graph:
            del revision_graph[bzrlib.revision.NULL_REVISION]
        for key, parents in revision_graph.items():
            revision_graph[key] = tuple(parent for parent in parents
                                        if parent in revision_graph)
        return revision_graph
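
    # For example, given a hypothetical graph {'rev-1': ('ghost-rev',), 'null:': ()},
    # the 'null:' key is removed and the ghost parent filtered out, leaving
    # {'rev-1': ()}.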

    @classmethod
    def from_folder(cls, path):
        b = bzrlib.branch.Branch.open(path)
        return cls.from_branch(b)
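
    # Illustrative usage (a sketch; assumes a bzr branch at the hypothetical
    # path '/path/to/branch'):
    #
    #   history = History.from_folder('/path/to/branch')
    #   if history.has_revisions:
    #       print history.get_revno(history.last_revid)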

    @with_branch_lock
    def out_of_date(self):
        # the branch may have been upgraded on disk, in which case we're stale.
        newly_opened = bzrlib.branch.Branch.open(self._branch.base)
        if self._branch.__class__ is not \
               newly_opened.__class__:
            return True
        if self._branch.repository.__class__ is not \
               newly_opened.repository.__class__:
            return True
        return self._branch.last_revision() != self._last_revid

    def use_file_cache(self, cache):
        self._file_change_cache = cache

    @property
    def has_revisions(self):
        return not bzrlib.revision.is_null(self.last_revid)

    last_revid = property(lambda self: self._last_revid, None, None)

    @with_branch_lock
    def get_config(self):
        return self._branch.get_config()

    def get_revno(self, revid):
        if revid not in self._revision_info:
            # ghost parent?
            return 'unknown'
        seq, revid, merge_depth, revno_str, end_of_merge = self._revision_info[revid]
        return revno_str

    def get_revision_history(self):
        return self._full_history

    def get_revids_from(self, revid_list, start_revid):
        """
        Yield the mainline (wrt start_revid) revisions that merged each
        revid in revid_list.
        """
        if revid_list is None:
            revid_list = self._full_history
        revid_set = set(revid_list)
        revid = start_revid

        def introduced_revisions(revid):
            r = set([revid])
            seq, revid, md, revno, end_of_merge = self._revision_info[revid]
            i = seq + 1
            while i < len(self._merge_sort) and self._merge_sort[i][2] > md:
                r.add(self._merge_sort[i][1])
                i += 1
            return r
        while True:
            if bzrlib.revision.is_null(revid):
                return
            if introduced_revisions(revid) & revid_set:
                yield revid
            parents = self._revision_graph[revid]
            if len(parents) == 0:
                return
            revid = parents[0]
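
    # Usage note: this is a generator; for example,
    #   list(history.get_revids_from(None, history.last_revid))
    # walks the mainline from the branch tip back to the original revision
    # (here 'history' stands for an instance of this class).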

    @with_branch_lock
    def get_short_revision_history_by_fileid(self, file_id):
        # wow.  is this really the only way we can get this list?  by
        # man-handling the weave store directly? :-0

        revid_list.reverse()
        return revid_list[index:]

    @with_branch_lock
    def get_search_revid_list(self, query, revid_list):
        """
        given a "quick-search" query, try a few obvious possible meanings:

            - revision id or # ("128.1.3")
            - date (US style "mm/dd/yy", earth style "dd-mm-yy", or iso style "yyyy-mm-dd")
            - comment text as a fallback
        """
        if self.revno_re.match(revid):
            revid = self._revno_revid[revid]

    @with_branch_lock
    def get_file_view(self, revid, file_id):
        """
        Given a revid and optional path, return a list of revision ids for
        navigation through the current scope: from the revid (or the latest
        revision) back to the original revision.

        If file_id is None, the entire revision history is the list scope.
        """
        if revid is None:
            revid = self._last_revid
        if file_id is not None:
            # since revid is 'start_revid', possibly should start the path
            # tracing from revid... FIXME
            revlist = list(self.get_short_revision_history_by_fileid(file_id))
            revlist = list(self.get_revids_from(revlist, revid))
        else:
            revlist = list(self.get_revids_from(None, revid))
        return revlist
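
    # Usage note: get_file_view(None, None) covers the widest scope -- the
    # mainline from the latest revision backwards -- while passing a file_id
    # narrows the list to the revisions that touched that file.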

    @with_branch_lock
    def get_view(self, revid, start_revid, file_id, query=None):
        """
        use the URL parameters (revid, start_revid, file_id, and query) to
        determine the revision list we're viewing (start_revid, file_id, query)
        and where we are in it (revid).

            - if a query is given, we're viewing query results.
            - if a file_id is given, we're viewing revisions for a specific
              file.
            - if a start_revid is given, we're viewing the branch from a
              specific revision up the tree.

        these may be combined to view revisions for a specific file, from
        a specific revision, with a specific search query.

        returns a new (revid, start_revid, revid_list) where:

            - revid: current position within the view
            - start_revid: starting revision of this view
            - revid_list: list of revision ids for this view

        file_id and query are never changed so aren't returned, but they may
        contain vital context for future url navigation.
        """
        if start_revid is None:
            start_revid = self._last_revid

        if query is None:
            revid_list = self.get_file_view(start_revid, file_id)
            if revid is None:
                revid = start_revid
            if revid not in revid_list:
                # if the given revid is not in the revlist, use a revlist that
                # starts at the given revid.
                revid_list = self.get_file_view(revid, file_id)
                start_revid = revid
            return revid, start_revid, revid_list

        # potentially limit the search
        if file_id is not None:
            revid_list = self.get_file_view(start_revid, file_id)
        else:
            revid_list = None

        revid_list = self.get_search_revid_list(query, revid_list)
        if revid_list and len(revid_list) > 0:
            if revid not in revid_list:
                revid = revid_list[0]
            return revid, start_revid, revid_list
        else:
            return None, None, []
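
    # Illustrative calls (hedged sketch): get_view(None, None, None) positions the
    # view at the branch tip and returns (tip_revid, tip_revid, mainline_revids),
    # while get_view(None, None, None, query='128.1.3') returns only the revisions
    # matched by the quick-search query, or (None, None, []) when nothing matches.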

    @with_branch_lock

                    p.branch_nick = p_change_dict[p.revid].branch_nick
                else:
                    p.branch_nick = '(missing)'

    @with_branch_lock
    def get_changes(self, revid_list):
        """Return a list of changes objects for the given revids.

        Revisions not present and NULL_REVISION will be ignored.
        """
        changes = self.get_changes_uncached(revid_list)
        if len(changes) == 0:
            return changes

        # some data needs to be recalculated each time, because it may
        # change as new revisions are added.
        for change in changes:
            merge_revids = self.simplify_merge_point_list(self.get_merge_point_list(change.revid))
            change.merge_points = [util.Container(revid=r, revno=self.get_revno(r)) for r in merge_revids]
            if len(change.parents) > 0:
                change.parents = [util.Container(revid=r,
                    revno=self.get_revno(r)) for r in change.parents]
            change.revno = self.get_revno(change.revid)

        parity = 0
        for change in changes:
            change.parity = parity
            parity ^= 1

        return changes
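
    # Note: each returned change is a util.Container; revno, parents and
    # merge_points are recomputed on every call (they may change as new
    # revisions are added), and 'parity' simply alternates 0/1 down the list.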

    @with_bzrlib_read_lock
    def get_changes_uncached(self, revid_list):
        # FIXME: deprecated method in getting a null revision
        revid_list = filter(lambda revid: not bzrlib.revision.is_null(revid),
                            revid_list)
        parent_map = self._branch.repository.get_graph().get_parent_map(revid_list)
        # We need to return the answer in the same order as the input,
        # with any missing revisions filtered out.
        present_revids = [revid for revid in revid_list
                          if revid in parent_map]
        rev_list = self._branch.repository.get_revisions(present_revids)

        return [self._change_from_revision(rev) for rev in rev_list]

    def _get_deltas_for_revisions_with_trees(self, revisions):
        """Produce a list of revision deltas.

        Note that the input is a sequence of REVISIONS, not revision_ids.
        Trees will be held in memory until the generator exits.
        Each delta is relative to the revision's lefthand predecessor.
        (This is copied from bzrlib.)
        """
        required_trees = set()
        for revision in revisions:
            required_trees.add(revision.revid)
            required_trees.update([p.revid for p in revision.parents[:1]])
        trees = dict((t.get_revision_id(), t) for
                     t in self._branch.repository.revision_trees(required_trees))
        ret = []
        self._branch.repository.lock_read()
        try:
            for revision in revisions:
                if not revision.parents:
                    old_tree = self._branch.repository.revision_tree(
                        bzrlib.revision.NULL_REVISION)
                else:
                    old_tree = trees[revision.parents[0].revid]
                tree = trees[revision.revid]
                ret.append(tree.changes_from(old_tree))
            return ret
        finally:
            self._branch.repository.unlock()

    def _change_from_revision(self, revision):
        """
        Given a bzrlib Revision, return a processed "change" for use in
        templates.
        """
        commit_time = datetime.datetime.fromtimestamp(revision.timestamp)

        parents = [util.Container(revid=r, revno=self.get_revno(r)) for r in revision.parent_ids]

        message, short_message = clean_message(revision.message)

        entry = {
            'revid': revision.revision_id,
            'date': commit_time,
            'author': revision.committer,
            'branch_nick': revision.properties.get('branch-nick', None),
            'short_comment': short_message,
            'comment': revision.message,
            'comment_clean': [util.html_clean(s) for s in message],
            'parents': revision.parent_ids,
        }
        return util.Container(entry)
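
    # The resulting Container exposes the dictionary keys above as attributes:
    # change.revid, change.date, change.author, change.branch_nick,
    # change.short_comment, change.comment, change.comment_clean and
    # change.parents (get_changes() fills in revno and merge_points afterwards).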

    def get_file_changes_uncached(self, entries):
        delta_list = self._get_deltas_for_revisions_with_trees(entries)

        return [self.parse_delta(delta) for delta in delta_list]

    def get_file_changes(self, entries):
        if self._file_change_cache is None:
            return self.get_file_changes_uncached(entries)
        else:
            return self._file_change_cache.get_file_changes(entries)

    def add_changes(self, entries):
        changes_list = self.get_file_changes(entries)

        for entry, changes in zip(entries, changes_list):
            entry.changes = changes

    def get_change_with_diff(self, revid, compare_revid=None):
        change = self.get_changes([revid])[0]

        if compare_revid is None:
            if change.parents:
                compare_revid = change.parents[0].revid
            else:
                compare_revid = 'null:'

        rev_tree1 = self._branch.repository.revision_tree(compare_revid)
        rev_tree2 = self._branch.repository.revision_tree(revid)
        delta = rev_tree2.changes_from(rev_tree1)

        change.changes = self.parse_delta(delta)
        change.changes.modified = self._parse_diffs(rev_tree1, rev_tree2, delta)

        return change

    @with_branch_lock
    def get_file(self, file_id, revid):
        "returns (path, filename, data)"

    def _parse_diffs(self, old_tree, new_tree, delta):
        process = []
        out = []

        for old_path, new_path, fid, kind, text_modified, meta_modified in delta.renamed:
            if text_modified:
                process.append((old_path, new_path, fid, kind))
        for path, fid, kind, text_modified, meta_modified in delta.modified:
            process.append((path, path, fid, kind))

        for old_path, new_path, fid, kind in process:
            old_lines = old_tree.get_file_lines(fid)
            new_lines = new_tree.get_file_lines(fid)
            buffer = StringIO()
            if old_lines != new_lines:
                try:
                    bzrlib.diff.internal_diff(old_path, old_lines,
                                              new_path, new_lines, buffer)
                except bzrlib.errors.BinaryFile:
                    diff = ''
                else:
                    diff = buffer.getvalue()
            else:
                diff = ''
            out.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid, chunks=self._process_diff(diff), raw_diff=diff))

        return out

    def _process_diff(self, diff):
        # doesn't really need to be a method; could be static.
        chunks = []
        chunk = None
        for line in diff.splitlines():
            if len(line) == 0:
                continue
            if line.startswith('+++ ') or line.startswith('--- '):
                continue
            if line.startswith('@@ '):
                # start a new chunk
                if chunk is not None:
                    chunks.append(chunk)
                chunk = util.Container()
                chunk.diff = []
                lines = [int(x.split(',')[0][1:]) for x in line.split(' ')[1:3]]
                old_lineno = lines[0]
                new_lineno = lines[1]
            elif line.startswith(' '):
                chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=new_lineno,
                                                 type='context', line=util.fixed_width(line[1:])))
                old_lineno += 1
                new_lineno += 1
            elif line.startswith('+'):
                chunk.diff.append(util.Container(old_lineno=None, new_lineno=new_lineno,
                                                 type='insert', line=util.fixed_width(line[1:])))
                new_lineno += 1
            elif line.startswith('-'):
                chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=None,
                                                 type='delete', line=util.fixed_width(line[1:])))
                old_lineno += 1
            else:
                chunk.diff.append(util.Container(old_lineno=None, new_lineno=None,
                                                 type='unknown', line=util.fixed_width(repr(line))))
        if chunk is not None:
            chunks.append(chunk)
        return chunks
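
    # Example of the structure built above, for a hypothetical one-hunk diff
    # ('@@ -1,2 +1,2 @@' followed by ' a', '-b', '+c'): one chunk whose .diff
    # list holds Containers roughly like
    #   (old_lineno=1,    new_lineno=1,    type='context', line=util.fixed_width('a'))
    #   (old_lineno=2,    new_lineno=None, type='delete',  line=util.fixed_width('b'))
    #   (old_lineno=None, new_lineno=2,    type='insert',  line=util.fixed_width('c'))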

    def parse_delta(self, delta):
        """
        Return a nested data structure containing the changes in a delta::

            added: list((filename, file_id)),
            renamed: list((old_filename, new_filename, file_id)),
            deleted: list((filename, file_id)),
            modified: list(
                filename: str,
                file_id: str,
            )
        """
        added = []
        modified = []
        renamed = []
        removed = []

        for path, fid, kind in delta.added:
            added.append((rich_filename(path, kind), fid))

        for path, fid, kind, text_modified, meta_modified in delta.modified:
            modified.append(util.Container(filename=rich_filename(path, kind), file_id=fid))

        for old_path, new_path, fid, kind, text_modified, meta_modified in delta.renamed:
            renamed.append((rich_filename(old_path, kind), rich_filename(new_path, kind), fid))
            if meta_modified or text_modified:
                modified.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid))

        for path, fid, kind in delta.removed:
            removed.append((rich_filename(path, kind), fid))

        return util.Container(added=added, renamed=renamed, removed=removed, modified=modified)
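
    # Note (illustrative, with a hypothetical filename): a delta containing only
    # one added file 'README' comes back roughly as
    #   util.Container(added=[('README', file_id)], renamed=[], removed=[], modified=[])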

        for change in changes:
            for m in change.changes.modified:
                m.sbs_chunks = _make_side_by_side(m.chunks)

    @with_branch_lock
    def get_filelist(self, inv, file_id, sort_type=None):
        """
        return the list of all files (and their attributes) within a given
        directory.
        """

        dir_ie = inv[file_id]
        path = inv.id2path(file_id)
        file_list = []

        revid_set = set()
        for filename, entry in dir_ie.children.iteritems():
            revid_set.add(entry.revision)

        change_dict = {}
        for change in self.get_changes(list(revid_set)):
            change_dict[change.revid] = change

        for filename, entry in dir_ie.children.iteritems():
            pathname = filename
            if entry.kind == 'directory':
                pathname += '/'

            revid = entry.revision

            file = util.Container(
                filename=filename, executable=entry.executable, kind=entry.kind,
                pathname=pathname, file_id=entry.file_id, size=entry.text_size,
                revid=revid, change=change_dict[revid])
            file_list.append(file)

        if sort_type == 'filename' or sort_type is None:
            file_list.sort(key=lambda x: x.filename)
        elif sort_type == 'size':
            file_list.sort(key=lambda x: x.size)
        elif sort_type == 'date':
            file_list.sort(key=lambda x: x.change.date)

        parity = 0
        for file in file_list:
            file.parity = parity