41
from StringIO import StringIO
37
from loggerhead import search
38
43
from loggerhead import util
39
from loggerhead.wholehistory import compute_whole_history_data
44
from loggerhead.util import decorator
47
import bzrlib.annotate
42
48
import bzrlib.branch
49
import bzrlib.bundle.serializer
50
import bzrlib.decorators
44
52
import bzrlib.errors
53
import bzrlib.progress
54
import bzrlib.textfile
59
with_branch_lock = util.with_lock('_lock', 'branch')
62
def with_bzrlib_read_lock(unbound):
    """Decorate a method so that it runs under a repository read lock.

    The wrapper calls ``self._branch.repository.lock_read()`` before
    invoking ``unbound`` and ``self._branch.repository.unlock()`` afterwards.

    :param unbound: the function to wrap; it receives ``self`` first.
    :return: a function with the same call signature that returns whatever
        ``unbound`` returns.
    """
    def bzrlib_read_locked(self, *args, **kw):
        #self.log.debug('-> %r bzr lock', id(threading.currentThread()))
        self._branch.repository.lock_read()
        try:
            return unbound(self, *args, **kw)
        finally:
            # Must be in ``finally``: an unlock placed after the ``return``
            # never runs, and the lock must also be dropped when
            # ``unbound`` raises.
            self._branch.repository.unlock()
            #self.log.debug('<- %r bzr lock', id(threading.currentThread()))
    return bzrlib_read_locked
74
# bzrlib's UIFactory is not thread-safe
75
uihack = threading.local()
77
class ThreadSafeUIFactory (bzrlib.ui.SilentUIFactory):
    """Silent UI factory that keeps a separate progress-bar stack per thread.

    The stack is stored on the module-level ``uihack`` thread-local object,
    so threads never share one.
    """

    def nested_progress_bar(self):
        """Return a nested progress bar from the calling thread's stack.

        The stack (built from ``bzrlib.progress.DummyProgress``) is created
        the first time a given thread asks for a progress bar.
        """
        if getattr(uihack, '_progress_bar_stack', None) is None:
            uihack._progress_bar_stack = bzrlib.progress.ProgressBarStack(
                klass=bzrlib.progress.DummyProgress)
        return uihack._progress_bar_stack.get_nested()
83
bzrlib.ui.ui_factory = ThreadSafeUIFactory()
86
def _process_side_by_side_buffers(line_list, delete_list, insert_list):
    """Pair buffered deletions with insertions and append them as rows.

    :param line_list: output list; one ``util.Container`` row is appended
        per paired line.
    :param delete_list: buffered ``(old_lineno, text, type)`` tuples.
    :param insert_list: buffered ``(new_lineno, text, type)`` tuples.

    The shorter buffer is padded with ``(None, '', 'context')`` so every row
    has both sides.  Both buffers are empty when this returns.
    """
    filler = (None, '', 'context')
    shortfall = len(insert_list) - len(delete_list)
    if shortfall > 0:
        delete_list.extend([filler] * shortfall)
    elif shortfall < 0:
        insert_list.extend([filler] * -shortfall)
    # One pass over the paired buffers instead of repeated pop(0), which
    # shifts the whole list on every call.
    for d, i in zip(delete_list, insert_list):
        line_list.append(util.Container(old_lineno=d[0], new_lineno=i[0],
                                        old_line=d[1], new_line=i[1],
                                        old_type=d[2], new_type=i[2]))
    # The pop-based loop drained both buffers; keep that side effect.
    del delete_list[:]
    del insert_list[:]
99
def _make_side_by_side(chunk_list):
    """
    turn a normal unified-style diff (post-processed by parse_delta) into a
    side-by-side diff structure.

    :param chunk_list: chunks whose ``diff`` attribute is a list of lines
        carrying ``type``, ``line``, ``old_lineno`` and ``new_lineno``.
    :return: a list with one ``util.Container(diff=[...])`` per input chunk.
        Each row in ``diff`` has ``old_lineno``, ``new_lineno``,
        ``old_line``, ``new_line``, ``old_type`` and ``new_type``.

    Runs of 'delete' and 'insert' lines are buffered and paired up row by
    row; 'context' lines appear unchanged on both sides.  Lines of any other
    type are dropped.
    """
    out_chunk_list = []
    for chunk in chunk_list:
        # A fresh row list per chunk, so output chunks never share rows.
        line_list = []
        delete_list, insert_list = [], []
        for line in chunk.diff:
            if line.type == 'context':
                # A context line ends the current run of changes: flush it.
                if delete_list or insert_list:
                    _process_side_by_side_buffers(line_list, delete_list,
                                                  insert_list)
                    delete_list, insert_list = [], []
                line_list.append(util.Container(
                    old_lineno=line.old_lineno, new_lineno=line.new_lineno,
                    old_line=line.line, new_line=line.line,
                    old_type=line.type, new_type=line.type))
            elif line.type == 'delete':
                delete_list.append((line.old_lineno, line.line, line.type))
            elif line.type == 'insert':
                insert_list.append((line.new_lineno, line.line, line.type))
        # Flush a run of changes that reaches the end of the chunk.
        if delete_list or insert_list:
            _process_side_by_side_buffers(line_list, delete_list, insert_list)
        out_chunk_list.append(util.Container(diff=line_list))
    return out_chunk_list
47
136
def is_branch(folder):
98
169
def __getitem__(self, index):
    """Get the date of the index'd item.

    Looks up ``self.revid_list[index]`` in ``self.repository`` and converts
    the revision's ``timestamp`` with ``datetime.datetime.fromtimestamp``.
    """
    revision = self.repository.get_revision(self.revid_list[index])
    return datetime.datetime.fromtimestamp(revision.timestamp)
103
173
def __len__(self):
    """Return the number of revision ids in ``self.revid_list``."""
    return len(self.revid_list)
106
class FileChangeReporter(object):
107
def __init__(self, old_inv, new_inv):
112
self.text_changes = []
113
self.old_inv = old_inv
114
self.new_inv = new_inv
116
def revid(self, inv, file_id):
118
return inv[file_id].revision
119
except bzrlib.errors.NoSuchId:
122
def report(self, file_id, paths, versioned, renamed, modified,
124
if modified not in ('unchanged', 'kind changed'):
125
if versioned == 'removed':
126
filename = rich_filename(paths[0], kind[0])
128
filename = rich_filename(paths[1], kind[1])
129
self.text_changes.append(util.Container(
130
filename=filename, file_id=file_id,
131
old_revision=self.revid(self.old_inv, file_id),
132
new_revision=self.revid(self.new_inv, file_id)))
133
if versioned == 'added':
134
self.added.append(util.Container(
135
filename=rich_filename(paths[1], kind),
136
file_id=file_id, kind=kind[1]))
137
elif versioned == 'removed':
138
self.removed.append(util.Container(
139
filename=rich_filename(paths[0], kind),
140
file_id=file_id, kind=kind[0]))
142
self.renamed.append(util.Container(
143
old_filename=rich_filename(paths[0], kind[0]),
144
new_filename=rich_filename(paths[1], kind[1]),
146
text_modified=modified == 'modified'))
148
self.modified.append(util.Container(
149
filename=rich_filename(paths[1], kind),
153
class RevInfoMemoryCache(object):
    """A store that validates values against the revids they were stored with.

    We use a unique key for each branch.

    The reason for not just using the revid as the key is so that when a new
    value is provided for a branch, we replace the old value used for the
    branch.

    There is another implementation of the same interface in
    loggerhead.changecache.RevInfoDiskCache.
    """

    def __init__(self, cache):
        """Wrap `cache`, a mapping supporting ``get(key)`` and item
        assignment.
        """
        self._cache = cache

    def get(self, key, revid):
        """Return the data associated with `key`, subject to a revid check.

        If a value was stored under `key`, with the same revid, return it.
        Otherwise return None.
        """
        cached = self._cache.get(key)
        if cached is None:
            # Nothing stored under this key: a miss, not an unpacking error.
            return None
        stored_revid, data = cached
        if revid == stored_revid:
            return data
        return None

    def set(self, key, revid, data):
        """Store `data` under `key`, to be checked against `revid` on get().

        Any value previously stored under `key` is replaced.
        """
        self._cache[key] = (revid, data)
190
177
class History (object):
191
"""Decorate a branch to provide information for rendering.
193
History objects are expected to be short lived -- when serving a request
194
for a particular branch, open it, read-lock it, wrap a History object
195
around it, serve the request, throw the History object away, unlock the
196
branch and throw it away.
198
:ivar _file_change_cache: An object that caches information about the
199
files that changed between two revisions.
200
:ivar _rev_info: A list of information about revisions. This is by far
201
the most cryptic data structure in loggerhead. At the top level, it
202
is a list of 3-tuples [(merge-info, where-merged, parents)].
203
`merge-info` is (seq, revid, merge_depth, revno_str, end_of_merge) --
204
like a merged sorted list, but the revno is stringified.
205
`where-merged` is a tuple of revisions that have this revision as a
206
non-lefthand parent. Finally, `parents` is just the usual list of
207
parents of this revision.
208
:ivar _rev_indices: A dictionary mapping each revision id to the index of
209
the information about it in _rev_info.
210
:ivar _revno_revid: A dictionary mapping stringified revnos to revision
214
def _load_whole_history_data(self, caches, cache_key):
215
"""Set the attributes relating to the whole history of the branch.
217
:param caches: a list of caches with interfaces like
218
`RevInfoMemoryCache` and be ordered from fastest to slowest.
219
:param cache_key: the key to use with the caches.
221
self._rev_indices = None
222
self._rev_info = None
225
def update_missed_caches():
226
for cache in missed_caches:
227
cache.set(cache_key, self.last_revid, self._rev_info)
229
data = cache.get(cache_key, self.last_revid)
231
self._rev_info = data
232
update_missed_caches()
235
missed_caches.append(cache)
237
whole_history_data = compute_whole_history_data(self._branch)
238
self._rev_info, self._rev_indices = whole_history_data
239
update_missed_caches()
241
if self._rev_indices is not None:
242
self._revno_revid = {}
243
for ((_, revid, _, revno_str, _), _, _) in self._rev_info:
244
self._revno_revid[revno_str] = revid
246
self._revno_revid = {}
247
self._rev_indices = {}
248
for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:
249
self._rev_indices[revid] = seq
250
self._revno_revid[revno_str] = revid
252
def __init__(self, branch, whole_history_data_cache, file_cache=None,
253
revinfo_disk_cache=None, cache_key=None):
254
assert branch.is_locked(), (
255
"Can only construct a History object with a read-locked branch.")
256
if file_cache is not None:
257
self._file_change_cache = file_cache
258
file_cache.history = self
260
self._file_change_cache = None
180
self._change_cache = None
182
self._lock = threading.RLock()
185
def from_branch(cls, branch, name=None):
261
188
self._branch = branch
262
self._inventory_cache = {}
263
self._branch_nick = self._branch.get_config().get_nickname()
264
self.log = logging.getLogger('loggerhead.%s' % self._branch_nick)
266
self.last_revid = branch.last_revision()
268
caches = [RevInfoMemoryCache(whole_history_data_cache)]
269
if revinfo_disk_cache:
270
caches.append(revinfo_disk_cache)
271
self._load_whole_history_data(caches, cache_key)
274
def has_revisions(self):
    """Return True unless ``self.last_revid`` is a null revision."""
    return not bzrlib.revision.is_null(self.last_revid)
189
self._history = branch.revision_history()
190
self._last_revid = self._history[-1]
191
self._revision_graph = branch.repository.get_revision_graph(self._last_revid)
194
name = self._branch.nick
196
self.log = logging.getLogger('loggerhead.%s' % (name,))
198
self._full_history = []
199
self._revision_info = {}
200
self._revno_revid = {}
201
self._merge_sort = bzrlib.tsort.merge_sort(self._revision_graph, self._last_revid, generate_revno=True)
203
for (seq, revid, merge_depth, revno, end_of_merge) in self._merge_sort:
204
self._full_history.append(revid)
205
revno_str = '.'.join(str(n) for n in revno)
206
self._revno_revid[revno_str] = revid
207
self._revision_info[revid] = (seq, revid, merge_depth, revno_str, end_of_merge)
212
self._where_merged = {}
213
for revid in self._revision_graph.keys():
214
if not revid in self._full_history:
216
for parent in self._revision_graph[revid]:
217
self._where_merged.setdefault(parent, set()).add(revid)
219
self.log.info('built revision graph cache: %r secs' % (time.time() - z,))
223
def from_folder(cls, path, name=None):
224
b = bzrlib.branch.Branch.open(path)
225
return cls.from_branch(b, name)
228
def out_of_date(self):
229
if self._branch.revision_history()[-1] != self._last_revid:
233
def use_cache(self, cache):
    """Install `cache` as this object's change cache (``_change_cache``)."""
    self._change_cache = cache
236
def use_search_index(self, index):
241
# called when a new history object needs to be created, because the
242
# branch history has changed. we need to immediately close and stop
243
# using our caches, because a new history object will be created to
244
# replace us, using the same cache files.
245
# (may also be called during server shutdown.)
246
if self._change_cache is not None:
247
self._change_cache.close()
248
self._change_cache = None
249
if self._index is not None:
253
def flush_cache(self):
254
if self._change_cache is None:
256
self._change_cache.flush()
258
def check_rebuild(self):
    """Forward ``check_rebuild()`` to the change cache and search index.

    Either collaborator is skipped when it is None.
    """
    if self._change_cache is not None:
        self._change_cache.check_rebuild()
    if self._index is not None:
        self._index.check_rebuild()
264
last_revid = property(lambda self: self._last_revid, None, None)
266
count = property(lambda self: self._count, None, None)
277
269
def get_config(self):
    """Return the result of ``get_config()`` on the wrapped branch."""
    return self._branch.get_config()
273
def get_revision(self, revid):
    """Return the branch repository's revision object for `revid`."""
    return self._branch.repository.get_revision(revid)
280
276
def get_revno(self, revid):
281
if revid not in self._rev_indices:
277
if revid not in self._revision_info:
284
seq = self._rev_indices[revid]
285
revno = self._rev_info[seq][0][3]
288
def get_revids_from(self, revid_list, start_revid):
290
Yield the mainline (wrt start_revid) revisions that merged each
293
if revid_list is None:
294
revid_list = [r[0][1] for r in self._rev_info]
295
revid_set = set(revid_list)
298
def introduced_revisions(revid):
300
seq = self._rev_indices[revid]
301
md = self._rev_info[seq][0][2]
303
while i < len(self._rev_info) and self._rev_info[i][0][2] > md:
304
r.add(self._rev_info[i][0][1])
308
if bzrlib.revision.is_null(revid):
310
if introduced_revisions(revid) & revid_set:
280
seq, revid, merge_depth, revno_str, end_of_merge = self._revision_info[revid]
283
def get_sequence(self, revid):
284
seq, revid, merge_depth, revno_str, end_of_merge = self._revision_info[revid]
287
def get_revision_history(self):
    """Return ``self._full_history`` (the list itself, not a copy)."""
    return self._full_history
290
def get_revid_sequence(self, revid_list, revid):
292
given a list of revision ids, return the sequence # of this revid in
301
def get_revids_from(self, revid_list, revid):
303
given a list of revision ids, yield revisions in graph order,
304
starting from revid. the list can be None if you just want to travel
305
across all revisions.
308
if (revid_list is None) or (revid in revid_list):
312
parents = self._rev_info[self._rev_indices[revid]][2]
310
if not self._revision_graph.has_key(revid):
312
parents = self._revision_graph[revid]
313
313
if len(parents) == 0:
315
315
revid = parents[0]
317
318
def get_short_revision_history_by_fileid(self, file_id):
319
# wow. is this really the only way we can get this list? by
320
# man-handling the weave store directly? :-0
318
321
# FIXME: would be awesome if we could get, for a folder, the list of
319
# revisions where items within that folder changed.
320
possible_keys = [(file_id, revid) for revid in self._rev_indices]
321
get_parent_map = self._branch.repository.texts.get_parent_map
322
# We chunk the requests as this works better with GraphIndex.
323
# See _filter_revisions_touching_file_id in bzrlib/log.py
324
# for more information.
327
for start in xrange(0, len(possible_keys), chunk_size):
328
next_keys = possible_keys[start:start + chunk_size]
329
revids += [k[1] for k in get_parent_map(next_keys)]
330
del possible_keys, next_keys
322
# revisions where items within that folder changed.
323
w = self._branch.repository.weave_store.get_weave(file_id, self._branch.repository.get_transaction())
324
w_revids = w.versions()
325
revids = [r for r in self._full_history if r in w_revids]
333
329
def get_revision_history_since(self, revid_list, date):
334
330
# if a user asks for revisions starting at 01-sep, they mean inclusive,
335
331
# so start at midnight on 02-sep.
336
332
date = date + datetime.timedelta(days=1)
337
# our revid list is sorted in REVERSE date order,
338
# so go thru some hoops here...
333
# our revid list is sorted in REVERSE date order, so go thru some hoops here...
339
334
revid_list.reverse()
340
index = bisect.bisect(_RevListToTimestamps(revid_list,
341
self._branch.repository),
335
index = bisect.bisect(_RevListToTimestamps(revid_list, self._branch.repository), date)
345
338
revid_list.reverse()
347
340
return revid_list[index:]
343
def get_revision_history_matching(self, revid_list, text):
344
self.log.debug('searching %d revisions for %r', len(revid_list), text)
346
# this is going to be painfully slow. :(
349
for revid in revid_list:
350
change = self.get_changes([ revid ])[0]
351
if text in change.comment.lower():
353
self.log.debug('searched %d revisions for %r in %r secs', len(revid_list), text, time.time() - z)
356
def get_revision_history_matching_indexed(self, revid_list, text):
357
self.log.debug('searching %d revisions for %r', len(revid_list), text)
359
if self._index is None:
360
return self.get_revision_history_matching(revid_list, text)
361
out = self._index.find(text, revid_list)
362
self.log.debug('searched %d revisions for %r in %r secs: %d results', len(revid_list), text, time.time() - z, len(out))
363
# put them in some coherent order :)
364
out = [r for r in self._full_history if r in out]
349
368
def get_search_revid_list(self, query, revid_list):
351
370
given a "quick-search" query, try a few obvious possible meanings:
353
372
- revision id or # ("128.1.3")
354
- date (US style "mm/dd/yy", earth style "dd-mm-yy", or \
355
iso style "yyyy-mm-dd")
373
- date (US style "mm/dd/yy", earth style "dd-mm-yy", or iso style "yyyy-mm-dd")
356
374
- comment text as a fallback
358
376
and return a revid list that matches.
408
421
# if a "revid" is actually a dotted revno, convert it to a revid
409
422
if revid is None:
412
return self.last_revid
414
if self.revno_re.match(revid):
415
revid = self._revno_revid[revid]
417
raise bzrlib.errors.NoSuchRevision(self._branch_nick, revid)
424
if self.revno_re.match(revid):
425
revid = self._revno_revid[revid]
420
429
def get_file_view(self, revid, file_id):
422
Given a revid and optional path, return a (revlist, revid) for
423
navigation through the current scope: from the revid (or the latest
424
revision) back to the original revision.
431
Given an optional revid and optional path, return a (revlist, revid)
432
for navigation through the current scope: from the revid (or the
433
latest revision) back to the original revision.
426
435
If file_id is None, the entire revision history is the list scope.
436
If revid is None, the latest revision is used.
428
438
if revid is None:
429
revid = self.last_revid
439
revid = self._last_revid
430
440
if file_id is not None:
431
# since revid is 'start_revid', possibly should start the path
432
# tracing from revid... FIXME
441
# since revid is 'start_revid', possibly should start the path tracing from revid... FIXME
442
inv = self._branch.repository.get_revision_inventory(revid)
433
443
revlist = list(self.get_short_revision_history_by_fileid(file_id))
434
444
revlist = list(self.get_revids_from(revlist, revid))
436
446
revlist = list(self.get_revids_from(None, revid))
449
return revlist, revid
439
452
def get_view(self, revid, start_revid, file_id, query=None):
441
454
use the URL parameters (revid, start_revid, file_id, and query) to
442
455
determine the revision list we're viewing (start_revid, file_id, query)
443
456
and where we are in it (revid).
445
- if a query is given, we're viewing query results.
446
- if a file_id is given, we're viewing revisions for a specific
448
- if a start_revid is given, we're viewing the branch from a
449
specific revision up the tree.
451
these may be combined to view revisions for a specific file, from
452
a specific revision, with a specific search query.
454
returns a new (revid, start_revid, revid_list) where:
458
if a query is given, we're viewing query results.
459
if a file_id is given, we're viewing revisions for a specific file.
460
if a start_revid is given, we're viewing the branch from a
461
specific revision up the tree.
462
(these may be combined to view revisions for a specific file, from
463
a specific revision, with a specific search query.)
465
returns a new (revid, start_revid, revid_list, scan_list) where:
456
467
- revid: current position within the view
457
468
- start_revid: starting revision of this view
458
469
- revid_list: list of revision ids for this view
460
471
file_id and query are never changed so aren't returned, but they may
461
472
contain vital context for future url navigation.
463
if start_revid is None:
464
start_revid = self.last_revid
466
474
if query is None:
467
revid_list = self.get_file_view(start_revid, file_id)
475
revid_list, start_revid = self.get_file_view(start_revid, file_id)
468
476
if revid is None:
469
477
revid = start_revid
470
478
if revid not in revid_list:
471
479
# if the given revid is not in the revlist, use a revlist that
472
480
# starts at the given revid.
473
revid_list = self.get_file_view(revid, file_id)
481
revid_list, start_revid = self.get_file_view(revid, file_id)
475
482
return revid, start_revid, revid_list
477
484
# potentially limit the search
478
if file_id is not None:
479
revid_list = self.get_file_view(start_revid, file_id)
485
if (start_revid is not None) or (file_id is not None):
486
revid_list, start_revid = self.get_file_view(start_revid, file_id)
481
488
revid_list = None
482
revid_list = search.search_revisions(self._branch, query)
483
if revid_list and len(revid_list) > 0:
490
revid_list = self.get_search_revid_list(query, revid_list)
491
if len(revid_list) > 0:
484
492
if revid not in revid_list:
485
493
revid = revid_list[0]
486
494
return revid, start_revid, revid_list
488
# XXX: This should return a message saying that the search could
489
# not be completed due to either missing the plugin or missing a
491
497
return None, None, []
493
500
def get_inventory(self, revid):
    """Return the inventory for `revid`, fetching it at most once.

    Results are memoised in ``self._inventory_cache``; on a miss the
    inventory comes from
    ``self._branch.repository.get_revision_inventory(revid)``.
    """
    if revid not in self._inventory_cache:
        self._inventory_cache[revid] = (
            self._branch.repository.get_revision_inventory(revid))
    return self._inventory_cache[revid]
499
504
def get_path(self, revid, file_id):
500
505
if (file_id is None) or (file_id == ''):
502
path = self.get_inventory(revid).id2path(file_id)
507
path = self._branch.repository.get_revision_inventory(revid).id2path(file_id)
503
508
if (len(path) > 0) and not path.startswith('/'):
504
509
path = '/' + path
507
def get_file_id(self, revid, path):
508
if (len(path) > 0) and not path.startswith('/'):
510
return self.get_inventory(revid).path2id(path)
512
def get_where_merged(self, revid):
514
return self._where_merged[revid]
512
518
def get_merge_point_list(self, revid):
514
520
Return the list of revids that have merged this node.
516
if '.' not in self.get_revno(revid):
522
if revid in self._history:
521
children = self._rev_info[self._rev_indices[revid]][1]
527
children = self.get_where_merged(revid)
523
529
for child in children:
524
child_parents = self._rev_info[self._rev_indices[child]][2]
530
child_parents = self._revision_graph[child]
525
531
if child_parents[0] == revid:
526
532
nexts.append(child)
547
553
revnol = revno.split(".")
548
554
revnos = ".".join(revnol[:-2])
549
555
revnolast = int(revnol[-1])
550
if revnos in d.keys():
556
if d.has_key(revnos):
552
558
if revnolast < m:
553
d[revnos] = (revnolast, revid)
559
d[revnos] = ( revnolast, revid )
555
d[revnos] = (revnolast, revid)
557
return [d[revnos][1] for revnos in d.keys()]
559
def add_branch_nicks(self, change):
561
d[revnos] = ( revnolast, revid )
563
return [ d[revnos][1] for revnos in d.keys() ]
565
def get_branch_nicks(self, changes):
561
given a 'change', fill in the branch nicks on all parents and merge
567
given a list of changes from L{get_changes}, fill in the branch nicks
568
on all parents and merge points.
564
570
fetch_set = set()
565
for p in change.parents:
566
fetch_set.add(p.revid)
567
for p in change.merge_points:
568
fetch_set.add(p.revid)
571
for change in changes:
572
for p in change.parents:
573
fetch_set.add(p.revid)
574
for p in change.merge_points:
575
fetch_set.add(p.revid)
569
576
p_changes = self.get_changes(list(fetch_set))
570
577
p_change_dict = dict([(c.revid, c) for c in p_changes])
571
for p in change.parents:
572
if p.revid in p_change_dict:
573
p.branch_nick = p_change_dict[p.revid].branch_nick
575
p.branch_nick = '(missing)'
576
for p in change.merge_points:
577
if p.revid in p_change_dict:
578
p.branch_nick = p_change_dict[p.revid].branch_nick
580
p.branch_nick = '(missing)'
582
def get_changes(self, revid_list):
583
"""Return a list of changes objects for the given revids.
585
Revisions not present and NULL_REVISION will be ignored.
587
changes = self.get_changes_uncached(revid_list)
588
if len(changes) == 0:
578
for change in changes:
579
# arch-converted branches may not have merged branch info :(
580
for p in change.parents:
581
if p.revid in p_change_dict:
582
p.branch_nick = p_change_dict[p.revid].branch_nick
584
p.branch_nick = '(missing)'
585
for p in change.merge_points:
586
if p.revid in p_change_dict:
587
p.branch_nick = p_change_dict[p.revid].branch_nick
589
p.branch_nick = '(missing)'
592
def get_changes(self, revid_list, get_diffs=False):
593
if self._change_cache is None:
594
changes = self.get_changes_uncached(revid_list, get_diffs)
596
changes = self._change_cache.get_changes(revid_list, get_diffs)
591
600
# some data needs to be recalculated each time, because it may
592
601
# change as new revisions are added.
593
for change in changes:
594
merge_revids = self.simplify_merge_point_list(
595
self.get_merge_point_list(change.revid))
596
change.merge_points = [
597
util.Container(revid=r,
598
revno=self.get_revno(r)) for r in merge_revids]
599
if len(change.parents) > 0:
600
change.parents = [util.Container(revid=r,
601
revno=self.get_revno(r)) for r in change.parents]
602
change.revno = self.get_revno(change.revid)
605
for change in changes:
606
change.parity = parity
602
for i in xrange(len(revid_list)):
603
revid = revid_list[i]
605
merge_revids = self.simplify_merge_point_list(self.get_merge_point_list(revid))
606
change.merge_points = [util.Container(revid=r, revno=self.get_revno(r)) for r in merge_revids]
611
def get_changes_uncached(self, revid_list):
612
# FIXME: deprecated method in getting a null revision
613
revid_list = filter(lambda revid: not bzrlib.revision.is_null(revid),
615
parent_map = self._branch.repository.get_graph().get_parent_map(
617
# We need to return the answer in the same order as the input,
619
present_revids = [revid for revid in revid_list
620
if revid in parent_map]
621
rev_list = self._branch.repository.get_revisions(present_revids)
623
return [self._change_from_revision(rev) for rev in rev_list]
625
def _change_from_revision(self, revision):
627
Given a bzrlib Revision, return a processed "change" for use in
610
# alright, let's profile this sucka.
611
def _get_changes_profiled(self, revid_list, get_diffs=False):
612
from loggerhead.lsprof import profile
614
ret, stats = profile(self.get_changes_uncached, revid_list, get_diffs)
617
cPickle.dump(stats, open('lsprof.stats', 'w'), 2)
618
self.log.info('lsprof complete!')
621
def _get_deltas_for_revisions_with_trees(self, revisions):
622
"""Produce a generator of revision deltas.
624
Note that the input is a sequence of REVISIONS, not revision_ids.
625
Trees will be held in memory until the generator exits.
626
Each delta is relative to the revision's lefthand predecessor.
628
required_trees = set()
629
for revision in revisions:
630
required_trees.add(revision.revision_id)
631
required_trees.update(revision.parent_ids[:1])
632
trees = dict((t.get_revision_id(), t) for
633
t in self._branch.repository.revision_trees(required_trees))
635
self._branch.repository.lock_read()
637
for revision in revisions:
638
if not revision.parent_ids:
639
old_tree = self._branch.repository.revision_tree(None)
641
old_tree = trees[revision.parent_ids[0]]
642
tree = trees[revision.revision_id]
643
ret.append((tree, old_tree, tree.changes_from(old_tree)))
646
self._branch.repository.unlock()
648
def entry_from_revision(self, revision):
649
commit_time = datetime.datetime.fromtimestamp(revision.timestamp)
651
parents = [util.Container(revid=r, revno=self.get_revno(r)) for r in revision.parent_ids]
653
if len(parents) == 0:
656
left_parent = revision.parent_ids[0]
630
658
message, short_message = clean_message(revision.message)
632
tags = self._branch.tags.get_reverse_tag_dict()
635
if tags.has_key(revision.revision_id):
636
revtags = ', '.join(tags[revision.revision_id])
639
661
'revid': revision.revision_id,
640
'date': datetime.datetime.fromtimestamp(revision.timestamp),
641
'utc_date': datetime.datetime.utcfromtimestamp(revision.timestamp),
642
'authors': revision.get_apparent_authors(),
662
'revno': self.get_revno(revision.revision_id),
664
'author': revision.committer,
643
665
'branch_nick': revision.properties.get('branch-nick', None),
644
666
'short_comment': short_message,
645
667
'comment': revision.message,
646
668
'comment_clean': [util.html_clean(s) for s in message],
647
'parents': revision.parent_ids,
648
'bugs': [bug.split()[0] for bug in revision.properties.get('bugs', '').splitlines()],
651
671
return util.Container(entry)
653
def get_file_changes_uncached(self, entry):
    """Compute the file changes `entry` made relative to its first parent.

    :param entry: a change with ``revid`` and ``parents``; each parent has
        a ``revid`` attribute.
    :return: whatever ``self.file_changes_for_revision_ids`` returns for
        (first parent revid, ``entry.revid``).

    An entry with no parents is compared against
    ``bzrlib.revision.NULL_REVISION``.
    """
    if entry.parents:
        old_revid = entry.parents[0].revid
    else:
        old_revid = bzrlib.revision.NULL_REVISION
    return self.file_changes_for_revision_ids(old_revid, entry.revid)
660
def get_file_changes(self, entry):
    """Return the file changes for `entry`, using the cache when present.

    With no ``_file_change_cache`` configured the changes are computed by
    ``get_file_changes_uncached``; otherwise the cache's
    ``get_file_changes`` is asked.
    """
    cache = self._file_change_cache
    if cache is None:
        return self.get_file_changes_uncached(entry)
    return cache.get_file_changes(entry)
666
def add_changes(self, entry):
    """Attach the file changes of `entry` to it as ``entry.changes``.

    The entry is modified in place; nothing is returned.
    """
    entry.changes = self.get_file_changes(entry)
674
@with_bzrlib_read_lock
675
def get_changes_uncached(self, revid_list, get_diffs=False):
679
rev_list = self._branch.repository.get_revisions(revid_list)
681
except (KeyError, bzrlib.errors.NoSuchRevision), e:
682
# this sometimes happens with arch-converted branches.
683
# i don't know why. :(
684
self.log.debug('No such revision (skipping): %s', e)
685
revid_list.remove(e.revision)
687
delta_list = self._get_deltas_for_revisions_with_trees(rev_list)
688
combined_list = zip(rev_list, delta_list)
691
for rev, (new_tree, old_tree, delta) in combined_list:
692
entry = self.entry_from_revision(rev)
693
entry.changes = self.parse_delta(delta, get_diffs, old_tree, new_tree)
694
entries.append(entry)
698
@with_bzrlib_read_lock
def _get_diff(self, revid1, revid2):
    """Load both revision trees and the delta between them.

    :return: ``(rev_tree1, rev_tree2, delta)`` where ``delta`` is
        ``rev_tree2.changes_from(rev_tree1)``.
    """
    repository = self._branch.repository
    rev_tree1 = repository.revision_tree(revid1)
    rev_tree2 = repository.revision_tree(revid2)
    delta = rev_tree2.changes_from(rev_tree1)
    return rev_tree1, rev_tree2, delta
705
def get_diff(self, revid1, revid2):
706
rev_tree1, rev_tree2, delta = self._get_diff(revid1, revid2)
707
entry = self.get_changes([ revid2 ], False)[0]
708
entry.changes = self.parse_delta(delta, True, rev_tree1, rev_tree2)
670
712
def get_file(self, file_id, revid):
671
713
"returns (path, filename, data)"
672
714
inv = self.get_inventory(revid)
676
718
if not path.startswith('/'):
677
719
path = '/' + path
678
720
return path, inv_entry.name, rev_tree.get_file_text(file_id)
680
def file_changes_for_revision_ids(self, old_revid, new_revid):
723
def parse_delta(self, delta, get_diffs=True, old_tree=None, new_tree=None):
682
725
Return a nested data structure containing the changes in a delta::
684
727
added: list((filename, file_id)),
685
728
renamed: list((old_filename, new_filename, file_id)),
686
729
deleted: list((filename, file_id)),
691
text_changes: list((filename, file_id)),
693
repo = self._branch.repository
694
if bzrlib.revision.is_null(old_revid) or \
695
bzrlib.revision.is_null(new_revid):
696
old_tree, new_tree = map(
697
repo.revision_tree, [old_revid, new_revid])
699
old_tree, new_tree = repo.revision_trees([old_revid, new_revid])
701
reporter = FileChangeReporter(old_tree.inventory, new_tree.inventory)
703
bzrlib.delta.report_changes(new_tree.iter_changes(old_tree), reporter)
705
return util.Container(
706
added=sorted(reporter.added, key=lambda x:x.filename),
707
renamed=sorted(reporter.renamed, key=lambda x:x.new_filename),
708
removed=sorted(reporter.removed, key=lambda x:x.filename),
709
modified=sorted(reporter.modified, key=lambda x:x.filename),
710
text_changes=sorted(reporter.text_changes, key=lambda x:x.filename))
737
type: str('context', 'delete', or 'insert'),
743
if C{get_diffs} is false, the C{chunks} will be omitted.
750
def rich_filename(path, kind):
751
if kind == 'directory':
753
if kind == 'symlink':
757
def process_diff(diff):
760
for line in diff.splitlines():
763
if line.startswith('+++ ') or line.startswith('--- '):
765
if line.startswith('@@ '):
767
if chunk is not None:
769
chunk = util.Container()
771
lines = [int(x.split(',')[0][1:]) for x in line.split(' ')[1:3]]
772
old_lineno = lines[0]
773
new_lineno = lines[1]
774
elif line.startswith(' '):
775
chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=new_lineno,
776
type='context', line=util.html_clean(line[1:])))
779
elif line.startswith('+'):
780
chunk.diff.append(util.Container(old_lineno=None, new_lineno=new_lineno,
781
type='insert', line=util.html_clean(line[1:])))
783
elif line.startswith('-'):
784
chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=None,
785
type='delete', line=util.html_clean(line[1:])))
788
chunk.diff.append(util.Container(old_lineno=None, new_lineno=None,
789
type='unknown', line=util.html_clean(repr(line))))
790
if chunk is not None:
794
def handle_modify(old_path, new_path, fid, kind):
796
modified.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid))
798
old_lines = old_tree.get_file_lines(fid)
799
new_lines = new_tree.get_file_lines(fid)
801
bzrlib.diff.internal_diff(old_path, old_lines, new_path, new_lines, buffer)
802
diff = buffer.getvalue()
803
modified.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid, chunks=process_diff(diff), raw_diff=diff))
805
for path, fid, kind in delta.added:
806
added.append((rich_filename(path, kind), fid))
808
for path, fid, kind, text_modified, meta_modified in delta.modified:
809
handle_modify(path, path, fid, kind)
811
for oldpath, newpath, fid, kind, text_modified, meta_modified in delta.renamed:
812
renamed.append((rich_filename(oldpath, kind), rich_filename(newpath, kind), fid))
813
if meta_modified or text_modified:
814
handle_modify(oldpath, newpath, fid, kind)
816
for path, fid, kind in delta.removed:
817
removed.append((rich_filename(path, kind), fid))
819
return util.Container(added=added, renamed=renamed, removed=removed, modified=modified)
822
def add_side_by_side(changes):
823
# FIXME: this is a rotten API.
824
for change in changes:
825
for m in change.changes.modified:
826
m.sbs_chunks = _make_side_by_side(m.chunks)
829
def get_filelist(self, inv, path, sort_type=None):
831
return the list of all files (and their attributes) within a given
834
while path.endswith('/'):
836
if path.startswith('/'):
839
entries = inv.entries()
842
for filepath, entry in entries:
843
fetch_set.add(entry.revision)
844
change_dict = dict([(c.revid, c) for c in self.get_changes(list(fetch_set))])
847
for filepath, entry in entries:
848
if posixpath.dirname(filepath) != path:
850
filename = posixpath.basename(filepath)
851
rich_filename = filename
853
if entry.kind == 'directory':
857
revid = entry.revision
858
change = change_dict[revid]
860
file = util.Container(filename=filename, rich_filename=rich_filename, executable=entry.executable, kind=entry.kind,
861
pathname=pathname, file_id=entry.file_id, size=entry.text_size, revid=revid, change=change)
862
file_list.append(file)
864
if sort_type == 'filename':
865
file_list.sort(key=lambda x: x.filename)
866
elif sort_type == 'size':
867
file_list.sort(key=lambda x: x.size)
868
elif sort_type == 'date':
869
file_list.sort(key=lambda x: x.change.date)
872
for file in file_list:
879
_BADCHARS_RE = re.compile(ur'[\x00-\x08\x0b-\x0c\x0e-\x1f]')
882
def annotate_file(self, file_id, revid):
887
file_revid = self.get_inventory(revid)[file_id].revision
890
# because we cache revision metadata ourselves, it's actually much
891
# faster to call 'annotate_iter' on the weave directly than it is to
892
# ask bzrlib to annotate for us.
893
w = self._branch.repository.weave_store.get_weave(file_id, self._branch.repository.get_transaction())
896
for line_revid, text in w.annotate_iter(file_revid):
897
revid_set.add(line_revid)
898
if self._BADCHARS_RE.match(text):
899
# bail out; this isn't displayable text
900
yield util.Container(parity=0, lineno=1, status='same',
901
text='<i>' + util.html_clean('(This is a binary file.)') + '</i>',
902
change=util.Container())
904
change_cache = dict([(c.revid, c) for c in self.get_changes(list(revid_set))])
906
last_line_revid = None
907
for line_revid, text in w.annotate_iter(file_revid):
908
if line_revid == last_line_revid:
909
# remember which lines have a new revno and which don't
914
last_line_revid = line_revid
915
change = change_cache[line_revid]
916
trunc_revno = change.revno
917
if len(trunc_revno) > 10:
918
trunc_revno = trunc_revno[:9] + '...'
920
yield util.Container(parity=parity, lineno=lineno, status=status,
921
change=change, text=util.html_clean(text))
924
self.log.debug('annotate: %r secs' % (time.time() - z,))
927
@with_bzrlib_read_lock
928
def get_bundle(self, revid, compare_revid=None):
929
if compare_revid is None:
930
parents = self._revision_graph[revid]
932
compare_revid = parents[0]
936
bzrlib.bundle.serializer.write_bundle(self._branch.repository, revid, compare_revid, s)