41
from StringIO import StringIO
43
from loggerhead import util
44
from loggerhead.util import decorator
47
import bzrlib.annotate
48
38
import bzrlib.branch
49
import bzrlib.bundle.serializer
50
import bzrlib.decorators
52
40
import bzrlib.errors
53
import bzrlib.progress
54
import bzrlib.textfile
59
with_branch_lock = util.with_lock('_lock', 'branch')
62
def with_bzrlib_read_lock(unbound):
    """Decorator: hold a read lock on ``self._branch.repository`` for the call.

    :param unbound: an unbound method taking ``self`` as its first argument,
        where ``self`` has a ``_branch.repository`` with ``lock_read()`` /
        ``unlock()``.
    :return: a wrapped method that acquires the read lock, calls ``unbound``,
        and always releases the lock.
    """
    def bzrlib_read_locked(self, *args, **kw):
        #self.log.debug('-> %r bzr lock', id(threading.currentThread()))
        self._branch.repository.lock_read()
        try:
            return unbound(self, *args, **kw)
        finally:
            # Without try/finally, unlock() after an unconditional `return`
            # was unreachable and the read lock leaked on every call.
            self._branch.repository.unlock()
        #self.log.debug('<- %r bzr lock', id(threading.currentThread()))
    return bzrlib_read_locked
74
# bzrlib's UIFactory is not thread-safe, so each thread gets its own
# progress-bar stack via thread-local storage.
uihack = threading.local()


class ThreadSafeUIFactory (bzrlib.ui.SilentUIFactory):
    """A SilentUIFactory whose progress-bar stack is per-thread."""

    def nested_progress_bar(self):
        # Lazily create this thread's stack of DummyProgress bars.
        stack = getattr(uihack, '_progress_bar_stack', None)
        if stack is None:
            stack = bzrlib.progress.ProgressBarStack(
                klass=bzrlib.progress.DummyProgress)
            uihack._progress_bar_stack = stack
        return stack.get_nested()

bzrlib.ui.ui_factory = ThreadSafeUIFactory()
86
def _process_side_by_side_buffers(line_list, delete_list, insert_list):
    """Pair buffered delete/insert diff lines into side-by-side rows.

    The shorter buffer is padded with empty ``'context'`` entries so the two
    columns line up; each (delete, insert) pair is appended to ``line_list``
    as a :class:`util.Container`. Both buffers are emptied as a side effect,
    matching the original pop-based draining.
    """
    while len(delete_list) < len(insert_list):
        delete_list.append((None, '', 'context'))
    while len(insert_list) < len(delete_list):
        insert_list.append((None, '', 'context'))
    # zip instead of repeated list.pop(0): pop(0) shifts the whole list each
    # time, making the drain O(n^2) for no benefit.
    for d, i in zip(delete_list, insert_list):
        line_list.append(util.Container(old_lineno=d[0], new_lineno=i[0],
                                        old_line=d[1], new_line=i[1],
                                        old_type=d[2], new_type=i[2]))
    # Preserve the original's observable side effect of leaving both
    # buffers empty.
    del delete_list[:]
    del insert_list[:]
99
def _make_side_by_side(chunk_list):
    """
    turn a normal unified-style diff (post-processed by parse_delta) into a
    side-by-side diff structure. the new structure is::

        chunks: list(
            diff: list(
                old_lineno: int,
                new_lineno: int,
                old_line: str,
                new_line: str,
                type: str('context' or 'changed'),
            )
        )
    """
    # NOTE(review): the accumulator initializations below were missing from
    # the garbled source (out_chunk_list / line_list were used before being
    # defined); restored here so the function can run at all.
    out_chunk_list = []
    for chunk in chunk_list:
        line_list = []
        delete_list, insert_list = [], []
        for line in chunk.diff:
            if line.type == 'context':
                # A context line flushes any pending delete/insert pairs.
                if len(delete_list) or len(insert_list):
                    _process_side_by_side_buffers(line_list, delete_list, insert_list)
                    delete_list, insert_list = [], []
                line_list.append(util.Container(old_lineno=line.old_lineno, new_lineno=line.new_lineno,
                                                old_line=line.line, new_line=line.line,
                                                old_type=line.type, new_type=line.type))
            elif line.type == 'delete':
                delete_list.append((line.old_lineno, line.line, line.type))
            elif line.type == 'insert':
                insert_list.append((line.new_lineno, line.line, line.type))
        # Flush whatever is still buffered at the end of the chunk.
        if len(delete_list) or len(insert_list):
            _process_side_by_side_buffers(line_list, delete_list, insert_list)
        out_chunk_list.append(util.Container(diff=line_list))
    return out_chunk_list
42
import bzrlib.revision
44
from loggerhead import search
45
from loggerhead import util
46
from loggerhead.wholehistory import compute_whole_history_data
136
49
def is_branch(folder):
169
100
def __getitem__(self, index):
    """Get the date of the index'd item.

    :return: a ``datetime`` built from the timestamp of the revision at
        ``self.revid_list[index]``.
    """
    # The garbled source contained a second, identical return statement
    # (diff residue) that was unreachable; it has been removed.
    return datetime.datetime.fromtimestamp(
        self.repository.get_revision(self.revid_list[index]).timestamp)
173
105
def __len__(self):
    """Number of revisions in the wrapped list."""
    return len(self.revid_list)
177
class History (object):
180
self._change_cache = None
182
self._lock = threading.RLock()
185
def from_branch(cls, branch, name=None):
108
class FileChangeReporter(object):
    """Collect per-file change records while comparing two inventories.

    Accumulates `util.Container`s into ``added``, ``modified``, ``renamed``,
    ``removed`` and ``text_changes`` as ``report()`` is called for each file.
    """

    def __init__(self, old_inv, new_inv):
        # NOTE(review): the list initializations for added/modified/renamed/
        # removed were missing from the garbled source but are appended to in
        # report(); restored here.
        self.added = []
        self.modified = []
        self.renamed = []
        self.removed = []
        self.text_changes = []
        self.old_inv = old_inv
        self.new_inv = new_inv

    def revid(self, inv, file_id):
        """Return the revision of ``file_id`` in ``inv``, or 'null:'."""
        try:
            return inv[file_id].revision
        except bzrlib.errors.NoSuchId:
            # File does not exist in this inventory (e.g. just added/removed).
            return 'null:'

    def report(self, file_id, paths, versioned, renamed, modified,
               exe_change, kind):
        """Record one file change (callback interface used by tree compare).

        ``paths`` and ``kind`` are (old, new) pairs; ``versioned`` is one of
        'added'/'removed'/... and ``modified`` describes content change.
        """
        if modified not in ('unchanged', 'kind changed'):
            if versioned == 'removed':
                filename = rich_filename(paths[0], kind[0])
            else:
                filename = rich_filename(paths[1], kind[1])
            self.text_changes.append(util.Container(
                filename=filename, file_id=file_id,
                old_revision=self.revid(self.old_inv, file_id),
                new_revision=self.revid(self.new_inv, file_id)))
        if versioned == 'added':
            self.added.append(util.Container(
                filename=rich_filename(paths[1], kind),
                file_id=file_id, kind=kind[1]))
        elif versioned == 'removed':
            self.removed.append(util.Container(
                filename=rich_filename(paths[0], kind),
                file_id=file_id, kind=kind[0]))
        elif renamed:
            self.renamed.append(util.Container(
                old_filename=rich_filename(paths[0], kind[0]),
                new_filename=rich_filename(paths[1], kind[1]),
                file_id=file_id,
                text_modified=modified == 'modified'))
        else:
            self.modified.append(util.Container(
                filename=rich_filename(paths[1], kind),
                file_id=file_id))
156
class RevInfoMemoryCache(object):
    """A store that validates values against the revids they were stored with.

    We use a unique key for each branch.

    The reason for not just using the revid as the key is so that when a new
    value is provided for a branch, we replace the old value used for the
    branch.

    There is another implementation of the same interface in
    loggerhead.changecache.RevInfoDiskCache.
    """

    def __init__(self, cache):
        # `cache` is any mapping supporting .get() and item assignment.
        self._cache = cache

    def get(self, key, revid):
        """Return the data associated with `key`, subject to a revid check.

        If a value was stored under `key`, with the same revid, return it.
        Otherwise return None.
        """
        cached = self._cache.get(key)
        if cached is None:
            # Nothing stored for this key yet.
            return None
        stored_revid, data = cached
        if revid == stored_revid:
            return data
        else:
            # Stale entry: stored for a different tip revision.
            return None

    def set(self, key, revid, data):
        """Store `data` under `key`, to be checked against `revid` on get().
        """
        self._cache[key] = (revid, data)
192
# Used to store locks that prevent multiple threads from building a
# revision graph for the same branch at the same time, because that can
# cause severe performance issues that are so bad that the system seems
# to hang (comment tail reconstructed -- TODO confirm original wording).
revision_graph_locks = {}
revision_graph_check_lock = threading.Lock()
199
class History(object):
200
"""Decorate a branch to provide information for rendering.
202
History objects are expected to be short lived -- when serving a request
203
for a particular branch, open it, read-lock it, wrap a History object
204
around it, serve the request, throw the History object away, unlock the
205
branch and throw it away.
207
:ivar _file_change_cache: An object that caches information about the
208
files that changed between two revisions.
209
:ivar _rev_info: A list of information about revisions. This is by far
210
the most cryptic data structure in loggerhead. At the top level, it
211
is a list of 3-tuples [(merge-info, where-merged, parents)].
212
`merge-info` is (seq, revid, merge_depth, revno_str, end_of_merge) --
213
like a merged sorted list, but the revno is stringified.
214
`where-merged` is a tuple of revisions that have this revision as a
215
non-lefthand parent. Finally, `parents` is just the usual list of
216
parents of this revision.
217
:ivar _rev_indices: A dictionary mapping each revision id to the index of
218
the information about it in _rev_info.
219
:ivar _revno_revid: A dictionary mapping stringified revnos to revision
223
def _load_whole_history_data(self, caches, cache_key):
    """Set the attributes relating to the whole history of the branch.

    :param caches: a list of caches with interfaces like
        `RevInfoMemoryCache` and be ordered from fastest to slowest.
    :param cache_key: the key to use with the caches.
    """
    # NOTE(review): the missed_caches initialization, both try/finally
    # pairs and the for/else cache-probe loop were missing from the garbled
    # source; restored here so every acquire is paired with a release.
    self._rev_indices = None
    self._rev_info = None

    missed_caches = []

    def update_missed_caches():
        # Back-fill every faster cache that missed with the value we found.
        for cache in missed_caches:
            cache.set(cache_key, self.last_revid, self._rev_info)

    # Theoretically, it's possible for two threads to race in creating
    # the Lock() object for their branch, so we put a lock around
    # creating the per-branch Lock().
    revision_graph_check_lock.acquire()
    try:
        if cache_key not in revision_graph_locks:
            revision_graph_locks[cache_key] = threading.Lock()
    finally:
        revision_graph_check_lock.release()

    revision_graph_locks[cache_key].acquire()
    try:
        # Probe caches fastest-first; stop at the first hit.
        for cache in caches:
            data = cache.get(cache_key, self.last_revid)
            if data is not None:
                self._rev_info = data
                update_missed_caches()
                break
            else:
                missed_caches.append(cache)
        else:
            # All caches missed: compute from scratch (expensive).
            whole_history_data = compute_whole_history_data(self._branch)
            self._rev_info, self._rev_indices = whole_history_data
            update_missed_caches()
    finally:
        revision_graph_locks[cache_key].release()

    if self._rev_indices is not None:
        # Cache hit gave us indices already; only rebuild revno -> revid.
        self._revno_revid = {}
        for ((_, revid, _, revno_str, _), _, _) in self._rev_info:
            self._revno_revid[revno_str] = revid
    else:
        # Rebuild both lookup tables from the raw _rev_info list.
        self._revno_revid = {}
        self._rev_indices = {}
        for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:
            self._rev_indices[revid] = seq
            self._revno_revid[revno_str] = revid
276
def __init__(self, branch, whole_history_data_cache, file_cache=None,
277
revinfo_disk_cache=None, cache_key=None):
278
assert branch.is_locked(), (
279
"Can only construct a History object with a read-locked branch.")
280
if file_cache is not None:
281
self._file_change_cache = file_cache
282
file_cache.history = self
284
self._file_change_cache = None
188
285
self._branch = branch
189
self._history = branch.revision_history()
190
self._last_revid = self._history[-1]
191
self._revision_graph = branch.repository.get_revision_graph(self._last_revid)
194
name = self._branch.nick
196
self.log = logging.getLogger('loggerhead.%s' % (name,))
198
self._full_history = []
199
self._revision_info = {}
200
self._revno_revid = {}
201
self._merge_sort = bzrlib.tsort.merge_sort(self._revision_graph, self._last_revid, generate_revno=True)
203
for (seq, revid, merge_depth, revno, end_of_merge) in self._merge_sort:
204
self._full_history.append(revid)
205
revno_str = '.'.join(str(n) for n in revno)
206
self._revno_revid[revno_str] = revid
207
self._revision_info[revid] = (seq, revid, merge_depth, revno_str, end_of_merge)
212
self._where_merged = {}
213
for revid in self._revision_graph.keys():
214
if not revid in self._full_history:
216
for parent in self._revision_graph[revid]:
217
self._where_merged.setdefault(parent, set()).add(revid)
219
self.log.info('built revision graph cache: %r secs' % (time.time() - z,))
223
def from_folder(cls, path, name=None):
224
b = bzrlib.branch.Branch.open(path)
225
return cls.from_branch(b, name)
228
def out_of_date(self):
229
if self._branch.revision_history()[-1] != self._last_revid:
233
def use_cache(self, cache):
234
self._change_cache = cache
236
def use_search_index(self, index):
241
# called when a new history object needs to be created, because the
242
# branch history has changed. we need to immediately close and stop
243
# using our caches, because a new history object will be created to
244
# replace us, using the same cache files.
245
# (may also be called during server shutdown.)
246
if self._change_cache is not None:
247
self._change_cache.close()
248
self._change_cache = None
249
if self._index is not None:
253
def flush_cache(self):
254
if self._change_cache is None:
256
self._change_cache.flush()
258
def check_rebuild(self):
259
if self._change_cache is not None:
260
self._change_cache.check_rebuild()
261
if self._index is not None:
262
self._index.check_rebuild()
264
last_revid = property(lambda self: self._last_revid, None, None)
266
count = property(lambda self: self._count, None, None)
286
self._inventory_cache = {}
287
self._branch_nick = self._branch.get_config().get_nickname()
288
self.log = logging.getLogger('loggerhead.%s' % (self._branch_nick,))
290
self.last_revid = branch.last_revision()
292
caches = [RevInfoMemoryCache(whole_history_data_cache)]
293
if revinfo_disk_cache:
294
caches.append(revinfo_disk_cache)
295
self._load_whole_history_data(caches, cache_key)
298
def has_revisions(self):
    """Return True when the branch tip is a real revision (not null)."""
    tip_is_null = bzrlib.revision.is_null(self.last_revid)
    return not tip_is_null
269
301
def get_config(self):
    """Return the configuration object of the wrapped branch."""
    return self._branch.get_config()
273
def get_revision(self, revid):
    """Fetch the revision object for `revid` from the branch repository."""
    repo = self._branch.repository
    return repo.get_revision(revid)
276
304
def get_revno(self, revid):
277
if revid not in self._revision_info:
305
if revid not in self._rev_indices:
280
seq, revid, merge_depth, revno_str, end_of_merge = self._revision_info[revid]
283
def get_sequence(self, revid):
284
seq, revid, merge_depth, revno_str, end_of_merge = self._revision_info[revid]
287
def get_revision_history(self):
288
return self._full_history
290
def get_revid_sequence(self, revid_list, revid):
292
given a list of revision ids, return the sequence # of this revid in
301
def get_revids_from(self, revid_list, revid):
303
given a list of revision ids, yield revisions in graph order,
304
starting from revid. the list can be None if you just want to travel
305
across all revisions.
308
seq = self._rev_indices[revid]
309
revno = self._rev_info[seq][0][3]
312
def get_revids_from(self, revid_list, start_revid):
314
Yield the mainline (wrt start_revid) revisions that merged each
317
if revid_list is None:
318
revid_list = [r[0][1] for r in self._rev_info]
319
revid_set = set(revid_list)
322
def introduced_revisions(revid):
324
seq = self._rev_indices[revid]
325
md = self._rev_info[seq][0][2]
327
while i < len(self._rev_info) and self._rev_info[i][0][2] > md:
328
r.add(self._rev_info[i][0][1])
308
if (revid_list is None) or (revid in revid_list):
332
if bzrlib.revision.is_null(revid):
334
if introduced_revisions(revid) & revid_set:
310
if not self._revision_graph.has_key(revid):
312
parents = self._revision_graph[revid]
336
parents = self._rev_info[self._rev_indices[revid]][2]
313
337
if len(parents) == 0:
315
339
revid = parents[0]
318
341
def get_short_revision_history_by_fileid(self, file_id):
319
# wow. is this really the only way we can get this list? by
320
# man-handling the weave store directly? :-0
321
342
# FIXME: would be awesome if we could get, for a folder, the list of
322
# revisions where items within that folder changed.
323
w = self._branch.repository.weave_store.get_weave(file_id, self._branch.repository.get_transaction())
324
w_revids = w.versions()
325
revids = [r for r in self._full_history if r in w_revids]
343
# revisions where items within that folder changed.i
344
possible_keys = [(file_id, revid) for revid in self._rev_indices]
345
get_parent_map = self._branch.repository.texts.get_parent_map
346
# We chunk the requests as this works better with GraphIndex.
347
# See _filter_revisions_touching_file_id in bzrlib/log.py
348
# for more information.
351
for start in xrange(0, len(possible_keys), chunk_size):
352
next_keys = possible_keys[start:start + chunk_size]
353
revids += [k[1] for k in get_parent_map(next_keys)]
354
del possible_keys, next_keys
329
357
def get_revision_history_since(self, revid_list, date):
330
358
# if a user asks for revisions starting at 01-sep, they mean inclusive,
331
359
# so start at midnight on 02-sep.
332
360
date = date + datetime.timedelta(days=1)
333
# our revid list is sorted in REVERSE date order, so go thru some hoops here...
361
# our revid list is sorted in REVERSE date order,
362
# so go thru some hoops here...
334
363
revid_list.reverse()
335
index = bisect.bisect(_RevListToTimestamps(revid_list, self._branch.repository), date)
364
index = bisect.bisect(_RevListToTimestamps(revid_list,
365
self._branch.repository),
338
369
revid_list.reverse()
340
371
return revid_list[index:]
343
def get_revision_history_matching(self, revid_list, text):
344
self.log.debug('searching %d revisions for %r', len(revid_list), text)
346
# this is going to be painfully slow. :(
349
for revid in revid_list:
350
change = self.get_changes([ revid ])[0]
351
if text in change.comment.lower():
353
self.log.debug('searched %d revisions for %r in %r secs', len(revid_list), text, time.time() - z)
356
def get_revision_history_matching_indexed(self, revid_list, text):
357
self.log.debug('searching %d revisions for %r', len(revid_list), text)
359
if self._index is None:
360
return self.get_revision_history_matching(revid_list, text)
361
out = self._index.find(text, revid_list)
362
self.log.debug('searched %d revisions for %r in %r secs: %d results', len(revid_list), text, time.time() - z, len(out))
363
# put them in some coherent order :)
364
out = [r for r in self._full_history if r in out]
368
373
def get_search_revid_list(self, query, revid_list):
370
375
given a "quick-search" query, try a few obvious possible meanings:
372
377
- revision id or # ("128.1.3")
373
- date (US style "mm/dd/yy", earth style "dd-mm-yy", or iso style "yyyy-mm-dd")
378
- date (US style "mm/dd/yy", earth style "dd-mm-yy", or \
379
iso style "yyyy-mm-dd")
374
380
- comment text as a fallback
376
382
and return a revid list that matches.
421
432
# if a "revid" is actually a dotted revno, convert it to a revid
422
433
if revid is None:
424
if self.revno_re.match(revid):
425
revid = self._revno_revid[revid]
436
return self.last_revid
438
if self.revno_re.match(revid):
439
revid = self._revno_revid[revid]
441
raise bzrlib.errors.NoSuchRevision(self._branch_nick, revid)
429
444
def get_file_view(self, revid, file_id):
431
Given an optional revid and optional path, return a (revlist, revid)
432
for navigation through the current scope: from the revid (or the
433
latest revision) back to the original revision.
446
Given a revid and optional path, return a (revlist, revid) for
447
navigation through the current scope: from the revid (or the latest
448
revision) back to the original revision.
435
450
If file_id is None, the entire revision history is the list scope.
436
If revid is None, the latest revision is used.
438
452
if revid is None:
439
revid = self._last_revid
453
revid = self.last_revid
440
454
if file_id is not None:
441
# since revid is 'start_revid', possibly should start the path tracing from revid... FIXME
442
inv = self._branch.repository.get_revision_inventory(revid)
455
# since revid is 'start_revid', possibly should start the path
456
# tracing from revid... FIXME
443
457
revlist = list(self.get_short_revision_history_by_fileid(file_id))
444
458
revlist = list(self.get_revids_from(revlist, revid))
446
460
revlist = list(self.get_revids_from(None, revid))
449
return revlist, revid
452
463
def get_view(self, revid, start_revid, file_id, query=None):
454
465
use the URL parameters (revid, start_revid, file_id, and query) to
455
466
determine the revision list we're viewing (start_revid, file_id, query)
456
467
and where we are in it (revid).
458
if a query is given, we're viewing query results.
459
if a file_id is given, we're viewing revisions for a specific file.
460
if a start_revid is given, we're viewing the branch from a
461
specific revision up the tree.
462
(these may be combined to view revisions for a specific file, from
463
a specific revision, with a specific search query.)
465
returns a new (revid, start_revid, revid_list, scan_list) where:
469
- if a query is given, we're viewing query results.
470
- if a file_id is given, we're viewing revisions for a specific
472
- if a start_revid is given, we're viewing the branch from a
473
specific revision up the tree.
475
these may be combined to view revisions for a specific file, from
476
a specific revision, with a specific search query.
478
returns a new (revid, start_revid, revid_list) where:
467
480
- revid: current position within the view
468
481
- start_revid: starting revision of this view
469
482
- revid_list: list of revision ids for this view
471
484
file_id and query are never changed so aren't returned, but they may
472
485
contain vital context for future url navigation.
487
if start_revid is None:
488
start_revid = self.last_revid
474
490
if query is None:
475
revid_list, start_revid = self.get_file_view(start_revid, file_id)
491
revid_list = self.get_file_view(start_revid, file_id)
476
492
if revid is None:
477
493
revid = start_revid
478
494
if revid not in revid_list:
479
495
# if the given revid is not in the revlist, use a revlist that
480
496
# starts at the given revid.
481
revid_list, start_revid = self.get_file_view(revid, file_id)
497
revid_list = self.get_file_view(revid, file_id)
482
499
return revid, start_revid, revid_list
484
501
# potentially limit the search
485
if (start_revid is not None) or (file_id is not None):
486
revid_list, start_revid = self.get_file_view(start_revid, file_id)
502
if file_id is not None:
503
revid_list = self.get_file_view(start_revid, file_id)
488
505
revid_list = None
490
revid_list = self.get_search_revid_list(query, revid_list)
491
if len(revid_list) > 0:
506
revid_list = search.search_revisions(self._branch, query)
507
if revid_list and len(revid_list) > 0:
492
508
if revid not in revid_list:
493
509
revid = revid_list[0]
494
510
return revid, start_revid, revid_list
512
# XXX: This should return a message saying that the search could
513
# not be completed due to either missing the plugin or missing a
497
515
return None, None, []
500
517
def get_inventory(self, revid):
501
return self._branch.repository.get_revision_inventory(revid)
518
if revid not in self._inventory_cache:
519
self._inventory_cache[revid] = (
520
self._branch.repository.get_inventory(revid))
521
return self._inventory_cache[revid]
504
523
def get_path(self, revid, file_id):
    """Return the absolute ('/'-prefixed) path of `file_id` in `revid`.

    Returns '' when `file_id` is None or empty.
    """
    # NOTE(review): the `return ''` guard body and the final `return path`
    # were missing from the garbled source; the two conflicting `path =`
    # lines (old get_revision_inventory vs. new get_inventory) were
    # collapsed to the newer, cached form.
    if (file_id is None) or (file_id == ''):
        return ''
    path = self.get_inventory(revid).id2path(file_id)
    if (len(path) > 0) and not path.startswith('/'):
        path = '/' + path
    return path
512
def get_where_merged(self, revid):
    """Look up the revisions recorded as having `revid` as a parent."""
    merged_into = self._where_merged[revid]
    return merged_into
531
def get_file_id(self, revid, path):
    """Return the file_id of `path` in revision `revid` (None if absent).

    Paths are normalized to be '/'-prefixed before the inventory lookup.
    """
    if (len(path) > 0) and not path.startswith('/'):
        # NOTE(review): this normalization assignment was missing from the
        # garbled source (the `if` had no body); restored.
        path = '/' + path
    return self.get_inventory(revid).path2id(path)
518
536
def get_merge_point_list(self, revid):
520
538
Return the list of revids that have merged this node.
522
if revid in self._history:
540
if '.' not in self.get_revno(revid):
527
children = self.get_where_merged(revid)
545
children = self._rev_info[self._rev_indices[revid]][1]
529
547
for child in children:
530
child_parents = self._revision_graph[child]
548
child_parents = self._rev_info[self._rev_indices[child]][2]
531
549
if child_parents[0] == revid:
532
550
nexts.append(child)
553
571
revnol = revno.split(".")
554
572
revnos = ".".join(revnol[:-2])
555
573
revnolast = int(revnol[-1])
556
if d.has_key(revnos):
558
576
if revnolast < m:
559
d[revnos] = ( revnolast, revid )
577
d[revnos] = (revnolast, revid)
561
d[revnos] = ( revnolast, revid )
563
return [ d[revnos][1] for revnos in d.keys() ]
565
def get_branch_nicks(self, changes):
579
d[revnos] = (revnolast, revid)
581
return [revid for (_, revid) in d.itervalues()]
583
def add_branch_nicks(self, change):
567
given a list of changes from L{get_changes}, fill in the branch nicks
568
on all parents and merge points.
585
given a 'change', fill in the branch nicks on all parents and merge
570
588
fetch_set = set()
571
for change in changes:
572
for p in change.parents:
573
fetch_set.add(p.revid)
574
for p in change.merge_points:
575
fetch_set.add(p.revid)
589
for p in change.parents:
590
fetch_set.add(p.revid)
591
for p in change.merge_points:
592
fetch_set.add(p.revid)
576
593
p_changes = self.get_changes(list(fetch_set))
577
594
p_change_dict = dict([(c.revid, c) for c in p_changes])
578
for change in changes:
579
# arch-converted branches may not have merged branch info :(
580
for p in change.parents:
581
if p.revid in p_change_dict:
582
p.branch_nick = p_change_dict[p.revid].branch_nick
584
p.branch_nick = '(missing)'
585
for p in change.merge_points:
586
if p.revid in p_change_dict:
587
p.branch_nick = p_change_dict[p.revid].branch_nick
589
p.branch_nick = '(missing)'
592
def get_changes(self, revid_list, get_diffs=False):
593
if self._change_cache is None:
594
changes = self.get_changes_uncached(revid_list, get_diffs)
596
changes = self._change_cache.get_changes(revid_list, get_diffs)
595
for p in change.parents:
596
if p.revid in p_change_dict:
597
p.branch_nick = p_change_dict[p.revid].branch_nick
599
p.branch_nick = '(missing)'
600
for p in change.merge_points:
601
if p.revid in p_change_dict:
602
p.branch_nick = p_change_dict[p.revid].branch_nick
604
p.branch_nick = '(missing)'
606
def get_changes(self, revid_list):
607
"""Return a list of changes objects for the given revids.
609
Revisions not present and NULL_REVISION will be ignored.
611
changes = self.get_changes_uncached(revid_list)
612
if len(changes) == 0:
600
615
# some data needs to be recalculated each time, because it may
601
616
# change as new revisions are added.
602
for i in xrange(len(revid_list)):
603
revid = revid_list[i]
605
merge_revids = self.simplify_merge_point_list(self.get_merge_point_list(revid))
606
change.merge_points = [util.Container(revid=r, revno=self.get_revno(r)) for r in merge_revids]
617
for change in changes:
618
merge_revids = self.simplify_merge_point_list(
619
self.get_merge_point_list(change.revid))
620
change.merge_points = [
621
util.Container(revid=r,
622
revno=self.get_revno(r)) for r in merge_revids]
623
if len(change.parents) > 0:
624
change.parents = [util.Container(revid=r,
625
revno=self.get_revno(r)) for r in change.parents]
626
change.revno = self.get_revno(change.revid)
629
for change in changes:
630
change.parity = parity
610
# alright, let's profile this sucka.
611
def _get_changes_profiled(self, revid_list, get_diffs=False):
612
from loggerhead.lsprof import profile
614
ret, stats = profile(self.get_changes_uncached, revid_list, get_diffs)
617
cPickle.dump(stats, open('lsprof.stats', 'w'), 2)
618
self.log.info('lsprof complete!')
621
def _get_deltas_for_revisions_with_trees(self, revisions):
622
"""Produce a generator of revision deltas.
624
Note that the input is a sequence of REVISIONS, not revision_ids.
625
Trees will be held in memory until the generator exits.
626
Each delta is relative to the revision's lefthand predecessor.
628
required_trees = set()
629
for revision in revisions:
630
required_trees.add(revision.revision_id)
631
required_trees.update(revision.parent_ids[:1])
632
trees = dict((t.get_revision_id(), t) for
633
t in self._branch.repository.revision_trees(required_trees))
635
self._branch.repository.lock_read()
637
for revision in revisions:
638
if not revision.parent_ids:
639
old_tree = self._branch.repository.revision_tree(None)
641
old_tree = trees[revision.parent_ids[0]]
642
tree = trees[revision.revision_id]
643
ret.append((tree, old_tree, tree.changes_from(old_tree)))
646
self._branch.repository.unlock()
648
def entry_from_revision(self, revision):
649
commit_time = datetime.datetime.fromtimestamp(revision.timestamp)
651
parents = [util.Container(revid=r, revno=self.get_revno(r)) for r in revision.parent_ids]
653
if len(parents) == 0:
656
left_parent = revision.parent_ids[0]
635
def get_changes_uncached(self, revid_list):
636
# FIXME: deprecated method in getting a null revision
637
revid_list = filter(lambda revid: not bzrlib.revision.is_null(revid),
639
parent_map = self._branch.repository.get_graph().get_parent_map(
641
# We need to return the answer in the same order as the input,
643
present_revids = [revid for revid in revid_list
644
if revid in parent_map]
645
rev_list = self._branch.repository.get_revisions(present_revids)
647
return [self._change_from_revision(rev) for rev in rev_list]
649
def _change_from_revision(self, revision):
651
Given a bzrlib Revision, return a processed "change" for use in
658
654
message, short_message = clean_message(revision.message)
656
tags = self._branch.tags.get_reverse_tag_dict()
659
if tags.has_key(revision.revision_id):
660
revtags = ', '.join(tags[revision.revision_id])
661
663
'revid': revision.revision_id,
662
'revno': self.get_revno(revision.revision_id),
664
'author': revision.committer,
664
'date': datetime.datetime.fromtimestamp(revision.timestamp),
665
'utc_date': datetime.datetime.utcfromtimestamp(revision.timestamp),
666
'authors': revision.get_apparent_authors(),
665
667
'branch_nick': revision.properties.get('branch-nick', None),
666
668
'short_comment': short_message,
667
669
'comment': revision.message,
668
670
'comment_clean': [util.html_clean(s) for s in message],
671
'parents': revision.parent_ids,
672
'bugs': [bug.split()[0] for bug in revision.properties.get('bugs', '').splitlines()],
675
if isinstance(revision, bzrlib.foreign.ForeignRevision):
676
foreign_revid, mapping = (rev.foreign_revid, rev.mapping)
677
elif ":" in revision.revision_id:
679
foreign_revid, mapping = \
680
bzrlib.foreign.foreign_vcs_registry.parse_revision_id(
681
revision.revision_id)
682
except bzrlib.errors.InvalidRevisionId:
687
if foreign_revid is not None:
688
entry["foreign_vcs"] = mapping.vcs.abbreviation
689
entry["foreign_revid"] = mapping.vcs.show_foreign_revid(foreign_revid)
671
690
return util.Container(entry)
674
@with_bzrlib_read_lock
675
def get_changes_uncached(self, revid_list, get_diffs=False):
679
rev_list = self._branch.repository.get_revisions(revid_list)
681
except (KeyError, bzrlib.errors.NoSuchRevision), e:
682
# this sometimes happens with arch-converted branches.
683
# i don't know why. :(
684
self.log.debug('No such revision (skipping): %s', e)
685
revid_list.remove(e.revision)
687
delta_list = self._get_deltas_for_revisions_with_trees(rev_list)
688
combined_list = zip(rev_list, delta_list)
691
for rev, (new_tree, old_tree, delta) in combined_list:
692
entry = self.entry_from_revision(rev)
693
entry.changes = self.parse_delta(delta, get_diffs, old_tree, new_tree)
694
entries.append(entry)
698
@with_bzrlib_read_lock
def _get_diff(self, revid1, revid2):
    """Compute (old_tree, new_tree, delta) between two revisions."""
    old_tree = self._branch.repository.revision_tree(revid1)
    new_tree = self._branch.repository.revision_tree(revid2)
    return old_tree, new_tree, new_tree.changes_from(old_tree)
705
def get_diff(self, revid1, revid2):
    """Return the change entry for `revid2`, with its delta against `revid1`
    parsed onto ``entry.changes``.
    """
    rev_tree1, rev_tree2, delta = self._get_diff(revid1, revid2)
    entry = self.get_changes([ revid2 ], False)[0]
    entry.changes = self.parse_delta(delta, True, rev_tree1, rev_tree2)
    # NOTE(review): the trailing `return entry` was missing from the garbled
    # source; without it callers would get None.
    return entry
692
def get_file_changes_uncached(self, entry):
    """Compute file changes for `entry` against its lefthand parent.

    With no parents, diffs against the null revision (initial commit).
    """
    # NOTE(review): the if/else skeleton around the two `old_revid =`
    # assignments was missing from the garbled source; restored.
    if entry.parents:
        old_revid = entry.parents[0].revid
    else:
        old_revid = bzrlib.revision.NULL_REVISION
    return self.file_changes_for_revision_ids(old_revid, entry.revid)
699
def get_file_changes(self, entry):
    """Return file changes for `entry`, via the cache when one is set."""
    if self._file_change_cache is None:
        return self.get_file_changes_uncached(entry)
    else:
        # NOTE(review): the `else:` line was missing from the garbled
        # source; restored (behavior is identical either way since the
        # first branch returns).
        return self._file_change_cache.get_file_changes(entry)
705
def add_changes(self, entry):
    """Attach the file-change listing for `entry` onto ``entry.changes``."""
    entry.changes = self.get_file_changes(entry)
712
709
def get_file(self, file_id, revid):
    """Returns (path, filename, file contents)"""
    # NOTE(review): the `path = inv.id2path(file_id)` line was missing from
    # the garbled source (path was used undefined); restored. The two
    # duplicate docstrings were collapsed to the newer one.
    inv = self.get_inventory(revid)
    inv_entry = inv[file_id]
    # Read the file text from the tree of the revision that last touched it.
    rev_tree = self._branch.repository.revision_tree(inv_entry.revision)
    path = inv.id2path(file_id)
    if not path.startswith('/'):
        path = '/' + path
    return path, inv_entry.name, rev_tree.get_file_text(file_id)
723
def parse_delta(self, delta, get_diffs=True, old_tree=None, new_tree=None):
719
def file_changes_for_revision_ids(self, old_revid, new_revid):
725
721
Return a nested data structure containing the changes in a delta::
727
723
added: list((filename, file_id)),
728
724
renamed: list((old_filename, new_filename, file_id)),
729
725
deleted: list((filename, file_id)),
737
type: str('context', 'delete', or 'insert'),
743
if C{get_diffs} is false, the C{chunks} will be omitted.
750
def rich_filename(path, kind):
751
if kind == 'directory':
753
if kind == 'symlink':
757
def process_diff(diff):
760
for line in diff.splitlines():
763
if line.startswith('+++ ') or line.startswith('--- '):
765
if line.startswith('@@ '):
767
if chunk is not None:
769
chunk = util.Container()
771
lines = [int(x.split(',')[0][1:]) for x in line.split(' ')[1:3]]
772
old_lineno = lines[0]
773
new_lineno = lines[1]
774
elif line.startswith(' '):
775
chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=new_lineno,
776
type='context', line=util.html_clean(line[1:])))
779
elif line.startswith('+'):
780
chunk.diff.append(util.Container(old_lineno=None, new_lineno=new_lineno,
781
type='insert', line=util.html_clean(line[1:])))
783
elif line.startswith('-'):
784
chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=None,
785
type='delete', line=util.html_clean(line[1:])))
788
chunk.diff.append(util.Container(old_lineno=None, new_lineno=None,
789
type='unknown', line=util.html_clean(repr(line))))
790
if chunk is not None:
794
def handle_modify(old_path, new_path, fid, kind):
796
modified.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid))
798
old_lines = old_tree.get_file_lines(fid)
799
new_lines = new_tree.get_file_lines(fid)
801
bzrlib.diff.internal_diff(old_path, old_lines, new_path, new_lines, buffer)
802
diff = buffer.getvalue()
803
modified.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid, chunks=process_diff(diff), raw_diff=diff))
805
for path, fid, kind in delta.added:
806
added.append((rich_filename(path, kind), fid))
808
for path, fid, kind, text_modified, meta_modified in delta.modified:
809
handle_modify(path, path, fid, kind)
811
for oldpath, newpath, fid, kind, text_modified, meta_modified in delta.renamed:
812
renamed.append((rich_filename(oldpath, kind), rich_filename(newpath, kind), fid))
813
if meta_modified or text_modified:
814
handle_modify(oldpath, newpath, fid, kind)
816
for path, fid, kind in delta.removed:
817
removed.append((rich_filename(path, kind), fid))
819
return util.Container(added=added, renamed=renamed, removed=removed, modified=modified)
822
def add_side_by_side(changes):
823
# FIXME: this is a rotten API.
824
for change in changes:
825
for m in change.changes.modified:
826
m.sbs_chunks = _make_side_by_side(m.chunks)
829
def get_filelist(self, inv, path, sort_type=None):
831
return the list of all files (and their attributes) within a given
834
while path.endswith('/'):
836
if path.startswith('/'):
839
entries = inv.entries()
842
for filepath, entry in entries:
843
fetch_set.add(entry.revision)
844
change_dict = dict([(c.revid, c) for c in self.get_changes(list(fetch_set))])
847
for filepath, entry in entries:
848
if posixpath.dirname(filepath) != path:
850
filename = posixpath.basename(filepath)
851
rich_filename = filename
853
if entry.kind == 'directory':
857
revid = entry.revision
858
change = change_dict[revid]
860
file = util.Container(filename=filename, rich_filename=rich_filename, executable=entry.executable, kind=entry.kind,
861
pathname=pathname, file_id=entry.file_id, size=entry.text_size, revid=revid, change=change)
862
file_list.append(file)
864
if sort_type == 'filename':
865
file_list.sort(key=lambda x: x.filename)
866
elif sort_type == 'size':
867
file_list.sort(key=lambda x: x.size)
868
elif sort_type == 'date':
869
file_list.sort(key=lambda x: x.change.date)
872
for file in file_list:
879
_BADCHARS_RE = re.compile(ur'[\x00-\x08\x0b-\x0c\x0e-\x1f]')
882
def annotate_file(self, file_id, revid):
887
file_revid = self.get_inventory(revid)[file_id].revision
890
# because we cache revision metadata ourselves, it's actually much
891
# faster to call 'annotate_iter' on the weave directly than it is to
892
# ask bzrlib to annotate for us.
893
w = self._branch.repository.weave_store.get_weave(file_id, self._branch.repository.get_transaction())
896
for line_revid, text in w.annotate_iter(file_revid):
897
revid_set.add(line_revid)
898
if self._BADCHARS_RE.match(text):
899
# bail out; this isn't displayable text
900
yield util.Container(parity=0, lineno=1, status='same',
901
text='<i>' + util.html_clean('(This is a binary file.)') + '</i>',
902
change=util.Container())
904
change_cache = dict([(c.revid, c) for c in self.get_changes(list(revid_set))])
906
last_line_revid = None
907
for line_revid, text in w.annotate_iter(file_revid):
908
if line_revid == last_line_revid:
909
# remember which lines have a new revno and which don't
914
last_line_revid = line_revid
915
change = change_cache[line_revid]
916
trunc_revno = change.revno
917
if len(trunc_revno) > 10:
918
trunc_revno = trunc_revno[:9] + '...'
920
yield util.Container(parity=parity, lineno=lineno, status=status,
921
change=change, text=util.html_clean(text))
924
self.log.debug('annotate: %r secs' % (time.time() - z,))
927
@with_bzrlib_read_lock
928
def get_bundle(self, revid, compare_revid=None):
929
if compare_revid is None:
930
parents = self._revision_graph[revid]
932
compare_revid = parents[0]
936
bzrlib.bundle.serializer.write_bundle(self._branch.repository, revid, compare_revid, s)
730
text_changes: list((filename, file_id)),
732
repo = self._branch.repository
733
if (bzrlib.revision.is_null(old_revid) or
734
bzrlib.revision.is_null(new_revid)):
735
old_tree, new_tree = map(
736
repo.revision_tree, [old_revid, new_revid])
738
old_tree, new_tree = repo.revision_trees([old_revid, new_revid])
740
reporter = FileChangeReporter(old_tree.inventory, new_tree.inventory)
742
bzrlib.delta.report_changes(new_tree.iter_changes(old_tree), reporter)
744
return util.Container(
745
added=sorted(reporter.added, key=lambda x: x.filename),
746
renamed=sorted(reporter.renamed, key=lambda x: x.new_filename),
747
removed=sorted(reporter.removed, key=lambda x: x.filename),
748
modified=sorted(reporter.modified, key=lambda x: x.filename),
749
text_changes=sorted(reporter.text_changes, key=lambda x: x.filename))