~loggerhead-team/loggerhead/trunk-rich

« back to all changes in this revision

Viewing changes to loggerhead/history.py

Committer: Matt Nordhoff
Date: 2009-05-02 14:01:05 UTC
Revision ID: mnordhoff@mattnordhoff.com-20090502140105-m07dxhtzfgsgu2ia

https://launchpad.net/bugs/370845

Make sure to close mkstemp's file descriptor (bug #370845)

files added:
loggerhead.conf.example

loggerhead/apps/config.py

loggerhead/templates/browse.pt

loggerhead/trace.py

start-loggerhead

start-loggerhead.1

stop-loggerhead

stop-loggerhead.1

files removed:
HACKING

docs

docs/Makefile

docs/_build

docs/_static

docs/_templates

docs/conf.py

docs/index.rst

docs/make.bat

docs/serve-branches.rst

info.py

load_test_scripts

load_test_scripts/multiple_instances.script

load_test_scripts/simple.script

loggerhead/apps/http_head.py

loggerhead/controllers/annotate_ui.py

loggerhead/exporter.py

loggerhead/load_test.py

loggerhead/static/downloads

loggerhead/static/images/favicon.png

loggerhead/static/images/ico_bug.png

loggerhead/static/images/ico_tag.gif

loggerhead/tests/test_history.py

loggerhead/tests/test_http_head.py

loggerhead/tests/test_load_test.py

loggerhead/tests/test_revision_ui.py

loggerhead/tests/test_util.py

serve-branches

files renamed:
README => README.txt

loggerhead/apps/transport.py => loggerhead/apps/filesystem.py

loggerhead/controllers/view_ui.py => loggerhead/controllers/annotate_ui.py

loggerhead/static/css/view.css => loggerhead/static/css/annotate.css

loggerhead/templates/view.pt => loggerhead/templates/annotate.pt

loggerhead/main.py => serve-branches *

files modified:
.bzrignore

MANIFEST.in

Makefile

NEWS

__init__.py

loggerhead/__init__.py

loggerhead/apps/__init__.py

loggerhead/apps/branch.py

loggerhead/apps/error.py

loggerhead/changecache.py

loggerhead/config.py

loggerhead/controllers/__init__.py

loggerhead/controllers/atom_ui.py

loggerhead/controllers/changelog_ui.py

loggerhead/controllers/diff_ui.py

loggerhead/controllers/directory_ui.py

loggerhead/controllers/download_ui.py

loggerhead/controllers/error_ui.py

loggerhead/controllers/filediff_ui.py

loggerhead/controllers/inventory_ui.py

loggerhead/controllers/revision_ui.py

loggerhead/controllers/revlog_ui.py

loggerhead/daemon.py

loggerhead/highlight.py

loggerhead/history.py

loggerhead/lsprof.py

loggerhead/middleware/profile.py

loggerhead/search.py

loggerhead/static/css/diff.css

loggerhead/static/css/files.css

loggerhead/static/css/global.css

loggerhead/static/images/bg_Tabs.gif

loggerhead/static/images/bg_menuTabs.gif

loggerhead/static/images/bg_submenuTabs.gif

loggerhead/static/javascript/custom.js

loggerhead/templatefunctions.py

loggerhead/templates/atom.pt

loggerhead/templates/branchinfo.pt

loggerhead/templates/changelog.pt

loggerhead/templates/directory.pt

loggerhead/templates/inventory.pt

loggerhead/templates/macros.pt

loggerhead/templates/revision.pt

loggerhead/templates/revisioninfo.pt

loggerhead/tests/__init__.py

loggerhead/tests/test_controllers.py

loggerhead/tests/test_simple.py

loggerhead/util.py

loggerhead/wholehistory.py

loggerhead/zptsupport.py

loggerheadd

setup.py *

Show diffs side-by-side

added added

removed removed

loggerhead/history.py

# (Authored by Martin Albisetti <argentina@gmail.com>)

import bisect

import datetime

import logging

import marshal

import re

import textwrap

import threading

import tarfile

from bzrlib import tag

import time

import urllib

from StringIO import StringIO

from loggerhead import search

from loggerhead import util

from loggerhead.wholehistory import compute_whole_history_data

import bzrlib

import bzrlib.branch

import bzrlib.delta

import bzrlib.diff

import bzrlib.errors

import bzrlib.foreign

import bzrlib.lru_cache

import bzrlib.progress

import bzrlib.revision

from loggerhead import search

from loggerhead import util

from loggerhead.wholehistory import compute_whole_history_data

import bzrlib.textfile

import bzrlib.tsort

import bzrlib.ui

# bzrlib's UIFactory is not thread-safe

uihack = threading.local()

class ThreadSafeUIFactory (bzrlib.ui.SilentUIFactory):
    """Silent UI factory that keeps one progress bar stack per thread.

    The stack is stored on the module-level thread-local `uihack`.
    """

    def nested_progress_bar(self):
        """Return a nested progress bar from the calling thread's stack.

        The stack is built from DummyProgress bars the first time a thread
        asks for one, and reused on later calls from that thread.
        """
        stack = getattr(uihack, '_progress_bar_stack', None)
        if stack is None:
            stack = bzrlib.progress.ProgressBarStack(
                klass=bzrlib.progress.DummyProgress)
            uihack._progress_bar_stack = stack
        return stack.get_nested()

bzrlib.ui.ui_factory = ThreadSafeUIFactory()

def is_branch(folder):

try:

112

return path

113

114

115

# from bzrlib

116

117

118

class _RevListToTimestamps(object):

119

"""This takes a list of revisions, and allows you to bisect by date"""

120

107

133

return len(self.revid_list)

108

134

109

135

class FileChangeReporter(object):

110

111

136

def __init__(self, old_inv, new_inv):

112

137

self.added = []

113

138

self.modified = []

153

178

filename=rich_filename(paths[1], kind),

154

179

file_id=file_id))

155

180

156

# The lru_cache is not thread-safe, so we need a lock around it for

157

# all threads.

158

rev_info_memory_cache_lock = threading.RLock()

159

181

160

182

class RevInfoMemoryCache(object):

161

183

"""A store that validates values against the revids they were stored with.

179

201

If a value was stored under `key`, with the same revid, return it.

180

202

Otherwise return None.

181

203

"""

182

rev_info_memory_cache_lock.acquire()

183

try:

184

cached = self._cache.get(key)

185

finally:

186

rev_info_memory_cache_lock.release()

204

cached = self._cache.get(key)

187

205

if cached is None:

188

206

return None

189

207

stored_revid, data = cached

195

213

def set(self, key, revid, data):

196

214

"""Store `data` under `key`, to be checked against `revid` on get().

197

215

"""

198

rev_info_memory_cache_lock.acquire()

199

try:

200

self._cache[key] = (revid, data)

201

finally:

202

rev_info_memory_cache_lock.release()

203

204

# Used to store locks that prevent multiple threads from building a

205

# revision graph for the same branch at the same time, because that can

206

# cause severe performance issues that are so bad that the system seems

207

# to hang.

208

revision_graph_locks = {}

209

revision_graph_check_lock = threading.Lock()

210

211

class History(object):

216

self._cache[key] = (revid, data)

217

218

219

class History (object):

212

220

"""Decorate a branch to provide information for rendering.

213

221

214

222

History objects are expected to be short lived -- when serving a request

216

224

around it, serve the request, throw the History object away, unlock the

217

225

branch and throw it away.

218

226

227

:ivar _file_change_cache: An object that caches information about the

228

files that changed between two revisions.

219

229

:ivar _rev_info: A list of information about revisions. This is by far

220

230

the most cryptic data structure in loggerhead. At the top level, it

221

231

is a list of 3-tuples [(merge-info, where-merged, parents)].

226

236

parents of this revision.

227

237

:ivar _rev_indices: A dictionary mapping each revision id to the index of

228

238

the information about it in _rev_info.

239

:ivar _full_history: A list of all revision ids in the ancestry of the

240

branch, in merge-sorted order. This is a bit silly, and shouldn't

241

really be stored on the instance...

229

242

:ivar _revno_revid: A dictionary mapping stringified revnos to revision

230

243

ids.

231

244

"""

244

257

def update_missed_caches():

245

258

for cache in missed_caches:

246

259

cache.set(cache_key, self.last_revid, self._rev_info)

247

248

# Theoretically, it's possible for two threads to race in creating

249

# the Lock() object for their branch, so we put a lock around

250

# creating the per-branch Lock().

251

revision_graph_check_lock.acquire()

252

try:

253

if cache_key not in revision_graph_locks:

254

revision_graph_locks[cache_key] = threading.Lock()

255

finally:

256

revision_graph_check_lock.release()

257

258

revision_graph_locks[cache_key].acquire()

259

try:

260

for cache in caches:

261

data = cache.get(cache_key, self.last_revid)

262

if data is not None:

263

self._rev_info = data

264

update_missed_caches()

265

break

266

else:

267

missed_caches.append(cache)

268

else:

269

whole_history_data = compute_whole_history_data(self._branch)

270

self._rev_info, self._rev_indices = whole_history_data

260

for cache in caches:

261

data = cache.get(cache_key, self.last_revid)

262

if data is not None:

263

self._rev_info = data

271

264

update_missed_caches()

272

finally:

273

revision_graph_locks[cache_key].release()

265

break

266

else:

267

missed_caches.append(cache)

268

else:

269

whole_history_data = compute_whole_history_data(self._branch)

270

self._rev_info, self._rev_indices = whole_history_data

271

update_missed_caches()

274

272

275

273

if self._rev_indices is not None:

274

self._full_history = []

276

275

self._revno_revid = {}

277

276

for ((_, revid, _, revno_str, _), _, _) in self._rev_info:

278

277

self._revno_revid[revno_str] = revid

278

self._full_history.append(revid)

279

else:

280

self._full_history = []

280

281

self._revno_revid = {}

281

282

self._rev_indices = {}

282

283

for ((seq, revid, _, revno_str, _), _, _) in self._rev_info:

283

284

self._rev_indices[revid] = seq

284

285

self._revno_revid[revno_str] = revid

286

self._full_history.append(revid)

285

287

286

def __init__(self, branch, whole_history_data_cache,

288

def __init__(self, branch, whole_history_data_cache, file_cache=None,

287

289

revinfo_disk_cache=None, cache_key=None):

288

290

assert branch.is_locked(), (

289

291

"Can only construct a History object with a read-locked branch.")

292

if file_cache is not None:

293

self._file_change_cache = file_cache

294

file_cache.history = self

295

else:

296

self._file_change_cache = None

290

297

self._branch = branch

291

self._branch_tags = None

292

298

self._inventory_cache = {}

293

299

self._branch_nick = self._branch.get_config().get_nickname()

294

self.log = logging.getLogger('loggerhead.%s' % (self._branch_nick,))

300

self.log = logging.getLogger('loggerhead.%s' % self._branch_nick)

295

301

296

302

self.last_revid = branch.last_revision()

297

303

321

327

revid in revid_list.

322

328

"""

323

329

if revid_list is None:

324

# Just yield the mainline, starting at start_revid

325

revid = start_revid

326

is_null = bzrlib.revision.is_null

327

while not is_null(revid):

328

yield revid

329

parents = self._rev_info[self._rev_indices[revid]][2]

330

if not parents:

331

return

332

revid = parents[0]

333

return

330

revid_list = self._full_history

334

331

revid_set = set(revid_list)

335

332

revid = start_revid

336

333

343

340

r.add(self._rev_info[i][0][1])

344

341

i += 1

345

342

return r

346

while revid_set:

343

while 1:

347

344

if bzrlib.revision.is_null(revid):

348

345

return

349

rev_introduced = introduced_revisions(revid)

350

matching = rev_introduced.intersection(revid_set)

351

if matching:

352

# We don't need to look for these anymore.

353

revid_set.difference_update(matching)

346

if introduced_revisions(revid) & revid_set:

354

347

yield revid

355

348

parents = self._rev_info[self._rev_indices[revid]][2]

356

349

if len(parents) == 0:

360

353

def get_short_revision_history_by_fileid(self, file_id):

361

354

# FIXME: would be awesome if we could get, for a folder, the list of

362

355

# revisions where items within that folder changed.

363

possible_keys = [(file_id, revid) for revid in self._rev_indices]

364

get_parent_map = self._branch.repository.texts.get_parent_map

365

# We chunk the requests as this works better with GraphIndex.

366

# See _filter_revisions_touching_file_id in bzrlib/log.py

367

# for more information.

368

revids = []

369

chunk_size = 1000

370

for start in xrange(0, len(possible_keys), chunk_size):

371

next_keys = possible_keys[start:start + chunk_size]

372

revids += [k[1] for k in get_parent_map(next_keys)]

373

del possible_keys, next_keys

356

try:

357

# FIXME: Workaround for bzr versions prior to 1.6b3.

358

# Remove me eventually pretty please :)

359

w = self._branch.repository.weave_store.get_weave(

360

file_id, self._branch.repository.get_transaction())

361

w_revids = w.versions()

362

revids = [r for r in self._full_history if r in w_revids]

363

except AttributeError:

364

possible_keys = [(file_id, revid) for revid in self._full_history]

365

get_parent_map = self._branch.repository.texts.get_parent_map

366

# We chunk the requests as this works better with GraphIndex.

367

# See _filter_revisions_touching_file_id in bzrlib/log.py

368

# for more information.

369

revids = []

370

chunk_size = 1000

371

for start in xrange(0, len(possible_keys), chunk_size):

372

next_keys = possible_keys[start:start + chunk_size]

373

revids += [k[1] for k in get_parent_map(next_keys)]

374

del possible_keys, next_keys

374

375

return revids

375

376

377

def get_revision_history_since(self, revid_list, date):

471

472

if revid is None:

472

473

revid = self.last_revid

473

474

if file_id is not None:

474

revlist = list(

475

self.get_short_revision_history_by_fileid(file_id))

476

revlist = self.get_revids_from(revlist, revid)

475

# since revid is 'start_revid', possibly should start the path

476

# tracing from revid... FIXME

477

revlist = list(self.get_short_revision_history_by_fileid(file_id))

478

revlist = list(self.get_revids_from(revlist, revid))

477

479

else:

478

revlist = self.get_revids_from(None, revid)

480

revlist = list(self.get_revids_from(None, revid))

479

481

return revlist

480

482

481

@staticmethod

482

def _iterate_sufficiently(iterable, stop_at, extra_rev_count):

483

"""Return a list of iterable.

484

485

If extra_rev_count is None, fully consume iterable.

486

Otherwise, stop at 'stop_at' + extra_rev_count.

487

488

Example:

489

iterate until you find stop_at, then iterate 10 more times.

490

"""

491

if extra_rev_count is None:

492

return list(iterable)

493

result = []

494

found = False

495

for n in iterable:

496

result.append(n)

497

if n == stop_at:

498

found = True

499

break

500

if found:

501

for count, n in enumerate(iterable):

502

if count >= extra_rev_count:

503

break

504

result.append(n)

505

return result

506

507

def get_view(self, revid, start_revid, file_id, query=None,

508

extra_rev_count=None):

483

def get_view(self, revid, start_revid, file_id, query=None):

509

484

"""

510

485

use the URL parameters (revid, start_revid, file_id, and query) to

511

486

determine the revision list we're viewing (start_revid, file_id, query)

516

491

file.

517

492

- if a start_revid is given, we're viewing the branch from a

518

493

specific revision up the tree.

519

- if extra_rev_count is given, find the view from start_revid =>

520

revid, and continue an additional 'extra_rev_count'. If not

521

given, then revid_list will contain the full history of

522

start_revid

523

494

524

495

these may be combined to view revisions for a specific file, from

525

496

a specific revision, with a specific search query.

538

509

539

510

if query is None:

540

511

revid_list = self.get_file_view(start_revid, file_id)

541

revid_list = self._iterate_sufficiently(revid_list, revid,

542

extra_rev_count)

543

512

if revid is None:

544

513

revid = start_revid

545

514

if revid not in revid_list:

546

515

# if the given revid is not in the revlist, use a revlist that

547

516

# starts at the given revid.

548

517

revid_list = self.get_file_view(revid, file_id)

549

revid_list = self._iterate_sufficiently(revid_list, revid,

550

extra_rev_count)

551

518

start_revid = revid

552

519

return revid, start_revid, revid_list

553

520

570

537

def get_inventory(self, revid):

571

538

if revid not in self._inventory_cache:

572

539

self._inventory_cache[revid] = (

573

self._branch.repository.get_inventory(revid))

540

self._branch.repository.get_revision_inventory(revid))

574

541

return self._inventory_cache[revid]

575

542

576

543

def get_path(self, revid, file_id):

624

591

revnol = revno.split(".")

625

592

revnos = ".".join(revnol[:-2])

626

593

revnolast = int(revnol[-1])

627

if revnos in d:

594

if revnos in d.keys():

628

595

m = d[revnos][0]

629

596

if revnolast < m:

630

597

d[revnos] = (revnolast, revid)

631

598

else:

632

599

d[revnos] = (revnolast, revid)

633

600

634

return [revid for (_, revid) in d.itervalues()]

601

return [d[revnos][1] for revnos in d.keys()]

635

602

636

603

def add_branch_nicks(self, change):

637

604

"""

704

671

Given a bzrlib Revision, return a processed "change" for use in

705

672

templates.

706

673

"""

674

commit_time = datetime.datetime.fromtimestamp(revision.timestamp)

675

676

parents = [util.Container(revid=r,

677

revno=self.get_revno(r)) for r in revision.parent_ids]

678

707

679

message, short_message = clean_message(revision.message)

708

680

709

if self._branch_tags is None:

710

self._branch_tags = self._branch.tags.get_reverse_tag_dict()

711

712

revtags = None

713

if revision.revision_id in self._branch_tags:

714

# tag.sort_* functions expect (tag, data) pairs, so we generate them,

715

# and then strip them

716

tags = [(t, None) for t in self._branch_tags[revision.revision_id]]

717

sort_func = getattr(tag, 'sort_natural', None)

718

if sort_func is None:

719

tags.sort()

720

else:

721

sort_func(self._branch, tags)

722

revtags = u', '.join([t[0] for t in tags])

681

try:

682

authors = revision.get_apparent_authors()

683

except AttributeError:

684

authors = [revision.get_apparent_author()]

723

685

724

686

entry = {

725

687

'revid': revision.revision_id,

726

'date': datetime.datetime.fromtimestamp(revision.timestamp),

727

'utc_date': datetime.datetime.utcfromtimestamp(revision.timestamp),

728

'committer': revision.committer,

729

'authors': revision.get_apparent_authors(),

688

'date': commit_time,

689

'authors': authors,

730

690

'branch_nick': revision.properties.get('branch-nick', None),

731

691

'short_comment': short_message,

732

692

'comment': revision.message,

733

693

'comment_clean': [util.html_clean(s) for s in message],

734

694

'parents': revision.parent_ids,

735

'bugs': [bug.split()[0] for bug in revision.properties.get('bugs', '').splitlines()],

736

'tags': revtags,

737

695

}

738

if isinstance(revision, bzrlib.foreign.ForeignRevision):

739

foreign_revid, mapping = (

740

revision.foreign_revid, revision.mapping)

741

elif ":" in revision.revision_id:

742

try:

743

foreign_revid, mapping = \

744

bzrlib.foreign.foreign_vcs_registry.parse_revision_id(

745

revision.revision_id)

746

except bzrlib.errors.InvalidRevisionId:

747

foreign_revid = None

748

mapping = None

749

else:

750

foreign_revid = None

751

if foreign_revid is not None:

752

entry["foreign_vcs"] = mapping.vcs.abbreviation

753

entry["foreign_revid"] = mapping.vcs.show_foreign_revid(foreign_revid)

754

696

return util.Container(entry)

755

697

756

def get_file_changes(self, entry):

698

def get_file_changes_uncached(self, entry):

699

repo = self._branch.repository

757

700

if entry.parents:

758

701

old_revid = entry.parents[0].revid

759

702

else:

760

703

old_revid = bzrlib.revision.NULL_REVISION

761

704

return self.file_changes_for_revision_ids(old_revid, entry.revid)

762

705

706

def get_file_changes(self, entry):

707

if self._file_change_cache is None:

708

return self.get_file_changes_uncached(entry)

709

else:

710

return self._file_change_cache.get_file_changes(entry)

711

763

712

def add_changes(self, entry):

764

713

changes = self.get_file_changes(entry)

765

714

entry.changes = changes

766

715

767

716

def get_file(self, file_id, revid):

768

"""Returns (path, filename, file contents)"""

717

"returns (path, filename, data)"

769

718

inv = self.get_inventory(revid)

770

719

inv_entry = inv[file_id]

771

720

rev_tree = self._branch.repository.revision_tree(inv_entry.revision)

788

737

text_changes: list((filename, file_id)),

789

738

"""

790

739

repo = self._branch.repository

791

if (bzrlib.revision.is_null(old_revid) or

792

bzrlib.revision.is_null(new_revid)):

740

if bzrlib.revision.is_null(old_revid) or \

741

bzrlib.revision.is_null(new_revid):

793

742

old_tree, new_tree = map(

794

743

repo.revision_tree, [old_revid, new_revid])

795

744

else:

800

749

bzrlib.delta.report_changes(new_tree.iter_changes(old_tree), reporter)

801

750

802

751

return util.Container(

803

added=sorted(reporter.added, key=lambda x: x.filename),

804

renamed=sorted(reporter.renamed, key=lambda x: x.new_filename),

805

removed=sorted(reporter.removed, key=lambda x: x.filename),

806

modified=sorted(reporter.modified, key=lambda x: x.filename),

807

text_changes=sorted(reporter.text_changes,

808

key=lambda x: x.filename))

809

752

added=sorted(reporter.added, key=lambda x:x.filename),

753

renamed=sorted(reporter.renamed, key=lambda x:x.new_filename),

754

removed=sorted(reporter.removed, key=lambda x:x.filename),

755

modified=sorted(reporter.modified, key=lambda x:x.filename),

756

text_changes=sorted(reporter.text_changes, key=lambda x:x.filename))

Older »