~loggerhead-team/loggerhead/trunk-rich

« back to all changes in this revision

Viewing changes to loggerhead/history.py

Committer: Michael Hudson
Date: 2007-10-29 16:19:30 UTC
mto: This revision was merged to the branch mainline in revision 141.
Revision ID: michael.hudson@canonical.com-20071029161930-oxqrd4rd8j1oz3hx

add do nothing check target

files added:
.bzrignore

MANIFEST.in

Makefile

NEWS

homepage

homepage/download

homepage/download/loggerhead-1.0-0.13.tar.gz

homepage/download/loggerhead-1.1-0.13.tar.gz

homepage/download/loggerhead-1.1.1-0.13.tar.gz

homepage/index.html

homepage/loggerhead-background.png

homepage/loggerhead-logo-medium.png

homepage/loggerhead.css

homepage/sphere.png

loggerhead/branchview.py

loggerhead/changecache.py

loggerhead/controllers/bundle_ui.py

loggerhead/controllers/download_ui.py

loggerhead/daemon.py

loggerhead/history2.py

loggerhead/lockfile.py

loggerhead/static/images/bazaar-banner.png

loggerhead/static/images/loggerhead-banner.png

loggerhead/static/images/nav-small-in.gif

loggerhead/static/images/nav-small-out.gif

loggerhead/static/javascript/collapse.js

loggerhead/templates/browse.kid

loggerhead/tests/test_corners.py

loggerhead/tests/test_filechangecache.py

loggerhead/tests/test_simple.py

loggerhead/textindex.py

push-website

stop-loggerhead.py

files removed:
loggerhead/templates/login.kid

loggerhead/templates/welcome.kid

loggerhead/tests/test_controllers.py

loggerhead/tests/test_model.py

sample-prod.cfg

files renamed:
loggerhead.conf => loggerhead.conf.example

files modified:
README.txt

dev.cfg

loggerhead/TODO

loggerhead/config/log.cfg

loggerhead/controllers/__init__.py

loggerhead/controllers/annotate_ui.py

loggerhead/controllers/atom_ui.py

loggerhead/controllers/changelog_ui.py

loggerhead/controllers/inventory_ui.py

loggerhead/controllers/revision_ui.py

loggerhead/history.py

loggerhead/release.py

loggerhead/static/css/style.css

loggerhead/templates/annotate.kid

loggerhead/templates/atom.kid

loggerhead/templates/changelog.kid

loggerhead/templates/inventory.kid

loggerhead/templates/master.kid

loggerhead/templates/revision.kid

loggerhead/util.py

setup.py

start-loggerhead.py

Show diffs side-by-side

added added

removed removed

loggerhead/history.py

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# This file (and many of the web templates) contains work based on the

# "bazaar-webserve" project by Goffredo Baroncelli, which is in turn based

# on "hgweb" by Jake Edge and Matt Mackall.

import bisect

import cgi

import datetime

import logging

from StringIO import StringIO

from loggerhead import util

extra_path = util.get_config().get('bzrpath', None)

if extra_path:

sys.path.insert(0, extra_path)

from loggerhead.util import decorator

import bzrlib

import bzrlib.annotate

import bzrlib.branch

import bzrlib.bundle.serializer

import bzrlib.decorators

import bzrlib.diff

import bzrlib.errors

import bzrlib.progress

import bzrlib.revision

import bzrlib.textfile

import bzrlib.tsort

import bzrlib.ui

log = logging.getLogger("loggerhead.controllers")

# cache lock binds tighter than branch lock

def with_cache_lock(unbound):
    """Decorate a method so that it runs while holding ``self._cache_lock``.

    The lock is acquired before ``unbound`` is called and released in a
    ``finally`` clause, so it is released even when the method raises.
    The returned wrapper carries the wrapped method's ``__name__`` and
    ``__doc__``.
    """
    def cache_locked(self, *args, **kw):
        self._cache_lock.acquire()
        try:
            return unbound(self, *args, **kw)
        finally:
            self._cache_lock.release()
    cache_locked.__doc__ = unbound.__doc__
    cache_locked.__name__ = unbound.__name__
    return cache_locked

def with_branch_lock(unbound):
    """Decorate a method so that it runs while holding ``self._lock``.

    The lock is acquired before ``unbound`` is called and released in a
    ``finally`` clause, so it is released even when the method raises.
    The returned wrapper carries the wrapped method's ``__name__`` and
    ``__doc__``.
    """
    def branch_locked(self, *args, **kw):
        self._lock.acquire()
        try:
            return unbound(self, *args, **kw)
        finally:
            self._lock.release()
    branch_locked.__doc__ = unbound.__doc__
    branch_locked.__name__ = unbound.__name__
    return branch_locked

with_branch_lock = util.with_lock('_lock', 'branch')

# bzrlib's UIFactory is not thread-safe

bzrlib.ui.ui_factory = ThreadSafeUIFactory()

def _process_side_by_side_buffers(line_list, delete_list, insert_list):

while len(delete_list) < len(insert_list):

delete_list.append((None, '', 'context'))

while len(insert_list) < len(delete_list):

insert_list.append((None, '', 'context'))

while len(delete_list) > 0:

d = delete_list.pop(0)

i = insert_list.pop(0)

line_list.append(util.Container(old_lineno=d[0], new_lineno=i[0],

old_line=d[1], new_line=i[1],

old_type=d[2], new_type=i[2]))

def _make_side_by_side(chunk_list):

"""

turn a normal unified-style diff (post-processed by parse_delta) into a

side-by-side diff structure. the new structure is::

chunks: list(

diff: list(

old_lineno: int,

new_lineno: int,

old_line: str,

new_line: str,

type: str('context' or 'changed'),

100

)

101

)

102

"""

103

out_chunk_list = []

104

for chunk in chunk_list:

105

line_list = []

106

delete_list, insert_list = [], []

107

for line in chunk.diff:

108

if line.type == 'context':

109

if len(delete_list) or len(insert_list):

110

_process_side_by_side_buffers(line_list, delete_list, insert_list)

111

delete_list, insert_list = [], []

112

line_list.append(util.Container(old_lineno=line.old_lineno, new_lineno=line.new_lineno,

113

old_line=line.line, new_line=line.line,

114

old_type=line.type, new_type=line.type))

115

elif line.type == 'delete':

116

delete_list.append((line.old_lineno, line.line, line.type))

117

elif line.type == 'insert':

118

insert_list.append((line.new_lineno, line.line, line.type))

119

if len(delete_list) or len(insert_list):

120

_process_side_by_side_buffers(line_list, delete_list, insert_list)

121

out_chunk_list.append(util.Container(diff=line_list))

122

return out_chunk_list

123

124

125

def is_branch(folder):
    """Return True if ``folder`` can be opened as a branch, False otherwise.

    Any ordinary error raised while opening is treated as "not a branch".
    """
    try:
        bzrlib.branch.Branch.open(folder)
    except Exception:
        # Deliberately broad, but no longer a bare ``except:`` so that
        # KeyboardInterrupt/SystemExit are not swallowed on Python >= 2.5.
        return False
    return True

131

132

133

def clean_message(message):
    """Clean up a commit message and return it and a short (1-line) version.

    Commit messages that are long single lines are reflowed using the textwrap
    module (Robey, the original author of this code, apparently favored this
    style of message).

    Returns a ``(lines, short_message)`` tuple: ``lines`` is the list of
    message lines, and ``short_message`` is the first line, cut to 80
    characters followed by ``'...'`` when it is longer than that.  An empty
    or whitespace-only message yields ``([''], '')``.
    """
    message = message.splitlines()

    if len(message) == 1:
        message = textwrap.wrap(message[0])

    if len(message) == 0:
        # We can end up here when (a) the commit message was empty or (b)
        # when the message consisted entirely of whitespace, in which case
        # textwrap.wrap() returns an empty list.
        return [''], ''

    # Make short form of commit message.
    short_message = message[0]
    if len(short_message) > 80:
        short_message = short_message[:80] + '...'

    return message, short_message

157

158

159

def rich_filename(path, kind):
    """Return ``path`` with a marker appended for its kind.

    ``'directory'`` gets a trailing ``/`` and ``'symlink'`` a trailing ``@``;
    any other kind returns ``path`` unchanged.
    """
    if kind == 'directory':
        path += '/'
    elif kind == 'symlink':
        path += '@'
    return path

165

166

167

168

# from bzrlib

169

class _RevListToTimestamps(object):

170

"""This takes a list of revisions, and allows you to bisect by date"""

171

172

__slots__ = ['revid_list', 'repository']

173

174

def __init__(self, revid_list, repository):

175

self.revid_list = revid_list

176

self.repository = repository

177

178

def __getitem__(self, index):

179

"""Get the date of the index'd item"""

180

return datetime.datetime.fromtimestamp(self.repository.get_revision(self.revid_list[index]).timestamp)

181

182

def __len__(self):

183

return len(self.revid_list)

184

185

186

class History (object):

187

188

def __init__(self):

189

self._change_cache = None

self._cache_lock = threading.Lock()

190

self._file_change_cache = None

191

self._index = None

192

self._lock = threading.RLock()

def __del__(self):

if self._change_cache is not None:

self._change_cache.close()

self._change_cache_diffs.close()

100

self._change_cache = None

101

self._change_cache_diffs = None

102

193

103

194

@classmethod

104

def from_branch(cls, branch):

195

def from_branch(cls, branch, name=None):

105

196

z = time.time()

106

197

self = cls()

107

198

self._branch = branch

108

self._history = branch.revision_history()

109

self._revision_graph = branch.repository.get_revision_graph()

110

self._last_revid = self._history[-1]

111

199

self._last_revid = self._branch.last_revision()

200

if self._last_revid is not None:

201

self._revision_graph = branch.repository.get_revision_graph(self._last_revid)

202

else:

203

self._revision_graph = {}

204

205

if name is None:

206

name = self._branch.nick

207

self._name = name

208

self.log = logging.getLogger('loggerhead.%s' % (name,))

209

112

210

self._full_history = []

113

211

self._revision_info = {}

114

212

self._revno_revid = {}

115

213

self._merge_sort = bzrlib.tsort.merge_sort(self._revision_graph, self._last_revid, generate_revno=True)

116

count = 0

117

214

for (seq, revid, merge_depth, revno, end_of_merge) in self._merge_sort:

118

215

self._full_history.append(revid)

119

216

revno_str = '.'.join(str(n) for n in revno)

120

217

self._revno_revid[revno_str] = revid

121

218

self._revision_info[revid] = (seq, revid, merge_depth, revno_str, end_of_merge)

122

count += 1

123

self._count = count

124

219

125

220

# cache merge info

126

221

self._where_merged = {}

127

222

for revid in self._revision_graph.keys():

128

if not revid in self._full_history:

223

if not revid in self._full_history:

129

224

continue

130

225

for parent in self._revision_graph[revid]:

131

226

self._where_merged.setdefault(parent, set()).add(revid)

132

227

133

log.info('built revision graph cache: %r secs' % (time.time() - z,))

228

self.log.info('built revision graph cache: %r secs' % (time.time() - z,))

134

229

return self

135

230

136

231

@classmethod

137

def from_folder(cls, path):

232

def from_folder(cls, path, name=None):

138

233

b = bzrlib.branch.Branch.open(path)

139

return cls.from_branch(b)

234

return cls.from_branch(b, name)

140

235

141

236

@with_branch_lock

142

237

def out_of_date(self):

143

if self._branch.revision_history()[-1] != self._last_revid:

238

# the branch may have been upgraded on disk, in which case we're stale.

239

if self._branch.__class__ is not \

240

bzrlib.branch.Branch.open(self._branch.base).__class__:

144

241

return True

145

return False

146

147

@with_cache_lock

148

def use_cache(self, path):

149

if not os.path.exists(path):

150

os.mkdir(path)

151

# keep a separate cache for the diffs, because they're very time-consuming to fetch.

152

cachefile = os.path.join(path, 'changes')

153

cachefile_diffs = os.path.join(path, 'changes-diffs')

154

155

# why can't shelve allow 'cw'?

156

if not os.path.exists(cachefile):

157

self._change_cache = shelve.open(cachefile, 'c', protocol=2)

158

else:

159

self._change_cache = shelve.open(cachefile, 'w', protocol=2)

160

if not os.path.exists(cachefile_diffs):

161

self._change_cache_diffs = shelve.open(cachefile_diffs, 'c', protocol=2)

162

else:

163

self._change_cache_diffs = shelve.open(cachefile_diffs, 'w', protocol=2)

164

165

# once we process a change (revision), it should be the same forever.

166

log.info('Using change cache %s; %d, %d entries.' % (path, len(self._change_cache), len(self._change_cache_diffs)))

167

self._change_cache_filename = cachefile

168

self._change_cache_diffs_filename = cachefile_diffs

169

170

@with_cache_lock

171

def dont_use_cache(self):

172

# called when a new history object needs to be created. we can't use

173

# the cache files anymore; they belong to the new history object.

174

if self._change_cache is None:

175

return

176

self._change_cache.close()

177

self._change_cache_diffs.close()

178

self._change_cache = None

179

self._change_cache_diffs = None

180

181

@with_cache_lock

242

return self._branch.last_revision() != self._last_revid

243

244

def use_cache(self, cache):

245

self._change_cache = cache

246

247

def use_file_cache(self, cache):

248

self._file_change_cache = cache

249

250

def use_search_index(self, index):

251

self._index = index

252

253

@with_branch_lock

254

def detach(self):

255

# called when a new history object needs to be created, because the

256

# branch history has changed. we need to immediately close and stop

257

# using our caches, because a new history object will be created to

258

# replace us, using the same cache files.

259

# (may also be called during server shutdown.)

260

if self._change_cache is not None:

261

self._change_cache.close()

262

self._change_cache = None

263

if self._index is not None:

264

self._index.close()

265

self._index = None

266

182

267

def flush_cache(self):

183

268

if self._change_cache is None:

184

269

return

185

self._change_cache.sync()

186

self._change_cache_diffs.sync()

187

270

self._change_cache.flush()

271

272

def check_rebuild(self):

273

if self._change_cache is not None:

274

self._change_cache.check_rebuild()

275

if self._index is not None:

276

self._index.check_rebuild()

277

188

278

last_revid = property(lambda self: self._last_revid, None, None)

189

190

count = property(lambda self: self._count, None, None)

191

279

192

280

@with_branch_lock

193

def get_revision(self, revid):

194

return self._branch.repository.get_revision(revid)

195

281

def get_config(self):

282

return self._branch.get_config()

283

196

284

def get_revno(self, revid):

197

285

if revid not in self._revision_info:

198

286

# ghost parent?

200

288

seq, revid, merge_depth, revno_str, end_of_merge = self._revision_info[revid]

201

289

return revno_str

202

290

203

def get_sequence(self, revid):

204

seq, revid, merge_depth, revno_str, end_of_merge = self._revision_info[revid]

205

return seq

206

207

291

def get_revision_history(self):

208

292

return self._full_history

209

210

def get_revid_sequence(self, revid_list, revid):

211

"""

212

given a list of revision ids, return the sequence # of this revid in

213

the list.

214

"""

215

seq = 0

216

for r in revid_list:

217

if revid == r:

218

return seq

219

seq += 1

220

293

221

294

def get_revids_from(self, revid_list, revid):

222

295

"""

223

296

given a list of revision ids, yield revisions in graph order,

245

318

revids = [r for r in self._full_history if r in w_revids]

246

319

return revids

247

320

321

@with_branch_lock
def get_revision_history_since(self, revid_list, date):
    """Return the tail of ``revid_list`` selected by bisecting on ``date``.

    ``revid_list`` is expected newest-first.  The number of revisions whose
    timestamp is not later than ``date`` plus one day is found by bisection,
    and that many entries are returned from the end of ``revid_list``.
    Returns ``[]`` when no revision qualifies.  ``revid_list`` itself is
    not modified.
    """
    # if a user asks for revisions starting at 01-sep, they mean inclusive,
    # so start at midnight on 02-sep.
    date = date + datetime.timedelta(days=1)
    # our revid list is sorted in REVERSE date order but bisect needs
    # ascending order, so search a reversed copy.  (Reversing the caller's
    # list in place left it reversed whenever we returned early below.)
    oldest_first = list(revid_list)
    oldest_first.reverse()
    index = bisect.bisect(
        _RevListToTimestamps(oldest_first, self._branch.repository), date)
    if index == 0:
        return []
    return revid_list[-index:]

334

335

@with_branch_lock

336

def get_revision_history_matching(self, revid_list, text):

337

self.log.debug('searching %d revisions for %r', len(revid_list), text)

338

z = time.time()

339

# this is going to be painfully slow. :(

340

out = []

341

text = text.lower()

342

for revid in revid_list:

343

change = self.get_changes([ revid ])[0]

344

if text in change.comment.lower():

345

out.append(revid)

346

self.log.debug('searched %d revisions for %r in %r secs', len(revid_list), text, time.time() - z)

347

return out

348

349

def get_revision_history_matching_indexed(self, revid_list, text):
    """Search ``revid_list`` for ``text`` using the search index if present.

    Without an index (``self._index`` is None) this delegates to
    ``get_revision_history_matching``.  Otherwise the revids returned by
    ``self._index.find`` are returned in the order in which they occur in
    ``self._full_history``.
    """
    self.log.debug('searching %d revisions for %r', len(revid_list), text)
    z = time.time()
    if self._index is None:
        return self.get_revision_history_matching(revid_list, text)
    out = self._index.find(text, revid_list)
    self.log.debug('searched %d revisions for %r in %r secs: %d results', len(revid_list), text, time.time() - z, len(out))
    # put them in some coherent order :)
    # (a set keeps each membership test constant-time across the history)
    found = set(out)
    return [r for r in self._full_history if r in found]

359

360

@with_branch_lock

361

def get_search_revid_list(self, query, revid_list):

362

"""

363

given a "quick-search" query, try a few obvious possible meanings:

364

365

- revision id or # ("128.1.3")

366

- date (US style "mm/dd/yy", earth style "dd-mm-yy", or iso style "yyyy-mm-dd")

367

- comment text as a fallback

368

369

and return a revid list that matches.

370

"""

371

# FIXME: there is some silliness in this action. we have to look up

372

# all the relevant changes (time-consuming) only to return a list of

373

# revids which will be used to fetch a set of changes again.

374

375

# if they entered a revid, just jump straight there; ignore the passed-in revid_list

376

revid = self.fix_revid(query)

377

if revid is not None:

378

if isinstance(revid, unicode):

379

revid = revid.encode('utf-8')

380

changes = self.get_changes([ revid ])

381

if (changes is not None) and (len(changes) > 0):

382

return [ revid ]

383

384

date = None

385

m = self.us_date_re.match(query)

386

if m is not None:

387

date = datetime.datetime(util.fix_year(int(m.group(3))), int(m.group(1)), int(m.group(2)))

388

else:

389

m = self.earth_date_re.match(query)

390

if m is not None:

391

date = datetime.datetime(util.fix_year(int(m.group(3))), int(m.group(2)), int(m.group(1)))

392

else:

393

m = self.iso_date_re.match(query)

394

if m is not None:

395

date = datetime.datetime(util.fix_year(int(m.group(1))), int(m.group(2)), int(m.group(3)))

396

if date is not None:

397

if revid_list is None:

398

# if no limit to the query was given, search only the direct-parent path.

399

revid_list = list(self.get_revids_from(None, self._last_revid))

400

return self.get_revision_history_since(revid_list, date)

401

402

# check comment fields.

403

if revid_list is None:

404

revid_list = self._full_history

405

return self.get_revision_history_matching_indexed(revid_list, query)

406

248

407

revno_re = re.compile(r'^[\d\.]+$')

408

# the date regex are without a final '$' so that queries like

409

# "2006-11-30 12:15" still mostly work. (i think it's better to give

410

# them 90% of what they want instead of nothing at all.)

411

us_date_re = re.compile(r'^(\d{1,2})/(\d{1,2})/(\d\d(\d\d?))')

412

earth_date_re = re.compile(r'^(\d{1,2})-(\d{1,2})-(\d\d(\d\d?))')

413

iso_date_re = re.compile(r'^(\d\d\d\d)-(\d\d)-(\d\d)')

249

414

250

415

def fix_revid(self, revid):

251

416

# if a "revid" is actually a dotted revno, convert it to a revid

252

417

if revid is None:

253

418

return revid

419

if revid == 'head:':

420

return self._last_revid

254

421

if self.revno_re.match(revid):

255

422

revid = self._revno_revid[revid]

256

423

return revid

257

424

258

425

@with_branch_lock

259

def get_navigation(self, revid, path):

426

def get_file_view(self, revid, file_id):

260

427

"""

261

Given an optional revid and optional path, return a (revlist, revid)

262

for navigation through the current scope: from the revid (or the

263

latest revision) back to the original revision.

264

265

If path is None, the entire revision history is the list scope.

266

If revid is None, the latest revision is used.

428

Given a revid and optional path, return a (revlist, revid) for

429

navigation through the current scope: from the revid (or the latest

430

revision) back to the original revision.

431

432

If file_id is None, the entire revision history is the list scope.

267

433

"""

268

434

if revid is None:

269

435

revid = self._last_revid

270

if path is not None:

271

# since revid is 'start_revid', possibly should start the path tracing from revid... FIXME

272

inv = self._branch.repository.get_revision_inventory(revid)

273

revlist = list(self.get_short_revision_history_by_fileid(inv.path2id(path)))

436

if file_id is not None:

437

# since revid is 'start_revid', possibly should start the path

438

# tracing from revid... FIXME

439

revlist = list(self.get_short_revision_history_by_fileid(file_id))

274

440

revlist = list(self.get_revids_from(revlist, revid))

275

441

else:

276

442

revlist = list(self.get_revids_from(None, revid))

277

if revid is None:

278

revid = revlist[0]

279

return revlist, revid

443

return revlist

444

445

@with_branch_lock
def get_view(self, revid, start_revid, file_id, query=None):
    """
    use the URL parameters (revid, start_revid, file_id, and query) to
    determine the revision list we're viewing (start_revid, file_id, query)
    and where we are in it (revid).

    if a query is given, we're viewing query results.
    if a file_id is given, we're viewing revisions for a specific file.
    if a start_revid is given, we're viewing the branch from a
    specific revision up the tree.
    (these may be combined to view revisions for a specific file, from
    a specific revision, with a specific search query.)

    returns a new (revid, start_revid, revid_list) where:

    - revid: current position within the view
    - start_revid: starting revision of this view
    - revid_list: list of revision ids for this view

    when a query produces no results, (None, None, []) is returned.

    file_id and query are never changed so aren't returned, but they may
    contain vital context for future url navigation.
    """
    if start_revid is None:
        start_revid = self._last_revid

    if query is None:
        revid_list = self.get_file_view(start_revid, file_id)
        if revid is None:
            revid = start_revid
        if revid not in revid_list:
            # if the given revid is not in the revlist, use a revlist that
            # starts at the given revid.
            revid_list = self.get_file_view(revid, file_id)
            start_revid = revid
        return revid, start_revid, revid_list

    # potentially limit the search
    if file_id is not None:
        revid_list = self.get_file_view(start_revid, file_id)
    else:
        revid_list = None

    revid_list = self.get_search_revid_list(query, revid_list)
    if len(revid_list) > 0:
        if revid not in revid_list:
            revid = revid_list[0]
        return revid, start_revid, revid_list
    else:
        # no results
        return None, None, []

280

496

281

497

@with_branch_lock

282

498

def get_inventory(self, revid):

283

499

return self._branch.repository.get_revision_inventory(revid)

284

500

285

def get_where_merged(self, revid):

286

try:

287

return self._where_merged[revid]

288

except:

289

return []

290

501

@with_branch_lock

502

def get_path(self, revid, file_id):

503

if (file_id is None) or (file_id == ''):

504

return ''

505

path = self._branch.repository.get_revision_inventory(revid).id2path(file_id)

506

if (len(path) > 0) and not path.startswith('/'):

507

path = '/' + path

508

return path

509

510

@with_branch_lock

511

def get_file_id(self, revid, path):

512

if (len(path) > 0) and not path.startswith('/'):

513

path = '/' + path

514

return self._branch.repository.get_revision_inventory(revid).path2id(path)

515

516

291

517

def get_merge_point_list(self, revid):

292

518

"""

293

519

Return the list of revids that have merged this node.

294

520

"""

295

if revid in self._history:

521

if '.' not in self.get_revno(revid):

296

522

return []

297

523

298

524

merge_point = []

299

525

while True:

300

children = self.get_where_merged(revid)

526

children = self._where_merged.get(revid, [])

301

527

nexts = []

302

528

for child in children:

303

529

child_parents = self._revision_graph[child]

317

543

merge_point.extend(merge_point_next)

318

544

319

545

revid = nexts[0]

320

546

321

547

def simplify_merge_point_list(self, revids):

322

548

"""if a revision is already merged, don't show further merge points"""

323

549

d = {}

334

560

d[revnos] = ( revnolast, revid )

335

561

336

562

return [ d[revnos][1] for revnos in d.keys() ]

337

338

def get_changelist(self, revid_list):

339

for revid in revid_list:

340

yield self.get_change(revid)

341

563

564

def get_branch_nicks(self, changes):
    """
    given a list of changes from L{get_changes}, fill in the branch nicks
    on all parents and merge points.

    every parent and merge point gets a ``branch_nick`` attribute: the nick
    of the change with the same revid, or ``'(missing)'`` when
    ``get_changes`` returned no change for that revid.
    """
    fetch_set = set()
    for change in changes:
        for p in change.parents:
            fetch_set.add(p.revid)
        for p in change.merge_points:
            fetch_set.add(p.revid)
    p_changes = self.get_changes(list(fetch_set))
    p_change_dict = dict([(c.revid, c) for c in p_changes])
    for change in changes:
        # arch-converted branches may not have merged branch info :(
        for group in (change.parents, change.merge_points):
            for p in group:
                if p.revid in p_change_dict:
                    p.branch_nick = p_change_dict[p.revid].branch_nick
                else:
                    p.branch_nick = '(missing)'

589

342

590

@with_branch_lock

343

def get_change(self, revid, get_diffs=False):

591

def get_changes(self, revid_list):

344

592

if self._change_cache is None:

345

return self._get_change(revid, get_diffs)

346

347

# if the revid is in unicode, use the utf-8 encoding as the key

348

srevid = revid

349

if isinstance(revid, unicode):

350

srevid = revid.encode('utf-8')

351

return self._get_change_from_cache(revid, srevid, get_diffs)

352

353

@with_cache_lock

354

def _get_change_from_cache(self, revid, srevid, get_diffs):

355

if get_diffs:

356

cache = self._change_cache_diffs

357

else:

358

cache = self._change_cache

359

360

if srevid in cache:

361

c = cache[srevid]

362

else:

363

if get_diffs and (srevid in self._change_cache):

364

# salvage the non-diff entry for a jump-start

365

c = self._change_cache[srevid]

366

if len(c.parents) == 0:

367

left_parent = None

368

else:

369

left_parent = c.parents[0].revid

370

c.changes = self.diff_revisions(revid, left_parent, get_diffs=True)

371

cache[srevid] = c

372

else:

373

#log.debug('Entry cache miss: %r' % (revid,))

374

c = self._get_change(revid, get_diffs=get_diffs)

375

cache[srevid] = c

376

593

changes = self.get_changes_uncached(revid_list)

594

else:

595

changes = self._change_cache.get_changes(revid_list)

596

if len(changes) == 0:

597

return changes

598

377

599

# some data needs to be recalculated each time, because it may

378

600

# change as new revisions are added.

379

merge_revids = self.simplify_merge_point_list(self.get_merge_point_list(revid))

380

c.merge_points = [util.Container(revid=r, revno=self.get_revno(r)) for r in merge_revids]

381

382

return c

383

601

for change in changes:

602

merge_revids = self.simplify_merge_point_list(self.get_merge_point_list(change.revid))

603

change.merge_points = [util.Container(revid=r, revno=self.get_revno(r)) for r in merge_revids]

604

change.revno = self.get_revno(change.revid)

605

606

parity = 0

607

for change in changes:

608

change.parity = parity

609

parity ^= 1

610

611

return changes

612

384

613

# alright, let's profile this sucka.

385

def _get_change_profiled(self, revid, get_diffs=False):

614

def _get_changes_profiled(self, revid_list, get_diffs=False):

386

615

from loggerhead.lsprof import profile

387

616

import cPickle

388

ret, stats = profile(self._get_change, revid, get_diffs)

617

ret, stats = profile(self.get_changes_uncached, revid_list, get_diffs)

389

618

stats.sort()

390

619

stats.freeze()

391

620

cPickle.dump(stats, open('lsprof.stats', 'w'), 2)

621

self.log.info('lsprof complete!')

392

622

return ret

393

623

394

def _get_change(self, revid, get_diffs=False):

624

def _get_deltas_for_revisions_with_trees(self, entries):

625

"""Produce a generator of revision deltas.

626

627

Note that the input is a sequence of REVISIONS, not revision_ids.

628

Trees will be held in memory until the generator exits.

629

Each delta is relative to the revision's lefthand predecessor.

630

"""

631

required_trees = set()

632

for entry in entries:

633

required_trees.add(entry.revid)

634

required_trees.update([p.revid for p in entry.parents[:1]])

635

trees = dict((t.get_revision_id(), t) for

636

t in self._branch.repository.revision_trees(required_trees))

637

ret = []

638

self._branch.repository.lock_read()

395

639

try:

396

rev = self._branch.repository.get_revision(revid)

397

except (KeyError, bzrlib.errors.NoSuchRevision):

398

# ghosted parent?

399

entry = {

400

'revid': 'missing',

401

'revno': '',

402

'date': datetime.datetime.fromtimestamp(0),

403

'author': 'missing',

404

'branch_nick': None,

405

'short_comment': 'missing',

406

'comment': 'missing',

407

'comment_clean': 'missing',

408

'parents': [],

409

'merge_points': [],

410

'changes': [],

411

}

412

log.error('ghost entry: %r' % (revid,))

413

return util.Container(entry)

414

415

commit_time = datetime.datetime.fromtimestamp(rev.timestamp)

416

417

parents = [util.Container(revid=r, revno=self.get_revno(r)) for r in rev.parent_ids]

418

419

if len(parents) == 0:

420

left_parent = None

421

else:

422

left_parent = rev.parent_ids[0]

423

424

message = rev.message.splitlines()

425

if len(message) == 1:

426

# robey-style 1-line long message

427

message = textwrap.wrap(message[0])

428

429

# make short form of commit message

430

short_message = message[0]

431

if len(short_message) > 60:

432

short_message = short_message[:60] + '...'

640

for entry in entries:

641

if not entry.parents:

642

old_tree = self._branch.repository.revision_tree(

643

bzrlib.revision.NULL_REVISION)

644

else:

645

old_tree = trees[entry.parents[0].revid]

646

tree = trees[entry.revid]

647

ret.append(tree.changes_from(old_tree))

648

return ret

649

finally:

650

self._branch.repository.unlock()

651

652

def entry_from_revision(self, revision):

653

commit_time = datetime.datetime.fromtimestamp(revision.timestamp)

654

655

parents = [util.Container(revid=r, revno=self.get_revno(r)) for r in revision.parent_ids]

656

657

message, short_message = clean_message(revision.message)

433

658

434

659

entry = {

435

'revid': revid,

436

'revno': self.get_revno(revid),

660

'revid': revision.revision_id,

437

661

'date': commit_time,

438

'author': rev.committer,

439

'branch_nick': rev.properties.get('branch-nick', None),

662

'author': revision.committer,

663

'branch_nick': revision.properties.get('branch-nick', None),

440

664

'short_comment': short_message,

441

'comment': rev.message,

665

'comment': revision.message,

442

666

'comment_clean': [util.html_clean(s) for s in message],

443

667

'parents': parents,

444

'changes': self.diff_revisions(revid, left_parent, get_diffs=get_diffs),

445

668

}

446

669

return util.Container(entry)

447

448

def scan_range(self, revlist, revid, pagesize=20):

449

"""

450

yield a list of (label, title, revid) for a scan range through the full

451

branch history, centered around the given revid.

452

453

example: [ ('<<', 'Previous page', 'rrr'), ('-10', 'Forward 10', 'rrr'),

454

('*', None, None), ('+10', 'Back 10', 'rrr'),

455

('+30', 'Back 30', 'rrr'), ('>>', 'Next page', 'rrr') ]

456

457

next/prev page are always using the pagesize.

458

"""

459

count = len(revlist)

460

pos = self.get_revid_sequence(revlist, revid)

461

462

if pos > 0:

463

yield (u'\xab', 'Previous page', revlist[max(0, pos - pagesize)])

670

671

@with_branch_lock

672

def get_changes_uncached(self, revid_list):

673

# Because we may loop and call get_revisions multiple times (to throw

674

# out dud revids), we grab a read lock.

675

self._branch.lock_read()

676

try:

677

while True:

678

try:

679

rev_list = self._branch.repository.get_revisions(revid_list)

680

except (KeyError, bzrlib.errors.NoSuchRevision), e:

681

# this sometimes happens with arch-converted branches.

682

# i don't know why. :(

683

self.log.debug('No such revision (skipping): %s', e)

684

revid_list.remove(e.revision)

685

else:

686

break

687

688

return [self.entry_from_revision(rev) for rev in rev_list]

689

finally:

690

self._branch.unlock()

691

692

def get_file_changes_uncached(self, entries):
    """
    Return one parsed delta (see C{parse_delta}) per entry, computed
    directly from the deltas for the given entries.
    """
    deltas = self._get_deltas_for_revisions_with_trees(entries)
    parsed = []
    for delta in deltas:
        parsed.append(self.parse_delta(delta))
    return parsed

696

697

@with_branch_lock

698

def get_file_changes(self, entries):

699

if self._file_change_cache is None:

700

return self.get_file_changes_uncached(entries)

464

701

else:

465

yield (u'\xab', None, None)

466

467

offset_sign = -1

468

for offset in util.scan_range(pos, count, pagesize):

469

if (offset > 0) and (offset_sign < 0):

470

offset_sign = 0

471

# show current position

472

# yield ('[%s]' % (self.get_revno(revlist[pos]),), None, None)

473

# yield (u'\u2022', None, None)

474

yield (u'\u00b7', None, None)

475

if offset < 0:

476

title = 'Back %d' % (-offset,)

702

return self._file_change_cache.get_file_changes(entries)

703

704

def add_changes(self, entries):
    """
    Look up the file changes for C{entries} and store each result on the
    matching entry as its C{changes} attribute.
    """
    for entry, file_changes in zip(entries, self.get_file_changes(entries)):
        entry.changes = file_changes

709

710

@with_branch_lock

711

def get_change_with_diff(self, revid, compare_revid=None):

712

entry = self.get_changes([revid])[0]

713

714

if compare_revid is None:

715

if entry.parents:

716

compare_revid = entry.parents[0].revid

477

717

else:

478

title = 'Forward %d' % (offset,)

479

yield ('%+d' % (offset,), title, revlist[pos + offset])

480

481

if pos < count - 1:

482

yield (u'\xbb', 'Next page', revlist[min(count - 1, pos + pagesize)])

483

else:

484

yield (u'\xbb', None, None)

485

718

compare_revid = 'null:'

719

720

rev_tree1 = self._branch.repository.revision_tree(compare_revid)

721

rev_tree2 = self._branch.repository.revision_tree(revid)

722

delta = rev_tree2.changes_from(rev_tree1)

723

724

entry.changes = self.parse_delta(delta)

725

726

entry.changes.modified = self._parse_diffs(rev_tree1, rev_tree2, delta)

727

728

return entry

729

486

730

@with_branch_lock

487

def diff_revisions(self, revid, otherrevid, get_diffs=True):

731

def get_file(self, file_id, revid):
    """
    Return a C{(path, filename, data)} tuple for C{file_id} as recorded in
    the inventory of C{revid}.  The path always starts with '/'.
    """
    inventory = self.get_inventory(revid)
    file_entry = inventory[file_id]
    # The text is read from the tree of the revision recorded on the
    # inventory entry, not from revid itself.
    tree = self._branch.repository.revision_tree(file_entry.revision)
    rel_path = inventory.id2path(file_id)
    if rel_path.startswith('/'):
        full_path = rel_path
    else:
        full_path = '/' + rel_path
    return full_path, file_entry.name, tree.get_file_text(file_id)

740

741

def _parse_diffs(self, old_tree, new_tree, delta):

488

742

"""

489

Return a nested data structure containing the changes between two

490

revisions::

491

492

added: list(filename),

493

renamed: list((old_filename, new_filename)),

494

deleted: list(filename),

495

modified: list(

743

Return a list of processed diffs, in the format::

744

745

list(

496

746

filename: str,

747

file_id: str,

497

748

chunks: list(

498

749

diff: list(

499

750

old_lineno: int,

503

754

504

755

505

756

)

506

507

if C{get_diffs} is false, the C{chunks} will be omitted.

508

"""

509

510

new_tree = self._branch.repository.revision_tree(revid)

511

old_tree = self._branch.repository.revision_tree(otherrevid)

512

delta = new_tree.changes_from(old_tree)

513

757

"""

758

process = []

759

out = []

760

761

for old_path, new_path, fid, kind, text_modified, meta_modified in delta.renamed:

762

if text_modified:

763

process.append((old_path, new_path, fid, kind))

764

for path, fid, kind, text_modified, meta_modified in delta.modified:

765

process.append((path, path, fid, kind))

766

767

for old_path, new_path, fid, kind in process:

768

old_lines = old_tree.get_file_lines(fid)

769

new_lines = new_tree.get_file_lines(fid)

770

buffer = StringIO()

771

if old_lines != new_lines:

772

try:

773

bzrlib.diff.internal_diff(old_path, old_lines,

774

new_path, new_lines, buffer)

775

except bzrlib.errors.BinaryFile:

776

diff = ''

777

else:

778

diff = buffer.getvalue()

779

else:

780

diff = ''

781

out.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid, chunks=self._process_diff(diff)))

782

783

return out

784

785

def _process_diff(self, diff):
    """
    Split the text of a unified diff into a list of chunks, one per
    '@@' hunk header.

    Each chunk is a C{util.Container} whose C{diff} attribute is a list of
    containers with C{old_lineno}, C{new_lineno}, C{type} ('context',
    'insert', 'delete' or 'unknown') and C{line} (the text passed through
    C{util.fixed_width}).

    '--- ' / '+++ ' lines are treated as file headers only while the
    current hunk has no lines left on that side; inside a hunk they are
    ordinary deletions/insertions of text that starts with '-- ' / '++ '.
    Non-empty lines seen before the first hunk header are ignored.
    """
    # doesn't really need to be a method; could be static.
    chunks = []
    chunk = None
    old_lineno = new_lineno = 0
    # Number of body lines the current hunk header still promises for the
    # old and the new side.
    old_left = new_left = 0
    for line in diff.splitlines():
        if len(line) == 0:
            continue
        if line.startswith('@@ '):
            # new chunk
            if chunk is not None:
                chunks.append(chunk)
            chunk = util.Container()
            chunk.diff = []
            ranges = []
            for field in line.split(' ')[1:3]:
                # field looks like '-12,5' or '+12'; drop the sign.
                numbers = field[1:].split(',')
                if len(numbers) > 1:
                    ranges.append((int(numbers[0]), int(numbers[1])))
                else:
                    # an omitted count means the range is a single line
                    ranges.append((int(numbers[0]), 1))
            old_lineno, old_left = ranges[0]
            new_lineno, new_left = ranges[1]
        elif line.startswith('--- ') and old_left <= 0:
            # file header, not a deletion
            continue
        elif line.startswith('+++ ') and new_left <= 0:
            # file header, not an insertion
            continue
        elif chunk is None:
            # no hunk to attach this line to
            continue
        elif line.startswith(' '):
            chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=new_lineno,
                                             type='context', line=util.fixed_width(line[1:])))
            old_lineno += 1
            new_lineno += 1
            old_left -= 1
            new_left -= 1
        elif line.startswith('+'):
            chunk.diff.append(util.Container(old_lineno=None, new_lineno=new_lineno,
                                             type='insert', line=util.fixed_width(line[1:])))
            new_lineno += 1
            new_left -= 1
        elif line.startswith('-'):
            chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=None,
                                             type='delete', line=util.fixed_width(line[1:])))
            old_lineno += 1
            old_left -= 1
        else:
            chunk.diff.append(util.Container(old_lineno=None, new_lineno=None,
                                             type='unknown', line=util.fixed_width(repr(line))))
    if chunk is not None:
        chunks.append(chunk)
    return chunks

822

823

def parse_delta(self, delta):

824

"""

825

Return a nested data structure containing the changes in a delta::

826

827

added: list((filename, file_id)),

828

renamed: list((old_filename, new_filename, file_id)),

829

deleted: list((filename, file_id)),

830

modified: list(

831

filename: str,

832

file_id: str,

833

)

834

"""

514

835

added = []

515

836

modified = []

516

837

renamed = []

517

838

removed = []

518

519

def rich_filename(path, kind):
    """
    Return C{path} with a marker appended for its kind: '/' for a
    directory, '@' for a symlink, nothing otherwise.
    """
    if kind == 'directory':
        return path + '/'
    if kind == 'symlink':
        return path + '@'
    return path

525

526

def tree_lines(tree, fid):
    """
    Return the lines of file C{fid} in C{tree}, read through
    C{bzrlib.textfile.text_file}, or an empty list if the tree does not
    contain the file.
    """
    if fid in tree:
        return bzrlib.textfile.text_file(tree.get_file(fid)).readlines()
    return []

531

532

def process_diff(diff):
    """
    Split the text of a unified diff into a list of chunks, one per
    '@@' hunk header.

    Each chunk is a C{util.Container} whose C{diff} attribute is a list of
    containers with C{old_lineno}, C{new_lineno}, C{type} ('context',
    'insert', 'delete' or 'unknown') and C{line} (the text passed through
    C{util.html_clean}).

    '--- ' / '+++ ' lines are treated as file headers only while the
    current hunk has no lines left on that side; inside a hunk they are
    ordinary deletions/insertions of text that starts with '-- ' / '++ '.
    Non-empty lines seen before the first hunk header are ignored.
    """
    chunks = []
    chunk = None
    old_lineno = new_lineno = 0
    # Number of body lines the current hunk header still promises for the
    # old and the new side.
    old_left = new_left = 0
    for line in diff.splitlines():
        if len(line) == 0:
            continue
        if line.startswith('@@ '):
            # new chunk
            if chunk is not None:
                chunks.append(chunk)
            chunk = util.Container()
            chunk.diff = []
            ranges = []
            for field in line.split(' ')[1:3]:
                # field looks like '-12,5' or '+12'; drop the sign.
                numbers = field[1:].split(',')
                if len(numbers) > 1:
                    ranges.append((int(numbers[0]), int(numbers[1])))
                else:
                    # an omitted count means the range is a single line
                    ranges.append((int(numbers[0]), 1))
            old_lineno, old_left = ranges[0]
            new_lineno, new_left = ranges[1]
        elif line.startswith('--- ') and old_left <= 0:
            # file header, not a deletion
            continue
        elif line.startswith('+++ ') and new_left <= 0:
            # file header, not an insertion
            continue
        elif chunk is None:
            # no hunk to attach this line to
            continue
        elif line.startswith(' '):
            chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=new_lineno,
                                             type='context', line=util.html_clean(line[1:])))
            old_lineno += 1
            new_lineno += 1
            old_left -= 1
            new_left -= 1
        elif line.startswith('+'):
            chunk.diff.append(util.Container(old_lineno=None, new_lineno=new_lineno,
                                             type='insert', line=util.html_clean(line[1:])))
            new_lineno += 1
            new_left -= 1
        elif line.startswith('-'):
            chunk.diff.append(util.Container(old_lineno=old_lineno, new_lineno=None,
                                             type='delete', line=util.html_clean(line[1:])))
            old_lineno += 1
            old_left -= 1
        else:
            chunk.diff.append(util.Container(old_lineno=None, new_lineno=None,
                                             type='unknown', line=util.html_clean(repr(line))))
    if chunk is not None:
        chunks.append(chunk)
    return chunks

568

569

def handle_modify(old_path, new_path, fid, kind):
    """
    Append an entry for a modified file to the enclosing C{modified} list.

    When C{get_diffs} is false only the filename is recorded.  Otherwise
    the file's old and new lines are diffed and the processed chunks are
    stored as C{chunks}.  A file that bzrlib reports as binary gets an
    empty chunk list instead of aborting the whole listing.
    """
    if not get_diffs:
        modified.append(util.Container(filename=rich_filename(new_path, kind)))
        return
    try:
        old_lines = tree_lines(old_tree, fid)
        new_lines = tree_lines(new_tree, fid)
        diff_buffer = StringIO()
        bzrlib.diff.internal_diff(old_path, old_lines, new_path, new_lines, diff_buffer)
    except bzrlib.errors.BinaryFile:
        # not displayable as text; show the file with no diff
        diff = ''
    else:
        diff = diff_buffer.getvalue()
    modified.append(util.Container(filename=rich_filename(new_path, kind), chunks=process_diff(diff)))

579

839

580

840

for path, fid, kind in delta.added:

581

added.append(rich_filename(path, kind))

582

841

added.append((rich_filename(path, kind), fid))

842

583

843

for path, fid, kind, text_modified, meta_modified in delta.modified:

584

handle_modify(path, path, fid, kind)

585

586

for oldpath, newpath, fid, kind, text_modified, meta_modified in delta.renamed:

587

renamed.append((rich_filename(oldpath, kind), rich_filename(newpath, kind)))

844

modified.append(util.Container(filename=rich_filename(path, kind), file_id=fid))

845

846

for old_path, new_path, fid, kind, text_modified, meta_modified in delta.renamed:

847

renamed.append((rich_filename(old_path, kind), rich_filename(new_path, kind), fid))

588

848

if meta_modified or text_modified:

589

handle_modify(oldpath, newpath, fid, kind)

590

849

modified.append(util.Container(filename=rich_filename(new_path, kind), file_id=fid))

850

591

851

for path, fid, kind in delta.removed:

592

removed.append(rich_filename(path, kind))

593

852

removed.append((rich_filename(path, kind), fid))

853

594

854

return util.Container(added=added, renamed=renamed, removed=removed, modified=modified)

595

855

856

@staticmethod
def add_side_by_side(changes):
    """
    For every modified file of every change, store the result of
    C{_make_side_by_side} on its chunks as C{sbs_chunks}.
    """
    # FIXME: this is a rotten API.
    for entry in changes:
        for modified_file in entry.changes.modified:
            modified_file.sbs_chunks = _make_side_by_side(modified_file.chunks)

862

596

863

@with_branch_lock

597

def get_filelist(self, inv, path):

864

def get_filelist(self, inv, file_id, sort_type=None):

598

865

"""

599

866

return the list of all files (and their attributes) within a given

600

867

path subtree.

601

868

"""

602

while path.endswith('/'):

603

path = path[:-1]

604

if path.startswith('/'):

605

path = path[1:]

606

parity = 0

607

for filepath, entry in inv.entries():

608

if posixpath.dirname(filepath) != path:

609

continue

610

filename = posixpath.basename(filepath)

611

rich_filename = filename

869

870

dir_ie = inv[file_id]

871

path = inv.id2path(file_id)

872

file_list = []

873

874

revid_set = set()

875

876

for filename, entry in dir_ie.children.iteritems():

877

revid_set.add(entry.revision)

878

879

change_dict = {}

880

for change in self.get_changes(list(revid_set)):

881

change_dict[change.revid] = change

882

883

for filename, entry in dir_ie.children.iteritems():

612

884

pathname = filename

613

885

if entry.kind == 'directory':

614

886

pathname += '/'

615

616

# last change:

887

617

888

revid = entry.revision

618

change = self.get_change(revid)

619

620

yield util.Container(filename=filename, rich_filename=rich_filename, executable=entry.executable, kind=entry.kind,

621

pathname=pathname, revid=revid, change=change, parity=parity)

889

890

file = util.Container(

891

filename=filename, executable=entry.executable, kind=entry.kind,

892

pathname=pathname, file_id=entry.file_id, size=entry.text_size,

893

revid=revid, change=change_dict[revid])

894

file_list.append(file)

895

896

if sort_type == 'filename' or sort_type is None:

897

file_list.sort(key=lambda x: x.filename)

898

elif sort_type == 'size':

899

file_list.sort(key=lambda x: x.size)

900

elif sort_type == 'date':

901

file_list.sort(key=lambda x: x.change.date)

902

903

parity = 0

904

for file in file_list:

905

file.parity = parity

622

906

parity ^= 1

623

pass

907

908

return file_list

909

910

911

_BADCHARS_RE = re.compile(ur'[\x00-\x08\x0b\x0e-\x1f]')

624

912

625

913

@with_branch_lock

626

914

def annotate_file(self, file_id, revid):

627

915

z = time.time()

628

916

lineno = 1

629

917

parity = 0

630

918

631

919

file_revid = self.get_inventory(revid)[file_id].revision

632

920

oldvalues = None

633

revision_cache = {}

634

921

635

922

# because we cache revision metadata ourselves, it's actually much

636

923

# faster to call 'annotate_iter' on the weave directly than it is to

637

924

# ask bzrlib to annotate for us.

638

925

w = self._branch.repository.weave_store.get_weave(file_id, self._branch.repository.get_transaction())

926

927

revid_set = set()

928

for line_revid, text in w.annotate_iter(file_revid):

929

revid_set.add(line_revid)

930

if self._BADCHARS_RE.match(text):

931

# bail out; this isn't displayable text

932

yield util.Container(parity=0, lineno=1, status='same',

933

text='(This is a binary file.)',

934

change=util.Container())

935

return

936

change_cache = dict([(c.revid, c) for c in self.get_changes(list(revid_set))])

937

639

938

last_line_revid = None

640

939

for line_revid, text in w.annotate_iter(file_revid):

641

940

if line_revid == last_line_revid:

645

944

status = 'changed'

646

945

parity ^= 1

647

946

last_line_revid = line_revid

648

change = revision_cache.get(line_revid, None)

649

if change is None:

650

change = self.get_change(line_revid)

651

revision_cache[line_revid] = change

947

change = change_cache[line_revid]

652

948

trunc_revno = change.revno

653

949

if len(trunc_revno) > 10:

654

950

trunc_revno = trunc_revno[:9] + '...'

655

951

656

952

yield util.Container(parity=parity, lineno=lineno, status=status,

657

trunc_revno=trunc_revno, change=change, text=util.html_clean(text))

953

change=change, text=util.fixed_width(text))

658

954

lineno += 1

659

660

log.debug('annotate: %r secs' % (time.time() - z,))

955

956

self.log.debug('annotate: %r secs' % (time.time() - z,))

957

958

@with_branch_lock
def get_bundle(self, revid, compare_revid=None):
    """
    Return the text of a bundle for C{revid} written against
    C{compare_revid}.

    When C{compare_revid} is not given, the first parent recorded for
    C{revid} in the revision graph is used; if there are no parents it
    stays C{None}.
    """
    if compare_revid is None:
        parents = self._revision_graph[revid]
        if len(parents) > 0:
            compare_revid = parents[0]
    out = StringIO()
    bzrlib.bundle.serializer.write_bundle(self._branch.repository, revid, compare_revid, out)
    return out.getvalue()

969

Older »