20
20
indexing of the comment text of revisions, for fast searching.
22
two separate 'shelve' files are created:
22
two separate database files are created:
24
24
- recorded: revid -> 1 (if the revid is indexed)
25
25
- index: 3-letter substring -> list(revids)
35
34
from loggerhead import util
36
35
from loggerhead.util import decorator
37
36
from loggerhead.lockfile import LockFile
37
from loggerhead.changecache import FakeShelf
39
39
# if any substring index reaches this many revids, replace the entry with
40
40
# an ALL marker -- it's not worth an explicit index.
69
69
if not os.path.exists(cache_path):
70
70
os.mkdir(cache_path)
72
self._recorded_filename = os.path.join(cache_path, 'textindex-recorded')
73
self._index_filename = os.path.join(cache_path, 'textindex')
72
self._recorded_filename = os.path.join(cache_path, 'textindex-recorded.sql')
73
self._index_filename = os.path.join(cache_path, 'textindex.sql')
75
75
# use a lockfile since the cache folder could be shared across different processes.
76
76
self._lock = LockFile(os.path.join(cache_path, 'index-lock'))
77
77
self._closed = False
79
79
self.log.info('Using search index; %d entries.', len(self))
82
return FakeShelf(self._index_filename)
85
return FakeShelf(self._recorded_filename)
81
87
def _is_indexed(self, revid, recorded):
82
return recorded.get(util.to_utf8(revid), None) is not None
88
return recorded.get(util.to_utf8(revid)) is not None
85
91
def is_indexed(self, revid):
86
recorded = shelve.open(self._recorded_filename, 'c', protocol=2)
92
recorded = self._recorded()
88
94
return self._is_indexed(revid, recorded)
114
recorded = shelve.open(self._recorded_filename, 'c', protocol=2)
120
recorded = self._recorded()
121
last_revid = util.to_utf8(self.history.last_revid)
116
return (len(recorded) >= len(self.history.get_revision_history())) and (util.to_utf8(self.history.last_revid) in recorded)
123
return (recorded.count() >= len(self.history.get_revision_history())
124
and recorded.get(last_revid) is not None)
135
143
revid_set.add(change.revid)
136
144
if len(revid_set) > ALL_THRESHOLD:
138
index[sub] = revid_set
140
recorded[util.to_utf8(change.revid)] = True
147
index.update([(sub, revid_set)], commit=False)
149
index.add([(sub, revid_set)], commit=False)
151
recorded.add([(util.to_utf8(change.revid), True)], commit=False)
143
154
def index_changes(self, revid_list):
144
recorded = shelve.open(self._recorded_filename, 'c', protocol=2)
145
index = shelve.open(self._index_filename, 'c', protocol=2)
155
recorded = self._recorded()
156
index = self._index()
147
158
revid_list = [r for r in revid_list if not self._is_indexed(r, recorded)]
148
159
change_list = self.history.get_changes(revid_list)
149
160
for change in change_list:
150
161
self._index_change(change, recorded, index)
163
index.close(commit=True)
164
recorded.close(commit=True)
156
167
def find(self, text, revid_list=None):
157
index = shelve.open(self._index_filename, 'c', protocol=2)
168
index = self._index()
159
170
text = normalize_string(text)
160
171
if len(text) < 3:
168
179
for i in xrange(len(text) - 2):
169
180
sub = text[i:i + 3]
170
revid_set = index.get(sub, None)
181
revid_set = index.get(sub)
171
182
if revid_set is None:
172
183
# zero matches, stop here.