20
20
indexing of the comment text of revisions, for fast searching.
22
two separate database files are created:
22
two separate 'shelve' files are created:
24
24
- recorded: revid -> 1 (if the revid is indexed)
25
25
- index: 3-letter substring -> list(revids)
34
35
from loggerhead import util
35
36
from loggerhead.util import decorator
36
37
from loggerhead.lockfile import LockFile
37
from loggerhead.changecache import FakeShelf
39
39
# if any substring index reaches this many revids, replace the entry with
40
40
# an ALL marker -- it's not worth an explicit index.
69
69
if not os.path.exists(cache_path):
70
70
os.mkdir(cache_path)
72
self._recorded_filename = os.path.join(cache_path, 'textindex-recorded.sql')
73
self._index_filename = os.path.join(cache_path, 'textindex.sql')
72
self._recorded_filename = os.path.join(cache_path, 'textindex-recorded')
73
self._index_filename = os.path.join(cache_path, 'textindex')
75
75
# use a lockfile since the cache folder could be shared across different processes.
76
76
self._lock = LockFile(os.path.join(cache_path, 'index-lock'))
77
77
self._closed = False
79
79
self.log.info('Using search index; %d entries.', len(self))
82
return FakeShelf(self._index_filename)
85
return FakeShelf(self._recorded_filename)
87
81
def _is_indexed(self, revid, recorded):
88
return recorded.get(util.to_utf8(revid)) is not None
82
return recorded.get(util.to_utf8(revid), None) is not None
91
85
def is_indexed(self, revid):
92
recorded = self._recorded()
86
recorded = shelve.open(self._recorded_filename, 'c', protocol=2)
94
88
return self._is_indexed(revid, recorded)
120
recorded = self._recorded()
121
last_revid = util.to_utf8(self.history.last_revid)
114
recorded = shelve.open(self._recorded_filename, 'c', protocol=2)
123
return (recorded.count() >= len(self.history.get_revision_history())
124
and recorded.get(last_revid) is not None)
116
return (len(recorded) >= len(self.history.get_revision_history())) and (util.to_utf8(self.history.last_revid) in recorded)
143
135
revid_set.add(change.revid)
144
136
if len(revid_set) > ALL_THRESHOLD:
147
index.update([(sub, revid_set)], commit=False)
149
index.add([(sub, revid_set)], commit=False)
151
recorded.add([(util.to_utf8(change.revid), True)], commit=False)
138
index[sub] = revid_set
140
recorded[util.to_utf8(change.revid)] = True
154
143
def index_changes(self, revid_list):
155
recorded = self._recorded()
156
index = self._index()
144
recorded = shelve.open(self._recorded_filename, 'c', protocol=2)
145
index = shelve.open(self._index_filename, 'c', protocol=2)
158
147
revid_list = [r for r in revid_list if not self._is_indexed(r, recorded)]
159
148
change_list = self.history.get_changes(revid_list)
160
149
for change in change_list:
161
150
self._index_change(change, recorded, index)
163
index.close(commit=True)
164
recorded.close(commit=True)
167
156
def find(self, text, revid_list=None):
168
index = self._index()
157
index = shelve.open(self._index_filename, 'c', protocol=2)
170
159
text = normalize_string(text)
171
160
if len(text) < 3:
179
168
for i in xrange(len(text) - 2):
180
169
sub = text[i:i + 3]
181
revid_set = index.get(sub)
170
revid_set = index.get(sub, None)
182
171
if revid_set is None:
183
172
# zero matches, stop here.