~loggerhead-team/loggerhead/trunk-rich

Viewing changes to loggerhead/changecache.py

  • Committer: Martin Pool
  • Date: 2009-01-23 17:53:17 UTC
  • mto: This revision was merged to the branch mainline in revision 298.
  • Revision ID: mbp@sourcefrog.net-20090123175317-zfil1x0wmq9a740k
Stub code to allow loggerhead to load as a plugin
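
The plugin stub itself is not part of this file's diff, but for context: a
Bazaar plugin is just a Python package that bzr imports at startup, so a
stub mainly needs to be importable and cheap to load. A minimal sketch of
such a stub, with assumed names throughout (the 'version_info' contents and
'start_server' are illustrative, not the code from this revision):

    # Hypothetical plugin __init__.py -- illustrative only.
    """Loggerhead as a Bazaar plugin (stub)."""

    version_info = (1, 10, 0)  # assumed version tuple

    def serve_http(branch_url):
        # Defer the real loggerhead imports until the plugin is actually
        # used, so plain 'bzr' startup stays fast; 'start_server' is a
        # made-up entry point for illustration.
        from loggerhead import start_server
        return start_server(branch_url)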

--- loggerhead/changecache.py	(old)
+++ loggerhead/changecache.py	(new)
@@ -17,63 +17,46 @@
 #
 
 """
-a cache for chewed-up 'file change' data structures, which are basically just
-a different way of storing a revision delta.  the cache improves lookup times
-10x over bazaar's xml revision structure, though, so currently still worth
-doing.
+a cache for chewed-up "change" data structures, which are basically just a
+different way of storing a revision.  the cache improves lookup times 10x
+over bazaar's xml revision structure, though, so currently still worth doing.
 
 once a revision is committed in bazaar, it never changes, so once we have
 cached a change, it's good forever.
 """
 
 import cPickle
-import marshal
 import os
-import tempfile
-import zlib
+
+from loggerhead import util
+from loggerhead.lockfile import LockFile
+
+with_lock = util.with_lock('_lock', 'ChangeCache')
 
 try:
     from sqlite3 import dbapi2
 except ImportError:
     from pysqlite2 import dbapi2
 
-# We take an optimistic approach to concurrency here: we might do work twice
-# in the case of races, but not crash or corrupt data.
-
-def safe_init_db(filename, init_sql):
-    # To avoid races around creating the database, we create the db in
-    # a temporary file and rename it into the ultimate location.
-    fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(filename))
-    os.close(fd)
-    con = dbapi2.connect(temp_path)
-    cur = con.cursor()
-    cur.execute(init_sql)
-    con.commit()
-    con.close()
-    os.rename(temp_path, filename)
 
 class FakeShelf(object):
 
     def __init__(self, filename):
         create_table = not os.path.exists(filename)
-        if create_table:
-            safe_init_db(
-                filename, "create table RevisionData "
-                "(revid binary primary key, data binary)")
         self.connection = dbapi2.connect(filename)
         self.cursor = self.connection.cursor()
+        if create_table:
+            self._create_table()
 
-    def _create_table(self, filename):
-        con = dbapi2.connect(filename)
-        cur = con.cursor()
-        cur.execute(
+    def _create_table(self):
+        self.cursor.execute(
             "create table RevisionData "
             "(revid binary primary key, data binary)")
-        con.commit()
-        con.close()
+        self.connection.commit()
 
     def _serialize(self, obj):
-        return dbapi2.Binary(cPickle.dumps(obj, protocol=2))
+        r = dbapi2.Binary(cPickle.dumps(obj, protocol=2))
+        return r
 
     def _unserialize(self, data):
         return cPickle.loads(str(data))
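
For context on the removed safe_init_db helper above: building the database
in a temporary file and renaming it into place keeps initialization safe
against concurrent processes, because on POSIX systems os.rename atomically
replaces the destination, so readers see either no cache file or a fully
initialized one, never a half-created schema. A self-contained sketch of the
same pattern (file and table names are illustrative):

    import os
    import tempfile
    from sqlite3 import dbapi2

    def safe_init_db(filename, init_sql):
        # Create the schema in a temp file in the same directory, then
        # atomically rename it over the final path.
        fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(filename))
        os.close(fd)
        con = dbapi2.connect(temp_path)
        con.execute(init_sql)
        con.commit()
        con.close()
        os.rename(temp_path, filename)

    # Illustrative usage:
    # safe_init_db('cache/example.sql',
    #              'create table Example (k binary primary key, v binary)')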
@@ -87,72 +70,49 @@
         else:
             return self._unserialize(filechange[0])
 
-    def add(self, revid, object):
-        try:
+    def add(self, revid_obj_pairs):
+        for (r, d) in revid_obj_pairs:
             self.cursor.execute(
                 "insert into revisiondata (revid, data) values (?, ?)",
-                (revid, self._serialize(object)))
-            self.connection.commit()
-        except dbapi2.IntegrityError:
-            # If another thread or process attempted to set the same key, we
-            # assume it set it to the same value and carry on with our day.
-            pass
+                (r, self._serialize(d)))
+        self.connection.commit()
 
 
 class FileChangeCache(object):
 
-    def __init__(self, cache_path):
+    def __init__(self, history, cache_path):
+        self.history = history
 
         if not os.path.exists(cache_path):
             os.mkdir(cache_path)
 
         self._changes_filename = os.path.join(cache_path, 'filechanges.sql')
 
-    def get_file_changes(self, entry):
+        # use a lockfile since the cache folder could be shared across
+        # different processes.
+        self._lock = LockFile(os.path.join(cache_path, 'filechange-lock'))
+
+    @with_lock
+    def get_file_changes(self, entries):
+        out = []
+        missing_entries = []
+        missing_entry_indices = []
         cache = FakeShelf(self._changes_filename)
-        changes = cache.get(entry.revid)
-        if changes is None:
-            changes = self.history.get_file_changes_uncached(entry)
-            cache.add(entry.revid, changes)
-        return changes
-
-
-class RevInfoDiskCache(object):
-    """Like `RevInfoMemoryCache` but backed in a sqlite DB."""
-
-    def __init__(self, cache_path):
-        if not os.path.exists(cache_path):
-            os.mkdir(cache_path)
-        filename = os.path.join(cache_path, 'revinfo.sql')
-        create_table = not os.path.exists(filename)
-        if create_table:
-            safe_init_db(
-                filename, "create table Data "
-                "(key binary primary key, revid binary, data binary)")
-        self.connection = dbapi2.connect(filename)
-        self.cursor = self.connection.cursor()
-
-    def get(self, key, revid):
-        self.cursor.execute(
-            "select revid, data from data where key = ?", (dbapi2.Binary(key),))
-        row = self.cursor.fetchone()
-        if row is None:
-            return None
-        elif str(row[0]) != revid:
-            return None
-        else:
-            return marshal.loads(zlib.decompress(row[1]))
-
-    def set(self, key, revid, data):
-        try:
-            self.cursor.execute(
-                'delete from data where key = ?', (dbapi2.Binary(key), ))
-            blob = zlib.compress(marshal.dumps(data))
-            self.cursor.execute(
-                "insert into data (key, revid, data) values (?, ?, ?)",
-                map(dbapi2.Binary, [key, revid, blob]))
-            self.connection.commit()
-        except dbapi2.IntegrityError:
-            # If another thread or process attempted to set the same key, we
-            # don't care too much -- it's only a cache after all!
-            pass
+        for entry in entries:
+            changes = cache.get(entry.revid)
+            if changes is not None:
+                out.append(changes)
+            else:
+                missing_entries.append(entry)
+                missing_entry_indices.append(len(out))
+                out.append(None)
+        if missing_entries:
+            missing_changes = self.history.get_file_changes_uncached(
+                                  missing_entries)
+            revid_changes_pairs = []
+            for i, entry, changes in zip(
+                missing_entry_indices, missing_entries, missing_changes):
+                revid_changes_pairs.append((entry.revid, changes))
+                out[i] = changes
+            cache.add(revid_changes_pairs)
+        return out
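
The rewritten get_file_changes above is a standard batch-cache pattern:
serve what the cache already has, remember the positions of the misses,
compute all misses in a single uncached call, then backfill both the output
list and the cache. A generic sketch of the same idea (the function and
parameter names are illustrative, not loggerhead API):

    def batched_lookup(keys, cache_get, compute_many, cache_add):
        # Returns one value per key, in the same order as 'keys'.
        out = []
        missing_keys = []
        missing_indices = []
        for key in keys:
            value = cache_get(key)
            if value is not None:
                out.append(value)
            else:
                missing_keys.append(key)
                missing_indices.append(len(out))
                out.append(None)  # placeholder, backfilled below
        if missing_keys:
            # One call computes every miss at once.
            computed = list(compute_many(missing_keys))
            for i, value in zip(missing_indices, computed):
                out[i] = value
            cache_add(list(zip(missing_keys, computed)))
        return out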