1
# Copyright (c) 2012 Canonical Ltd
3
# This program is free software: you can redistribute it and/or modify
4
# it under the terms of the GNU Affero General Public License as published by
5
# the Free Software Foundation, either version 3 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU Affero General Public License for more details.
13
# You should have received a copy of the GNU Affero General Public
14
# License along with this program. If not, see
15
# <http://www.gnu.org/licenses/>.
20
from email.utils import parsedate_tz
26
from pycassa.system_manager import (
31
from pycassa.util import convert_time_to_uuid
33
from grackle.cassandra import workaround_1779
36
def create_schema(host, keyspace, clobber=False, create_keyspace=False):
# Set up the Cassandra schema for `keyspace` through a SystemManager
# connected to `host`.
#
# Two column families are declared: 'message' (row keys validated as
# LEXICAL_UUID_TYPE) and 'archive_message' (column names compared as
# TIME_UUID_TYPE, values validated as LEXICAL_UUID_TYPE).
#
# NOTE(review): this extract is missing original lines 38-39, 41-42, 45-47,
# 50 and everything after 53.  The keyspace creation and the column family
# drop loop are presumably guarded by `create_keyspace` and `clobber`
# respectively -- confirm against the full file.
37
mgr = SystemManager(host)
40
# Single-replica keyspace.
mgr.create_keyspace(keyspace, replication_factor=1)
43
# Remove every column family currently defined in the keyspace.
for cf in mgr.get_keyspace_column_families(keyspace):
44
mgr.drop_column_family(keyspace, cf)
48
# NOTE(review): the opening of this call is not visible.  The bound method
# `mgr.create_column_family` is passed as an argument rather than called
# directly, presumably to `workaround_1779` -- confirm.
mgr.create_column_family, keyspace, 'message',
49
key_validation_class=LEXICAL_UUID_TYPE)
51
# Same pattern for the per-archive index of messages.
mgr.create_column_family, keyspace, 'archive_message',
52
comparator_type=TIME_UUID_TYPE,
53
default_validation_class=LEXICAL_UUID_TYPE)
59
def _parse_message(message):
60
"""Get a date and dict of an RFC822 message."""
# Returns a ``(date, message_dict)`` pair built from the raw message text.
#
# NOTE(review): several original lines are absent from this extract (the
# numbering jumps 61->64, 65->67, 69->72, 73->75 and 75->78), including the
# line that creates ``message_dict`` and part of the datetime constructor
# call, so only the visible steps are described here.
61
parsed = email.parser.Parser().parsestr(message)
64
# Copy the headers of interest into the dict.
for key in ('from', 'to', 'subject', 'message-id'):
65
# A header that is not present comes back as None.
value = parsed.get(key, None)
67
message_dict[key] = value
69
date = parsed.get('date')
72
# parsedate_tz returns a 10-tuple whose last element (index 9) is the UTC
# offset in seconds; it is used below to build the tzinfo.
# NOTE(review): parsedate_tz can return None for an unparseable date, and the
# offset element can be None -- check how the missing lines handle that.
pdate = parsedate_tz(date)
73
date = datetime.datetime(
75
tzinfo=dateutil.tz.tzoffset('', pdate[9]))
78
# The dict carries the date as an ISO 8601 string, or None when ``date`` is
# None.
message_dict['date'] = date.isoformat() if date is not None else None
80
return date, message_dict
83
def _utc_datetime(dt):
    """Return ``dt`` expressed in the UTC timezone.

    The conversion is delegated to ``datetime.astimezone`` with a
    ``dateutil.tz.tzutc()`` target, so ``dt`` is presumably expected to
    be timezone-aware.
    """
    utc = dateutil.tz.tzutc()
    return dt.astimezone(utc)
87
def _utc_timestamp(dt):
88
return time.mktime(_utc_datetime(dt).timetuple()) - time.timezone
91
def _utc_timeuuid(dt, lowest_val=True):
    """Build a time-based UUID for the instant ``dt``.

    :param dt: datetime to convert; it is first reduced to a UTC
        timestamp by ``_utc_timestamp``.
    :param lowest_val: passed straight through to pycassa's
        ``convert_time_to_uuid``.
    """
    timestamp = _utc_timestamp(dt)
    return convert_time_to_uuid(timestamp, lowest_val)
95
def _cmp_timeuuid(a, b):
97
return cmp(a.time, b.time)
101
def _bound_timeuuid(a, b, max=False):
# Chooses between two timeuuids `a` and `b`.
#
# The visible test is true when `a` is the empty string, or when `b`
# compares later than `a` (with max=True) or earlier than `a` (with the
# default max=False), using _cmp_timeuuid's comparison by time.
#
# NOTE(review): the branch bodies (original lines 103-106) are not in this
# extract; presumably `b` is returned when the test holds and `a`
# otherwise -- confirm.  The `max` parameter shadows the builtin but is part
# of the signature used by callers.
102
if a == '' or _cmp_timeuuid(b, a) == (1 if max else -1):
107
def _make_bounds(memo, range_start, range_finish, backward):
# Work out the column range to query and return it as a
# ``(memo, start, finish)`` triple.
#
# NOTE(review): original lines 108-109 and 111-114 are absent from this
# extract, so the way `start` and `finish` are initialised (presumably from
# `memo` and `backward`) is not visible -- confirm against the full file.
110
# The memo is parsed into a UUID object here.
memo = uuid.UUID(memo)
115
if range_start is not None:
116
# Combine the current start with the lowest timeuuid for range_start,
# using the max=True mode of _bound_timeuuid.
start = _bound_timeuuid(
117
start, _utc_timeuuid(range_start), max=True)
118
if range_finish is not None:
119
# Combine the current finish with the timeuuid for range_finish built with
# lowest_val=False, using the default (max=False) mode.
finish = _bound_timeuuid(
120
finish, _utc_timeuuid(range_finish, lowest_val=False))
121
return memo, start, finish
124
def _format_message(message):
# Maps a stored message onto the keys 'date', 'from', 'subject',
# 'message-id' and 'content'.  Each value is read with `message.get`, so a
# key missing from `message` yields None.
#
# NOTE(review): the lines that open and close the dict literal (original
# lines 125 and 131) are not in this extract; presumably the dict is
# returned -- confirm.
126
'date': message.get('date'),
127
'from': message.get('from'),
128
'subject': message.get('subject'),
129
'message-id': message.get('message-id'),
130
'content': message.get('content'),
134
class CassandraConnection(object):
136
def __init__(self, keyspace, host):
# Remember the keyspace, open the connection via self._connect(), and bind
# the 'message' and 'archive_message' column families as attributes.
#
# NOTE(review): original line 138 is not in this extract; presumably it
# stores `host` as self._host, which is read when connecting -- confirm.
137
self._keyspace = keyspace
139
self._connection = self._connect()
140
self.messages = self._column_family('message')
141
self.archive_messages = self._column_family('archive_message')
144
return pycassa.connect(self._keyspace, self._host)
146
def _column_family(self, name):
    """Return a pycassa ColumnFamily for ``name`` on this connection.

    :param name: name of the column family to bind.
    """
    connection = self._connection
    return pycassa.ColumnFamily(connection, name)
149
def add_message(self, archive_uuid, message):
# Store one raw message and index it by date.
#
# The message gets a fresh random UUID, its parsed headers plus the raw
# text and a creation stamp are written to the `messages` column family,
# and a {timestamp: message UUID} column is written to `archive_messages`.
#
# NOTE(review): original lines 157 and 159 are absent from this extract (the
# row key of the archive insert, presumably `archive_uuid`, and the opening
# of the logging call) -- confirm against the full file.
150
message_uuid = uuid.uuid4()
151
message_date, message_dict = _parse_message(message)
152
# Keep the complete original text alongside the extracted headers.
message_dict['content'] = message
153
# Naive UTC "now" with a literal 'Z' suffix appended.
message_dict['date_created'] = (
154
datetime.datetime.utcnow().isoformat() + 'Z')
155
self.messages.insert(message_uuid, message_dict)
156
self.archive_messages.insert(
158
# NOTE(review): _parse_message guards against a None date when building the
# dict, so `message_date` may be None here; _utc_timestamp would then fail
# -- confirm whether messages without a date can reach this point.
{_utc_timestamp(message_date): message_uuid})
160
'Imported %s into %s'
161
% (message_dict.get('message-id', None), archive_uuid))
164
def _trim(self, sequence, end):
165
"""Return the sequence with one of the ends trimmed.
167
:param end: if true, remove the last element. otherwise remove
175
def get_messages(self, archive_uuid, order, count, memo, backward=False,
176
start_date=None, finish_date=None):
177
if order in ("date", "-date"):
178
reversed = order[0] == '-'
180
raise AssertionError("Unsupported order.")
182
memo, start, finish = _make_bounds(
183
memo, start_date, finish_date, backward)
185
# Get up to n+1 messages from the memo: the last item of the
186
# previous batch (because that's where the memo starts) + this
188
pairs = self.archive_messages.get(
189
archive_uuid, column_count=count + 1, column_start=start,
190
column_finish=finish, column_reversed=reversed).items()
192
if len(pairs) and memo and pairs[0][0] <= memo:
193
# The memo (from the previous batch) was included in the result.
195
pairs = self._trim(pairs, False ^ backward)
196
elif len(pairs) > count:
197
# There was no memo in the result, so the n+1th element is
198
# unnecessary. Kill it.
199
pairs = self._trim(pairs, True ^ backward)
202
return (None, [], None)
204
assert 0 < len(pairs) <= count
206
# We've narrowed down the message references. Fetch the messages.
207
ids = [v for k, v in pairs]
208
messages = self.messages.multiget(
209
ids, columns=['date', 'from', 'subject', 'message-id', 'content'])
213
[_format_message(messages[id]) for id in ids],