81
78
return date, message_dict
84
def _utc_datetime(dt):
    """Return `dt` converted to the UTC timezone.

    :param dt: a datetime object. `astimezone` is called on it, so it is
        presumably expected to be timezone-aware -- confirm with callers.
    :return: the result of `dt.astimezone()` using dateutil's UTC tzinfo.
    """
    utc = dateutil.tz.tzutc()
    return dt.astimezone(utc)
88
def _utc_timestamp(dt):
    """Return the number of seconds since the epoch for `dt`.

    `dt` is first converted to UTC with `_utc_datetime`; the resulting
    time tuple is then passed to `calendar.timegm`.

    :param dt: a datetime object accepted by `_utc_datetime`.
    :return: the value produced by `calendar.timegm`.
    """
    utc_time_tuple = _utc_datetime(dt).timetuple()
    return calendar.timegm(utc_time_tuple)
92
def _utc_timeuuid(dt, lowest_val=True):
    """Convert `dt` to a UUID via `convert_time_to_uuid`.

    :param dt: a datetime object; it is turned into a UTC timestamp with
        `_utc_timestamp` before conversion.
    :param lowest_val: passed through unchanged as the second argument of
        `convert_time_to_uuid`.
    :return: whatever `convert_time_to_uuid` returns for that timestamp.
    """
    timestamp = _utc_timestamp(dt)
    return convert_time_to_uuid(timestamp, lowest_val)
96
def _cmp_timeuuid(a, b):
    """Three-way compare two objects by their `time` attribute.

    :param a: first object exposing a `time` attribute.
    :param b: second object exposing a `time` attribute.
    :return: -1 if `a.time` is less than `b.time`, 0 if they are equal,
        1 if it is greater.
    """
    a_time = a.time
    b_time = b.time
    # Same result as cmp(a.time, b.time), without relying on the `cmp`
    # builtin, which no longer exists in Python 3.
    return (a_time > b_time) - (a_time < b_time)
102
def _bound_timeuuid(a, b, max=False):
103
if a == '' or _cmp_timeuuid(b, a) == (1 if max else -1):
108
def _make_bounds(memo, range_start, range_finish, backward):
111
memo = uuid.UUID(memo)
116
if range_start is not None:
117
start = _bound_timeuuid(
118
start, _utc_timeuuid(range_start), max=True)
119
if range_finish is not None:
120
finish = _bound_timeuuid(
121
finish, _utc_timeuuid(range_finish, lowest_val=False))
122
return memo, start, finish
125
LEGAL_HEADERS = set([
126
'date', 'from', 'subject', 'message-id',
130
def _format_message(message, headers=[], include_raw=False):
134
assert not set(headers).difference(LEGAL_HEADERS)
136
for header in headers:
137
hdict[header] = message.get(header)
138
data['headers'] = hdict
141
data['raw'] = message['raw']
146
81
class CassandraConnection(object):
148
def __init__(self, keyspace, hosts):
83
def __init__(self, keyspace, host):
149
84
self._keyspace = keyspace
151
86
self._connection = self._connect()
152
self._pool = self._connect()
153
87
self.messages = self._column_family('message')
154
88
self.archive_messages = self._column_family('archive_message')
156
90
def _connect(self):
157
return pycassa.pool.ConnectionPool(self._keyspace, self._hosts)
91
return pycassa.connect(self._keyspace, self._host)
159
93
def _column_family(self, name):
160
return pycassa.ColumnFamily(self._pool, name)
94
return pycassa.ColumnFamily(self._connection, name)
162
96
def add_message(self, archive_uuid, message):
163
97
message_uuid = uuid.uuid4()
164
98
message_date, message_dict = _parse_message(message)
165
message_dict['raw'] = message
99
message_dict['content'] = message
166
100
message_dict['date_created'] = (
167
101
datetime.datetime.utcnow().isoformat() + 'Z')
168
102
self.messages.insert(message_uuid, message_dict)
169
103
self.archive_messages.insert(
171
{_utc_timestamp(message_date): message_uuid})
105
{message_date.astimezone(dateutil.tz.tzutc()): message_uuid})
173
107
'Imported %s into %s'
174
108
% (message_dict.get('message-id', None), archive_uuid))
175
109
return message_uuid
111
def _format_message(self, message):
113
'date': message.get('date'),
114
'from': message.get('from'),
115
'subject': message.get('subject'),
116
'message-id': message.get('message-id'),
177
119
def _trim(self, sequence, end):
178
"""Return the sequence with one of the ends trimmed.
180
:param end: if true, remove the last element. otherwise remove
184
121
return sequence[:-1]
186
123
return sequence[1:]
188
def get_messages(self, archive_uuid, order, count, memo, backward=False,
189
start_date=None, finish_date=None, format='all',
190
headers=['from', 'date', 'subject', 'message-id']):
125
def get_messages(self, archive_uuid, order, count, memo, backward=False):
191
126
if order in ("date", "-date"):
192
127
reversed = order[0] == '-'
194
129
raise AssertionError("Unsupported order.")
196
memo, start, finish = _make_bounds(
197
memo, start_date, finish_date, backward)
131
memo = uuid.UUID(memo)
199
132
# Get up to n+1 messages from the memo: the last item of the
200
133
# previous batch (because that's where the memo starts) + this
202
141
pairs = self.archive_messages.get(
203
142
archive_uuid, column_count=count + 1, column_start=start,
204
143
column_finish=finish, column_reversed=reversed).items()
206
if len(pairs) and memo and pairs[0][0] <= memo:
207
# The memo (from the previous batch) was included in the result.
144
if memo and len(pairs) and pairs[0][0] <= memo:
209
145
pairs = self._trim(pairs, False ^ backward)
210
146
elif len(pairs) > count:
211
# There was no memo in the result, so the n+1th element is
212
# unnecessary. Kill it.
213
147
pairs = self._trim(pairs, True ^ backward)
215
149
if len(pairs) == 0: