108
def _make_bounds(memo, range_start, range_finish, backward):
111
memo = uuid.UUID(memo)
116
if range_start is not None:
117
start = _bound_timeuuid(
118
start, _utc_timeuuid(range_start), max=True)
119
if range_finish is not None:
120
finish = _bound_timeuuid(
121
finish, _utc_timeuuid(range_finish, lowest_val=False))
122
return memo, start, finish
125
LEGAL_HEADERS = set([
126
'date', 'from', 'subject', 'message-id',
130
def _format_message(message, headers=[], include_raw=False):
134
assert not set(headers).difference(LEGAL_HEADERS)
136
for header in headers:
137
hdict[header] = message.get(header)
138
data['headers'] = hdict
141
data['raw'] = message['raw']
107
146
class CassandraConnection(object):
109
def __init__(self, keyspace, host):
148
def __init__(self, keyspace, hosts):
110
149
self._keyspace = keyspace
112
151
self._connection = self._connect()
152
self._pool = self._connect()
113
153
self.messages = self._column_family('message')
114
154
self.archive_messages = self._column_family('archive_message')
116
156
def _connect(self):
117
return pycassa.connect(self._keyspace, self._host)
157
return pycassa.pool.ConnectionPool(self._keyspace, self._hosts)
119
159
def _column_family(self, name):
120
return pycassa.ColumnFamily(self._connection, name)
160
return pycassa.ColumnFamily(self._pool, name)
122
162
def add_message(self, archive_uuid, message):
123
163
message_uuid = uuid.uuid4()
124
164
message_date, message_dict = _parse_message(message)
125
message_dict['content'] = message
165
message_dict['raw'] = message
126
166
message_dict['date_created'] = (
127
167
datetime.datetime.utcnow().isoformat() + 'Z')
128
168
self.messages.insert(message_uuid, message_dict)
134
174
% (message_dict.get('message-id', None), archive_uuid))
135
175
return message_uuid
137
def _format_message(self, message):
139
'date': message.get('date'),
140
'from': message.get('from'),
141
'subject': message.get('subject'),
142
'message-id': message.get('message-id'),
145
177
def _trim(self, sequence, end):
146
178
"""Return the sequence with one of the ends trimmed.
154
186
return sequence[1:]
156
188
def get_messages(self, archive_uuid, order, count, memo, backward=False,
157
start_date=None, finish_date=None):
189
start_date=None, finish_date=None, format='all',
190
headers=['from', 'date', 'subject', 'message-id']):
158
191
if order in ("date", "-date"):
159
192
reversed = order[0] == '-'
161
194
raise AssertionError("Unsupported order.")
163
memo = uuid.UUID(memo)
170
if start_date is not None:
171
start = _bound_timeuuid(
172
start, _utc_timeuuid(start_date, lowest_val=False), max=True)
173
if finish_date is not None:
174
finish = _bound_timeuuid(
175
finish, _utc_timeuuid(finish_date, lowest_val=False))
196
memo, start, finish = _make_bounds(
197
memo, start_date, finish_date, backward)
177
199
# Get up to n+1 messages from the memo: the last item of the
178
200
# previous batch (because that's where the memo starts) + this
198
220
# We've narrowed down the message references. Fetch the messages.
199
221
ids = [v for k, v in pairs]
200
messages = self.messages.multiget(
201
ids, columns=['date', 'from', 'subject', 'message-id'])
222
formatter = functools.partial(
223
_format_message, headers=headers, include_raw=True)
224
# XXX: No need to get all columns. Restrict based on format.
225
messages = self.messages.multiget(ids)
204
228
str(pairs[0][0]),
205
[self._format_message(messages[id]) for id in ids],
229
[formatter(messages[id]) for id in ids],
206
230
str(pairs[-1][0]),