~unity-2d-team/unity-2d/Shell-MultiMonitor

« back to all changes in this revision

Viewing changes to grackle/model.py

Committer: Aaron Bentley
Date: 2012-01-10 10:46:26 UTC
Revision ID: aaron@canonical.com-20120110104626-39ehw9nhnzdzggtw

Add README and LICENSE

files removed:
Makefile

grackle

grackle-create-instance

grackle-put-message

grackle/__init__.py

grackle/cassandra.py

grackle/client.py

grackle/error.py

grackle/model.py

grackle/service.py

grackle/store.py

grackle/testing

grackle/testing/__init__.py

grackle/testing/cassandra.py

grackle/tests

grackle/tests/__init__.py

grackle/tests/test_client.py

grackle/tests/test_model.py

grackle/tests/test_wsgi.py

grackle/wsgi.py

files modified:
README.txt

Show diffs side-by-side

added added

removed removed

grackle/model.py

# This program is free software: you can redistribute it and/or modify

# it under the terms of the GNU Affero General Public License as published by

# the Free Software Foundation, either version 3 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public

# License along with this program. If not, see

# <http://www.gnu.org/licenses/>.

import calendar

import datetime

import email.parser

from email.utils import parsedate_tz

import functools

import logging

import uuid

import dateutil.tz

import pycassa.pool

from pycassa.system_manager import (

LEXICAL_UUID_TYPE,

SystemManager,

TIME_UUID_TYPE,

)

from pycassa.util import convert_time_to_uuid

from grackle.cassandra import workaround_1779

def create_schema(host, keyspace, clobber=False, create_keyspace=False):
    """Create the column families grackle stores its data in.

    Two column families are created in `keyspace`:

    * ``message``, whose row keys are validated as lexical UUIDs.
    * ``archive_message``, whose column names are compared as time UUIDs
      and whose values default to lexical UUIDs.

    :param host: server address handed to ``SystemManager``.
    :param keyspace: name of the keyspace that holds the column families.
    :param clobber: if true, drop every column family already present in
        the keyspace before creating the new ones.
    :param create_keyspace: if true, create the keyspace first, with a
        replication factor of 1.
    """
    mgr = SystemManager(host)
    # Every operation on the manager happens inside the try block so that
    # the connection is closed even when creating the keyspace or dropping
    # the old column families fails.
    try:
        if create_keyspace:
            mgr.create_keyspace(keyspace, replication_factor=1)
        if clobber:
            for cf in mgr.get_keyspace_column_families(keyspace):
                mgr.drop_column_family(keyspace, cf)
        workaround_1779(
            mgr.create_column_family, keyspace, 'message',
            key_validation_class=LEXICAL_UUID_TYPE)
        workaround_1779(
            mgr.create_column_family, keyspace, 'archive_message',
            comparator_type=TIME_UUID_TYPE,
            default_validation_class=LEXICAL_UUID_TYPE)
    finally:
        mgr.close()

def _parse_message(message):
    """Get a date and dict of an RFC822 message.

    :param message: the raw text of the message.
    :return: a ``(date, message_dict)`` tuple.  ``date`` is a
        timezone-aware ``datetime`` built from the Date header, or None
        when the header is absent or cannot be turned into a valid
        datetime.  ``message_dict`` holds whichever of the 'from', 'to',
        'subject' and 'message-id' headers are present, plus a 'date'
        entry containing the ISO 8601 form of ``date`` (or None).
    """
    parsed = email.parser.Parser().parsestr(message)
    message_dict = {}
    for key in ('from', 'to', 'subject', 'message-id'):
        value = parsed.get(key, None)
        if value is not None:
            message_dict[key] = value
    date = None
    raw_date = parsed.get('date')
    if raw_date is not None:
        # parsedate_tz() reports an unparseable header by returning None
        # rather than raising, so that case is checked explicitly.
        pdate = parsedate_tz(raw_date)
        if pdate is not None:
            try:
                # The UTC offset may be None when the header names no
                # zone; such dates are treated as UTC.
                date = datetime.datetime(
                    *pdate[:6],
                    tzinfo=dateutil.tz.tzoffset('', pdate[9] or 0))
            except ValueError:
                # Fields out of range (e.g. day 32): treat as undated
                # instead of leaking the raw header string to callers.
                date = None
    message_dict['date'] = date.isoformat() if date is not None else None
    return date, message_dict

def _utc_datetime(dt):
    """Return the datetime `dt` converted to the UTC timezone."""
    utc = dateutil.tz.tzutc()
    return dt.astimezone(utc)

def _utc_timestamp(dt):
    """Return the datetime `dt` as whole seconds since the epoch, in UTC."""
    utc_fields = _utc_datetime(dt).timetuple()
    return calendar.timegm(utc_fields)

def _utc_timeuuid(dt, lowest_val=True):
    """Return a time UUID for the datetime `dt`.

    :param dt: the datetime to convert; it is reduced to a UTC timestamp
        first.
    :param lowest_val: forwarded unchanged to ``convert_time_to_uuid``.
    """
    timestamp = _utc_timestamp(dt)
    return convert_time_to_uuid(timestamp, lowest_val)

def _cmp_timeuuid(a, b):
    """Three-way comparison of two UUIDs, ordering by embedded time first.

    Plain UUID ordering does not follow the timestamp, so the ``time``
    attributes are compared first and the ordinary UUID ordering is only
    used to break ties between UUIDs carrying the same timestamp.

    :return: exactly -1, 0 or 1 (callers compare the result against
        those values).
    """
    # ``(x > y) - (x < y)`` is the portable spelling of ``cmp(x, y)``; the
    # ``cmp`` builtin does not exist on Python 3.
    if a.time != b.time:
        return (a.time > b.time) - (a.time < b.time)
    return (a > b) - (a < b)

100

101

102

def _bound_timeuuid(a, b, max=False):
    """Pick one of two time UUID bounds.

    :param a: the current bound, or '' when no bound has been set yet.
    :param b: the candidate bound.
    :param max: if true, `b` wins when it sorts after `a`; otherwise `b`
        wins when it sorts before `a`.
    :return: `b` when `a` is unset or `b` wins, else `a`.
    """
    if a == '':
        return b
    winning_order = 1 if max else -1
    if _cmp_timeuuid(b, a) == winning_order:
        return b
    return a

106

107

108

def _make_bounds(memo, range_start, range_finish, backward):
    """Work out the column range to query for one batch of messages.

    :param memo: '' or the string form of a UUID marking where the
        previous batch stopped.
    :param range_start: optional datetime; no column before it is wanted.
    :param range_finish: optional datetime; no column after it is wanted.
    :param backward: if true the memo becomes the finish bound, otherwise
        it becomes the start bound.
    :return: a ``(memo, start, finish)`` tuple.  ``memo`` is converted to
        a ``uuid.UUID`` unless it was ''; ``start`` and ``finish`` are
        UUIDs, or '' where that end is unbounded.
    """
    lower = upper = ''
    if memo != '':
        memo = uuid.UUID(memo)
        lower, upper = ('', memo) if backward else (memo, '')
    if range_start is not None:
        earliest = _utc_timeuuid(range_start)
        lower = _bound_timeuuid(lower, earliest, max=True)
    if range_finish is not None:
        latest = _utc_timeuuid(range_finish, lowest_val=False)
        upper = _bound_timeuuid(upper, latest)
    return memo, lower, upper

123

124

125

# The only header names callers may ask _format_message() to return.
LEGAL_HEADERS = set([
    'date', 'from', 'subject', 'message-id',
    ])


def _format_message(message, headers=(), include_raw=False):
    """Build the dict describing one stored message.

    :param message: mapping of the stored message's fields.
    :param headers: names of the headers to include; each must be in
        LEGAL_HEADERS.  Headers missing from `message` are reported as
        None.  When empty, no 'headers' entry is produced.
    :param include_raw: if true, add the message's 'raw' field under the
        'raw' key.
    :return: a dict with an optional 'headers' dict and an optional 'raw'
        entry.
    :raise AssertionError: if `headers` names anything outside
        LEGAL_HEADERS.
    """
    data = {}

    if headers:
        illegal = set(headers).difference(LEGAL_HEADERS)
        if illegal:
            # Raised explicitly (rather than via an assert statement) so
            # the check is still performed when Python runs with -O.
            raise AssertionError(
                'Unsupported headers: %r' % (sorted(illegal, key=repr),))
        data['headers'] = dict(
            (header, message.get(header)) for header in headers)

    if include_raw:
        data['raw'] = message['raw']

    return data

144

145

146

class CassandraConnection(object):
    """Store and retrieve messages in a Cassandra keyspace.

    ``messages`` is the 'message' column family, where each stored message
    lives under its own UUID.  ``archive_messages`` is the
    'archive_message' column family, where each archive row maps columns
    derived from message dates to message UUIDs.
    """

    def __init__(self, keyspace, hosts):
        """Connect to `keyspace` on `hosts` and open both column families."""
        self._keyspace = keyspace
        self._hosts = hosts
        # A single pool is enough: the column families only ever use
        # ``_pool``.  ``_connection`` is kept as an alias of it so the
        # attribute still exists without holding a second set of
        # connections open.
        self._pool = self._connect()
        self._connection = self._pool
        self.messages = self._column_family('message')
        self.archive_messages = self._column_family('archive_message')

    def _connect(self):
        """Return a new connection pool for the configured keyspace."""
        return pycassa.pool.ConnectionPool(self._keyspace, self._hosts)

    def _column_family(self, name):
        """Return the column family `name`, bound to this object's pool."""
        return pycassa.ColumnFamily(self._pool, name)

    def add_message(self, archive_uuid, message):
        """Store an RFC822 message and reference it from an archive.

        :param archive_uuid: row key of the archive in 'archive_message'.
        :param message: the raw text of the message.
        :return: the freshly generated UUID the message is stored under.
        :raise ValueError: if no date could be obtained from the message;
            nothing is written in that case.
        """
        message_date, message_dict = _parse_message(message)
        if message_date is None:
            # The archive column is derived from the message date, so an
            # undated message cannot be referenced.  Fail before writing
            # anything rather than leave an unreferenced message row.
            raise ValueError('Message has no usable Date header.')
        message_uuid = uuid.uuid4()
        message_dict['raw'] = message
        message_dict['date_created'] = (
            datetime.datetime.utcnow().isoformat() + 'Z')
        self.messages.insert(message_uuid, message_dict)
        self.archive_messages.insert(
            archive_uuid,
            {_utc_timestamp(message_date): message_uuid})
        logging.debug(
            'Imported %s into %s',
            message_dict.get('message-id', None), archive_uuid)
        return message_uuid

    def _trim(self, sequence, end):
        """Return the sequence with one of the ends trimmed.

        :param end: if true, remove the last element. otherwise remove
            the first.
        """
        if end:
            return sequence[:-1]
        return sequence[1:]

    def get_messages(self, archive_uuid, order, count, memo, backward=False,
                     start_date=None, finish_date=None, format='all',
                     headers=('from', 'date', 'subject', 'message-id')):
        """Fetch one batch of messages from an archive.

        :param archive_uuid: row key of the archive in 'archive_message'.
        :param order: 'date' or '-date'; a leading '-' requests the
            columns in reversed order.
        :param count: maximum number of messages to return.
        :param memo: '' or the string form of a column UUID returned by a
            previous call, marking where that batch stopped.
        :param backward: if true the memo bounds the finish of the range
            instead of the start.
        :param start_date: optional datetime lower bound.
        :param finish_date: optional datetime upper bound.
        :param format: currently unused.
        :param headers: names of the headers to include for each message.
        :return: a ``(first, messages, last)`` tuple, where ``first`` and
            ``last`` are the string forms of the first and last column
            names in the batch and ``messages`` is the list of formatted
            messages.  ``(None, [], None)`` when the batch is empty.
        :raise AssertionError: if `order` is not supported.
        """
        if order not in ("date", "-date"):
            raise AssertionError("Unsupported order.")
        descending = order[0] == '-'

        memo, start, finish = _make_bounds(
            memo, start_date, finish_date, backward)

        # Get up to n+1 messages from the memo: the last item of the
        # previous batch (because that's where the memo starts) + this
        # batch.
        pairs = self.archive_messages.get(
            archive_uuid, column_count=count + 1, column_start=start,
            column_finish=finish, column_reversed=descending).items()

        if len(pairs) and memo and pairs[0][0] <= memo:
            # The memo (from the previous batch) was included in the result.
            # Trim it.
            pairs = self._trim(pairs, False ^ backward)
        elif len(pairs) > count:
            # There was no memo in the result, so the n+1th element is
            # unnecessary. Kill it.
            pairs = self._trim(pairs, True ^ backward)

        if len(pairs) == 0:
            return (None, [], None)

        assert 0 < len(pairs) <= count

        # We've narrowed down the message references. Fetch the messages.
        ids = [message_id for _, message_id in pairs]
        formatter = functools.partial(
            _format_message, headers=headers, include_raw=True)
        # XXX: No need to get all columns. Restrict based on format.
        messages = self.messages.multiget(ids)

        return (
            str(pairs[0][0]),
            [formatter(messages[message_id]) for message_id in ids],
            str(pairs[-1][0]),
            )

Older »