1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
import httplib
import simplejson
from urlparse import urlunparse
from urllib import (
quote,
urlencode,
)
class UnparsableDateRange(Exception):
    """The date_range was not in the format of 2012-01-01..2012-01-31."""
    # NOTE: GrackleClient.get_messages compares the docstring above,
    # verbatim, with the HTTP reason phrase of a 400 response to decide
    # whether to raise this exception.  Rewording it changes behaviour.
class UnsupportedDisplayType(Exception):
    """Raised when an Unsupported display_type is requested."""
    # NOTE: GrackleClient.get_messages compares the docstring above,
    # verbatim, with the HTTP reason phrase of a 400 response to decide
    # whether to raise this exception.  Rewording it changes behaviour.
class UnsupportedOrder(Exception):
    """Raised when an Unsupported order is requested."""
    # NOTE: GrackleClient.get_messages compares the docstring above,
    # verbatim, with the HTTP reason phrase of a 400 response to decide
    # whether to raise this exception.  Rewording it changes behaviour.
# The display_type values listed as valid in GrackleClient.get_messages'
# docstring, in the same order.
SUPPORTED_DISPLAY_TYPES = ('all', 'text-only', 'headers-only')
class GrackleClient:
    """Class for accessing Grackle web service."""

    def __init__(self, host, port):
        """Constructor.

        :param host: The name of the server.
        :param port: The port providing Grackle service (an integer; it is
            formatted with ``%d``).
        """
        self.host = host
        self.port = port
        self.netloc = '%s:%d' % (host, port)

    def archive_url(self, archive_id, query):
        """Return the URL for an archive.

        :param archive_id: The id of the archive to generate the URL for.
            It is percent-quoted before being placed in the path.
        :param query: The query to use in the URL, as a dict.
        :return: An ``http`` URL of the form
            ``http://host:port/archive/<archive_id>?<query>``.
        """
        path = '/archive/%s' % quote(archive_id)
        query_string = urlencode(query)
        return urlunparse(('http', self.netloc, path, '', query_string, ''))

    def _get_connection(self):
        """Return a new HTTPConnection for the configured host and port."""
        return httplib.HTTPConnection(self.host, self.port)

    def _method_archive(self, method, archive_id, query, body=None):
        """Perform an HTTP method on an archive's URL.

        :param method: The HTTP method name, e.g. 'GET' or 'POST'.
        :param archive_id: The archive whose URL is requested.
        :param query: The query to use in the URL, as a dict.
        :param body: (optional) The request body.
        :return: The response object.  The caller is responsible for
            closing it.
        """
        url = self.archive_url(archive_id, query)
        connection = self._get_connection()
        try:
            connection.request(method, url, body)
            return connection.getresponse()
        except Exception:
            # Do not leave a half-open socket behind when the request
            # could not be sent or no response could be obtained.
            connection.close()
            raise

    def put_message(self, archive_id, key, file_obj):
        """Put a message into an archive.

        :param archive_id: The archive to put the message into.
        :param key: An arbitrary identifier that can later be used to retrieve
            the message.
        :param file_obj: The raw text of the message, as a file.
        :raises Exception: If the server answers with anything other than
            201 Created.  The message carries the status and reason.
        """
        response = self._method_archive(
            'POST', archive_id, {'key': key}, file_obj.read())
        try:
            # Drain the body so the exchange is complete before closing.
            response.read()
            status = response.status
            reason = response.reason
        finally:
            response.close()
        if status == httplib.CREATED:
            return
        if status == httplib.BAD_REQUEST:
            raise Exception(
                'Bad request putting message %r into archive %r: %s %s'
                % (key, archive_id, status, reason))
        raise Exception(
            'Unexpected response putting message %r into archive %r: %s %s'
            % (key, archive_id, status, reason))

    def get_messages(self, archive_id, message_ids=None, date_range=None,
                     limit=None, memo=None, order=None, headers=None,
                     include_hidden=False, max_body_length=None,
                     display_type='all'):
        """Retrieve specified messages.

        :param archive_id: The archive to retrieve messages from.
        :param message_ids: (optional) Retrieve only messages with these ids.
        :param date_range: Retrieve the messages from or between a range of
            dates. Example: 2012-01-01..2012-01-31 retrieve all the messages
            between the 01 and 31 of January, including message from 01
            and 31.
        :param limit: The maximum number of messages to return.  The server
            may, at its discretion, return fewer.
        :param memo: (optional) Opaque identifier describing the position in
            the list of messages to return.  The combination of a memo and a
            limit describes a batch of results.  If not specified, the start
            is used.
        :param order: The order to return results in.  Supported orders are
            determined by the server.  See test_client.SUPPORTED_ORDERS for an
            example.
        :param headers: The headers to include in the message.  Only headers
            actually present in the message will be provided.  If unspecified,
            most headers will be included.
        :param max_body_length: The maximum length for a message's body.  When
            multiple messages are nested (as with a thread), this applies to
            each message's body, not the aggregate length of all messages'
            bodies.
        :param include_hidden: If true, include messages that have been
            flagged "hidden" in the results.
        :param display_type: Adjust the message content to meet the needs of
            the intended display.  Valid values are:
            all: (the default) include all message content.
            text-only: include only plain/text parts; exclude all other parts.
            headers-only: include only the message headers.
        :return: The response body decoded from JSON.
        :raises UnsupportedOrder: On a 400 response whose reason phrase
            equals that exception's docstring.
        :raises UnsupportedDisplayType: Likewise, for its docstring.
        :raises UnparsableDateRange: Likewise, for its docstring.
        :raises ValueError: On any other 400 response.
        """
        # Optional parameters are only sent when the caller supplied them;
        # display_type and include_hidden are always sent.
        optional_parameters = (
            ('message_ids', message_ids),
            ('date_range', date_range),
            ('limit', limit),
            ('memo', memo),
            ('order', order),
            ('headers', headers),
            ('max_body_length', max_body_length),
        )
        parameters = {}
        for name, value in optional_parameters:
            if value is not None:
                parameters[name] = value
        parameters['display_type'] = display_type
        parameters['include_hidden'] = include_hidden
        query = {'parameters': simplejson.dumps(parameters)}
        response = self._method_archive('GET', archive_id, query)
        try:
            if response.status == httplib.BAD_REQUEST:
                # The server identifies the specific failure through the
                # reason phrase, which is matched against each exception's
                # docstring in this order.
                known_errors = (
                    UnsupportedOrder,
                    UnsupportedDisplayType,
                    UnparsableDateRange,
                )
                for error_class in known_errors:
                    if response.reason == error_class.__doc__:
                        raise error_class
                raise ValueError('Bad request')
            # NOTE(review): other non-success statuses are not checked
            # here; their bodies are handed to the JSON decoder as-is.
            data = response.read()
        finally:
            # Always release the response, including on the error paths
            # above where the body is never read.
            response.close()
        return simplejson.loads(data)
|