# ~unity-2d-team/unity-2d/Shell-MultiMonitor : contents of grackle/model.py at revision 38
#
# ~unity-2d-team/unity-2d/Shell-MultiMonitor : (revision 38)
# Copyright (c) 2012 Canonical Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public
# License along with this program. If not, see
# <http://www.gnu.org/licenses/>.

import datetime
import dateutil.tz
import email.parser
from email.utils import parsedate_tz
import functools
import logging
import time
import uuid

import pycassa
from pycassa.system_manager import (
    LEXICAL_UUID_TYPE,
    SystemManager,
    TIME_UUID_TYPE,
    )
from pycassa.util import convert_time_to_uuid

from grackle.cassandra import workaround_1779


def create_schema(host, keyspace, clobber=False, create_keyspace=False):
    """Create the grackle column families in a Cassandra keyspace.

    Two column families are created:

    * ``message``: row keys validated as lexical UUIDs.
    * ``archive_message``: column names compared as time UUIDs, column
      values validated as lexical UUIDs.

    :param host: server address handed to pycassa's SystemManager.
    :param keyspace: name of the keyspace holding the column families.
    :param clobber: if true, drop every column family already present in
        the keyspace before creating the new ones.
    :param create_keyspace: if true, create the keyspace first (with a
        replication factor of 1).
    """
    mgr = SystemManager(host)

    # Every call that talks to the server lives inside the try block so the
    # manager connection is closed even when an early step fails.
    try:
        if create_keyspace:
            mgr.create_keyspace(keyspace, replication_factor=1)

        if clobber:
            for cf in mgr.get_keyspace_column_families(keyspace):
                mgr.drop_column_family(keyspace, cf)

        workaround_1779(
            mgr.create_column_family, keyspace, 'message',
            key_validation_class=LEXICAL_UUID_TYPE)
        workaround_1779(
            mgr.create_column_family, keyspace, 'archive_message',
            comparator_type=TIME_UUID_TYPE,
            default_validation_class=LEXICAL_UUID_TYPE)
    finally:
        mgr.close()


def _parse_message(message):
    """Get a date and dict of an RFC822 message."""
    parsed = email.parser.Parser().parsestr(message)
    message_dict = {}

    for key in ('from', 'to', 'subject', 'message-id'):
        value = parsed.get(key, None)
        if value is not None:
            message_dict[key] = value

    date = parsed.get('date')
    if date is not None:
        try:
            pdate = parsedate_tz(date)
            date = datetime.datetime(
                *pdate[:6],
                tzinfo=dateutil.tz.tzoffset('', pdate[9]))
        except ValueError:
            pass
    message_dict['date'] = date.isoformat() if date is not None else None

    return date, message_dict


def _utc_datetime(dt):
    """Return the datetime `dt` converted to the UTC timezone."""
    # NOTE(review): presumably `dt` is always timezone-aware here; confirm.
    utc = dateutil.tz.tzutc()
    return dt.astimezone(utc)


def _utc_timestamp(dt):
    """Return the datetime `dt` as seconds since the epoch.

    The result is a float; sub-second precision is dropped because the
    conversion goes through a time tuple.
    """
    utc_fields = _utc_datetime(dt).timetuple()
    # mktime reads the tuple as local time, so take the local zone offset
    # back out to get a UTC-based value.
    local_seconds = time.mktime(utc_fields)
    return local_seconds - time.timezone


def _utc_timeuuid(dt, lowest_val=True):
    """Convert the datetime `dt` to a time UUID via its UTC timestamp.

    :param lowest_val: passed straight through to pycassa's
        convert_time_to_uuid.
    """
    timestamp = _utc_timestamp(dt)
    return convert_time_to_uuid(timestamp, lowest_val)


def _cmp_timeuuid(a, b):
    if a.time != b.time:
        return cmp(a.time, b.time)
    return cmp(a, b)


def _bound_timeuuid(a, b, max=False):
    if a == '' or _cmp_timeuuid(b, a) == (1 if max else -1):
        return b
    return a


def _make_bounds(memo, range_start, range_finish, backward):
    start = finish = ''
    if memo != '':
        memo = uuid.UUID(memo)
    if backward:
        finish = memo
    else:
        start = memo
    if range_start is not None:
        start = _bound_timeuuid(
            start, _utc_timeuuid(range_start), max=True)
    if range_finish is not None:
        finish = _bound_timeuuid(
            finish, _utc_timeuuid(range_finish, lowest_val=False))
    return memo, start, finish


# Header names that may be requested when formatting a message.
LEGAL_HEADERS = set(('date', 'from', 'subject', 'message-id'))


def _format_message(want, message):
    data = {}
    for key in want:
        data[key] = message.get(key)
    return data


def _format_all(headers):
    """Build the column list and formatter for the 'all' format.

    :param headers: iterable of header names; each must be in
        LEGAL_HEADERS.
    :return: a ``(want, formatter)`` tuple. ``want`` is the requested
        headers followed by 'content'; ``formatter`` takes a message dict
        and returns a dict restricted to those keys.
    :raises AssertionError: if any requested header is not legal.
    """
    # Raised explicitly rather than via an assert statement so the check
    # still runs when Python is started with -O.
    illegal = set(headers).difference(LEGAL_HEADERS)
    if illegal:
        raise AssertionError(
            'Unsupported headers: %r' % (sorted(illegal, key=repr),))
    want = list(headers) + ['content']
    return want, functools.partial(_format_message, want)


# Maps a format name to a factory that takes the requested headers and
# returns a (wanted columns, formatter function) pair.
FORMATS = dict(all=_format_all)


class CassandraConnection(object):
    """Store and retrieve archived messages in Cassandra through pycassa.

    Two column families are used: ``message`` (one row per message UUID,
    holding the parsed headers and the raw content) and ``archive_message``
    (one row per archive UUID, mapping a message timestamp to the message
    UUID).
    """

    def __init__(self, keyspace, host):
        """Connect to `keyspace` on `host` and open both column families."""
        self._keyspace = keyspace
        self._host = host
        self._connection = self._connect()
        self.messages = self._column_family('message')
        self.archive_messages = self._column_family('archive_message')

    def _connect(self):
        """Open the pycassa connection for the configured keyspace/host."""
        return pycassa.connect(self._keyspace, self._host)

    def _column_family(self, name):
        """Return the column family `name` on the current connection."""
        return pycassa.ColumnFamily(self._connection, name)

    def add_message(self, archive_uuid, message):
        """Store `message` and index it under `archive_uuid` by its date.

        :param archive_uuid: row key of the archive to add the message to.
        :param message: the raw RFC822 message text.
        :return: the UUID generated for the stored message.
        :raises ValueError: if the message has no usable Date header. This
            is checked before anything is written, so a rejected message
            leaves no row behind.
        """
        message_uuid = uuid.uuid4()
        message_date, message_dict = _parse_message(message)
        if not isinstance(message_date, datetime.datetime):
            raise ValueError(
                'Message has no parseable Date header: %r'
                % (message_dict.get('message-id', None),))
        # Work out the index column before the first insert so a failure
        # here cannot orphan an already-written message row.
        timestamp = _utc_timestamp(message_date)
        message_dict['content'] = message
        message_dict['date_created'] = (
            datetime.datetime.utcnow().isoformat() + 'Z')
        self.messages.insert(message_uuid, message_dict)
        self.archive_messages.insert(
            archive_uuid, {timestamp: message_uuid})
        logging.debug(
            'Imported %s into %s',
            message_dict.get('message-id', None), archive_uuid)
        return message_uuid

    def _trim(self, sequence, end):
        """Return the sequence with one of the ends trimmed.

        :param end: if true, remove the last element. otherwise remove
            the first.
        """
        if end:
            return sequence[:-1]
        else:
            return sequence[1:]

    def get_messages(self, archive_uuid, order, count, memo, backward=False,
                     start_date=None, finish_date=None, format='all',
                     headers=('from', 'date', 'subject', 'message-id')):
        """Fetch one batch of messages from an archive.

        :param archive_uuid: row key of the archive to read.
        :param order: 'date' or '-date' (reversed column order).
        :param count: maximum number of messages to return.
        :param memo: '' for the first batch, otherwise a memo string
            returned by an earlier call.
        :param backward: if true, the memo bounds the end of the range
            instead of the start.
        :param start_date: optional datetime lower limit.
        :param finish_date: optional datetime upper limit.
        :param format: key into FORMATS selecting the output format.
        :param headers: header names to include in each returned message.
        :return: ``(first_memo, messages, last_memo)``, where the memos are
            the string forms of the first and last column names in the
            batch; ``(None, [], None)`` when the batch is empty.
        :raises AssertionError: if `order` is not supported.
        """
        if order not in ("date", "-date"):
            raise AssertionError("Unsupported order.")
        descending = order[0] == '-'

        memo, start, finish = _make_bounds(
            memo, start_date, finish_date, backward)

        # Get up to n+1 messages from the memo: the last item of the
        # previous batch (because that's where the memo starts) + this
        # batch.
        pairs = list(self.archive_messages.get(
            archive_uuid, column_count=count + 1, column_start=start,
            column_finish=finish, column_reversed=descending).items())

        if pairs and memo and pairs[0][0] <= memo:
            # The memo (from the previous batch) was included in the result.
            # Trim it.
            pairs = self._trim(pairs, bool(backward))
        elif len(pairs) > count:
            # There was no memo in the result, so the n+1th element is
            # unnecessary. Kill it.
            pairs = self._trim(pairs, not backward)

        if not pairs:
            return (None, [], None)

        assert 0 < len(pairs) <= count

        # We've narrowed down the message references. Fetch the messages.
        ids = [message_uuid for _, message_uuid in pairs]

        wanted_cols, func = FORMATS[format](headers)

        # XXX: No need to get all columns. Restrict based on format.
        messages = self.messages.multiget(ids)

        return (
            str(pairs[0][0]),
            [func(messages[message_uuid]) for message_uuid in ids],
            str(pairs[-1][0]),
            )