~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python -S
#
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""Parse librarian apache logs to find out download counts for each file.

Thanks to the *huge* number of different LibraryFileAlias objects this script
will fetch when parsing multiple log files from scratch and the fact that we
overwrite storm's cache with something that caches *everything*, this script
may end up eating all your RAM. That shouldn't happen in general as we run
it multiple times a day, but if we ever fail to run it for more than a week,
we may need to add a hack (store._cache.clear()) to clear the cache after
updating the counts of every LFA, in order to get through the backlog.
"""

__metaclass__ = type

# pylint: disable-msg=W0403
import _pythonpath

from zope.component import getUtility

from storm.sqlobject import SQLObjectNotFound

from canonical.config import config
from canonical.launchpad.interfaces.librarian import ILibraryFileAliasSet
from lp.services.apachelogparser.librarian_apache_log_parser import (
    DBUSER, get_library_file_id)
from lp.services.apachelogparser.script import ParseApacheLogs


class ParseLibrarianApacheLogs(ParseApacheLogs):
    """Apache log parser that records LibraryFileAlias downloads.

    Every method here is a hook described on `ParseApacheLogs`; this
    subclass supplies the librarian-specific pieces.
    """

    def setUpUtilities(self):
        """See `ParseApacheLogs`.

        Fetches the `ILibraryFileAliasSet` utility and keeps it on the
        instance as `libraryfilealias_set`.
        """
        alias_set = getUtility(ILibraryFileAliasSet)
        self.libraryfilealias_set = alias_set

    @property
    def root(self):
        """See `ParseApacheLogs`.

        The value is read from `config.librarianlogparser.logs_root`.
        """
        parser_config = config.librarianlogparser
        return parser_config.logs_root

    def getDownloadKey(self, path):
        """See `ParseApacheLogs`.

        The key is whatever `get_library_file_id` returns for `path`.
        """
        file_id = get_library_file_id(path)
        return file_id

    def getDownloadCountUpdater(self, file_id):
        """See `ParseApacheLogs`.

        Returns the `updateDownloadCount` method of the object found under
        `file_id` in `self.libraryfilealias_set`, or None when that lookup
        raises `SQLObjectNotFound`.
        """
        try:
            alias = self.libraryfilealias_set[file_id]
            updater = alias.updateDownloadCount
        except SQLObjectNotFound:
            # The librarian no longer has this file (it was deleted), so
            # there is nothing to store download counters against.
            return None
        return updater


if __name__ == '__main__':
    # Only when executed as a script: build the parser from its script
    # name and the DBUSER constant imported above, then start it through
    # lock_and_run().
    script_name = 'parse-librarian-apache-logs'
    script = ParseLibrarianApacheLogs(script_name, DBUSER)
    script.lock_and_run()