~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python -S
8687.15.7 by Karl Fogel
Add the copyright header block to more files.
2
#
14538.2.49 by Curtis Hovey
Updated copyright.
3
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
8687.15.7 by Karl Fogel
Add the copyright header block to more files.
4
# GNU Affero General Public License version 3 (see the file LICENSE).
7675.86.8 by Guilherme Salgado
Glue everything together in the script
5
8269.5.11 by Guilherme Salgado
Change apache log parsing script to pop items from the dict as it goes along to free some memory.
6
"""Parse librarian apache logs to find out download counts for each file.
7
8
Thanks to the *huge* number of different LibraryFileAlias objects this script
9
will fetch when parsing multiple log files from scratch and the fact that we
10
overwrite storm's cache with something that caches *everything*, this script
11
may end up eating all your RAM. That shouldn't happen in general as we run
12
it multiple times a day, but if we ever fail to run it for more than a week,
13
we may need to add a hack (store._cache.clear()) to clear the cache after
14
updating the counts of every LFA, in order to get through the backlog.
15
"""
7675.86.8 by Guilherme Salgado
Glue everything together in the script
16
17
# Python 2 idiom: a module-level __metaclass__ of `type` makes classes that
# are declared without explicit bases new-style classes.
__metaclass__ = type
18
19
# pylint: disable-msg=W0403
20
import _pythonpath
14578.2.1 by William Grant
Move librarian stuff from canonical.launchpad to lp.services.librarian. canonical.librarian remains untouched.
21
from storm.sqlobject import SQLObjectNotFound
7675.86.8 by Guilherme Salgado
Glue everything together in the script
22
from zope.component import getUtility
23
24
from canonical.config import config
14578.2.2 by William Grant
Move the librarian apache log parser to lp.services.librarian. Its time in lp.services.apachelogparser was short.
25
from lp.services.apachelogparser.script import ParseApacheLogs
26
from lp.services.librarian.apachelogparser import (
14578.2.1 by William Grant
Move librarian stuff from canonical.launchpad to lp.services.librarian. canonical.librarian remains untouched.
27
    DBUSER,
28
    get_library_file_id,
29
    )
30
from lp.services.librarian.interfaces import ILibraryFileAliasSet
9399.3.1 by William Grant
Factor the non-LFA-specific bits out of the parser script itself.
31
32
33
class ParseLibrarianApacheLogs(ParseApacheLogs):
    """Apache log parser specialised for LibraryFileAlias downloads.

    Supplies the hooks of `ParseApacheLogs`: utility setup, the root
    directory of the logs, the mapping from a request path to a download
    key, and the lookup of the callable that updates a download count.
    """

    def setUpUtilities(self):
        """See `ParseApacheLogs`.

        Looks up the `ILibraryFileAliasSet` utility and keeps it on the
        instance for later lookups by file id.
        """
        alias_set = getUtility(ILibraryFileAliasSet)
        self.libraryfilealias_set = alias_set

    @property
    def root(self):
        """See `ParseApacheLogs`.

        The value comes from the `librarianlogparser` config section.
        """
        parser_config = config.librarianlogparser
        return parser_config.logs_root

    def getDownloadKey(self, path):
        """See `ParseApacheLogs`.

        Delegates to `get_library_file_id` to turn the path into a key.
        """
        library_file_id = get_library_file_id(path)
        return library_file_id

    def getDownloadCountUpdater(self, file_id):
        """See `ParseApacheLogs`.

        Returns the `updateDownloadCount` method of the LibraryFileAlias
        with the given id, or None when the lookup raises
        `SQLObjectNotFound`.
        """
        try:
            updater = self.libraryfilealias_set[file_id].updateDownloadCount
        except SQLObjectNotFound:
            # The file is no longer in the librarian, so there is nothing
            # to store download counters against.
            updater = None
        return updater
7675.86.8 by Guilherme Salgado
Glue everything together in the script
57
58
59
if __name__ == '__main__':
    # Script entry point: build the parser with its script name and
    # database user, then hand control to lock_and_run().
    script = ParseLibrarianApacheLogs(
        'parse-librarian-apache-logs',
        DBUSER,
        )
    script.lock_and_run()