~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python -S
8687.15.22 by Karl Fogel
Add the copyright header block to the remaining .py files.
2
#
3
# Copyright 2009 Canonical Ltd.  This software is licensed under the
4
# GNU Affero General Public License version 3 (see the file LICENSE).
6916.4.1 by Stuart Bishop
Report on Librarian space usage
5
6
"""Report a breakdown of Librarian disk space usage."""
7
8
__metaclass__ = type
9
__all__ = []
10
11
import _pythonpath
12
10054.15.1 by Stuart Bishop
Fix scripts/librarian-report.py and add database connection command line options
13
from optparse import OptionParser
6916.4.1 by Stuart Bishop
Report on Librarian space usage
14
import sys
15
14606.3.4 by William Grant
Replace canonical.database usage everywhere, and format-imports.
16
from lp.services.database.postgresql import listReferences
17
from lp.services.database.sqlbase import (
18
    connect,
19
    quoteIdentifier,
20
    sqlvalues,
21
    )
14565.2.15 by Curtis Hovey
Moved canonical.launchpad.scripts __init__ to lp.services.scripts.
22
from lp.services.scripts import db_options
6916.4.1 by Stuart Bishop
Report on Librarian space usage
23
24
25
def _date_clause(comparison, date):
    """Return an SQL fragment filtering on LFC.datecreated, or ''.

    :param comparison: SQL comparison operator text, e.g. '>=' or '<='.
    :param date: Date string taken from the command line, or None when
        no filter was requested.
    :return: 'AND LFC.datecreated <comparison> <quoted date>', or the
        empty string when `date` is None.
    """
    if date is None:
        return ''
    # sqlvalues() does the quoting; the raw option text is never
    # interpolated into the query directly.
    quoted_date = '%s' % sqlvalues(date)
    return 'AND LFC.datecreated %s %s' % (comparison, quoted_date)


def _referrer_usage(cur, referring_table, referring_column,
                    from_date, until_date):
    """Return (total_bytes, formatted_size, num_files) for one referrer.

    Each LibraryFileContent row is counted once (DISTINCT ON LFC.id), and
    only when reached through a LibraryFileAlias that has no expiry or
    has not yet expired.

    :param cur: Open database cursor.
    :param referring_table: Name of the table referencing LibraryFileAlias.
    :param referring_column: Name of the referencing column in that table.
    :param from_date: SQL fragment from _date_clause() (may be '').
    :param until_date: SQL fragment from _date_clause() (may be '').
    """
    quoted_referring_table = quoteIdentifier(referring_table)
    quoted_referring_column = quoteIdentifier(referring_column)
    cur.execute("""
        SELECT
            COALESCE(SUM(filesize), 0),
            pg_size_pretty(CAST(COALESCE(SUM(filesize), 0) AS bigint)),
            COUNT(*)
        FROM (
            SELECT DISTINCT ON (LFC.id) LFC.id, LFC.filesize
            FROM LibraryFileContent AS LFC, LibraryFileAlias AS LFA, %s
            WHERE LFC.id = LFA.content
                AND LFA.id = %s.%s
                AND (
                    LFA.expires IS NULL
                    OR LFA.expires > CURRENT_TIMESTAMP AT TIME ZONE 'UTC')
                %s %s
            ORDER BY LFC.id
            ) AS Whatever
        """ % (
            quoted_referring_table, quoted_referring_table,
            quoted_referring_column, from_date, until_date))
    return cur.fetchone()


def main():
    """Print Librarian disk space usage per referencing table.

    Parses the command line (database connection options plus optional
    --from / --until date bounds), finds every table that directly
    references LibraryFileAlias.id, and prints one line per referrer
    giving the total size and number of files, largest first.

    :return: 0, which the caller passes to sys.exit().
    """
    parser = OptionParser()

    db_options(parser)
    parser.add_option(
        "-f", "--from", dest="from_date", default=None,
        metavar="DATE", help="Only count new files since DATE (yyyy/mm/dd)")
    parser.add_option(
        "-u", "--until", dest="until_date", default=None,
        metavar="DATE", help="Only count new files until DATE (yyyy/mm/dd)")

    options, args = parser.parse_args()
    if args:
        parser.error("Too many command line arguments.")

    # Handle date filters. We use LibraryFileContent.datecreated rather
    # than LibraryFileAlias.datecreated as this report is about actual
    # disk space usage. A new row in the database linking to a
    # previously existing file in the Librarian takes up no new space.
    from_date = _date_clause('>=', options.from_date)
    until_date = _date_clause('<=', options.until_date)

    con = connect()
    try:
        cur = con.cursor()

        # Collect direct references to the LibraryFileAlias table.
        references = set(
            (from_table, from_column)
            # Note that listReferences is recursive, which we don't
            # care about in this simple report. We also ignore the
            # irrelevant constraint type update and delete flags.
            for from_table, from_column, to_table, to_column, update, delete
                in listReferences(cur, 'libraryfilealias', 'id')
            if to_table == 'libraryfilealias'
            )

        totals = set()
        for referring_table, referring_column in sorted(references):
            # The download count table is deliberately left out of the
            # report (per the change history, the Librarian garbage
            # collector ignores it too).
            if referring_table == 'libraryfiledownloadcount':
                continue
            total_bytes, formatted_size, num_files = _referrer_usage(
                cur, referring_table, referring_column,
                from_date, until_date)
            totals.add(
                (total_bytes, referring_table, formatted_size, num_files))
    finally:
        # Release the database connection even if a query fails; all
        # results have been fetched by the time we get here.
        con.close()

    # Largest consumers first. The single-argument parenthesised print
    # gives the same output under Python 2 and also parses on Python 3.
    for total_bytes, tab_name, formatted_size, num_files in sorted(
        totals, reverse=True):
        print('%-10s %s in %d files' % (formatted_size, tab_name, num_files))

    return 0
102
103
104
if __name__ == '__main__':
    # Run the report and hand main()'s return value to the interpreter
    # as the process exit status.
    exit_status = main()
    sys.exit(exit_status)