~launchpad-pqm/launchpad/devel

7675.686.9 by Stuart Bishop
Basic database utilization report
1
#!/usr/bin/python2.5 -S
2
# Copyright 2010 Canonical Ltd.  This software is licensed under the
3
# GNU Affero General Public License version 3 (see the file LICENSE).
4
5
"""Generate the database statistics report."""
6
7
__metaclass__ = type
8
9
import _pythonpath
10
11
from operator import attrgetter
12
13
from canonical.database.sqlbase import connect, sqlvalues
14
from canonical.launchpad.scripts import db_options
15
from lp.scripts.helpers import LPOptionParser
16
17
18
class Table:
19
    pass
20
21
22
def get_table_stats(cur, options):
23
    tablestats_query = """
24
        SELECT
25
            Earliest.date_created AS date_start,
26
            Latest.date_created AS date_end,
27
            Latest.schemaname,
28
            Latest.relname,
29
            Latest.seq_scan - Earliest.seq_scan AS seq_scan,
30
            Latest.seq_tup_read - Earliest.seq_tup_read AS seq_tup_read,
31
            Latest.idx_scan - Earliest.idx_scan AS idx_scan,
32
            Latest.idx_tup_fetch - Earliest.idx_tup_fetch AS idx_tup_fetch,
33
            Latest.n_tup_ins - Earliest.n_tup_ins AS n_tup_ins,
34
            Latest.n_tup_upd - Earliest.n_tup_upd AS n_tup_upd,
35
            Latest.n_tup_del - Earliest.n_tup_del AS n_tup_del,
36
            Latest.n_tup_hot_upd - Earliest.n_tup_hot_upd AS n_tup_hot_upd,
37
            Latest.n_live_tup,
38
            Latest.n_dead_tup,
39
            Latest.last_vacuum,
40
            Latest.last_autovacuum,
41
            Latest.last_analyze,
42
            Latest.last_autoanalyze
43
        FROM
44
            DatabaseTableStats AS Earliest,
45
            DatabaseTableStats AS Latest
46
        WHERE
47
            Earliest.date_created = (
48
                SELECT min(date_created) FROM DatabaseTableStats
49
                WHERE date_created >= CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
50
                    - CAST(%s AS interval))
51
            AND Latest.date_created = (
52
                SELECT max(date_created) FROM DatabaseTableStats)
53
            AND Earliest.schemaname = Latest.schemaname
54
            AND Earliest.relname = Latest.relname
55
        """ % sqlvalues(options.since_interval)
56
57
    cur.execute(tablestats_query)
58
59
    # description[0] is the column name, per PEP-0249
60
    fields = [description[0] for description in cur.description]
61
    tables = set()
62
    for row in cur.fetchall():
63
        table = Table()
64
        for index in range(len(fields)):
65
            setattr(table, fields[index], row[index])
66
        table.total_tup_read = table.seq_tup_read + table.idx_tup_fetch
67
        table.total_tup_written = (
68
            table.n_tup_ins + table.n_tup_upd + table.n_tup_del)
69
        tables.add(table)
70
71
    return tables
72
73
74
def get_cpu_stats(cur, options):
7675.686.14 by Stuart Bishop
Fix bogus CPU utilization report
75
    # This query calculates the averate cpu utilization from the
76
    # samples. It assumes samples are taken at regular intervals over
77
    # the period.
7675.686.9 by Stuart Bishop
Basic database utilization report
78
    query = """
7675.686.14 by Stuart Bishop
Fix bogus CPU utilization report
79
        SELECT (
80
            CAST(SUM(cpu) AS float) / (
81
                SELECT COUNT(DISTINCT date_created) FROM DatabaseCpuStats
82
                WHERE
83
                    date_created >= (CURRENT_TIMESTAMP AT TIME ZONE 'UTC')
84
                    - CAST (%s AS interval))
85
            ) AS avg_cpu, username
86
        FROM DatabaseCpuStats
7675.686.9 by Stuart Bishop
Basic database utilization report
87
        WHERE date_created >= (CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
88
            - CAST(%s AS interval))
89
        GROUP BY username
7675.686.14 by Stuart Bishop
Fix bogus CPU utilization report
90
        """ % sqlvalues(options.since_interval, options.since_interval)
7675.686.9 by Stuart Bishop
Basic database utilization report
91
92
    cur.execute(query)
93
94
    return set(cur.fetchall())
95
96
97
def main():
98
    parser = LPOptionParser()
99
    db_options(parser)
100
    parser.add_option(
101
        "-i", "--interval", dest="since_interval", type=str,
102
        default="100 years", metavar="INTERVAL",
103
        help=
104
            "Use statistics collected over the last INTERVAL period. "
105
            "INTERVAL is a string parsable by PostgreSQL "
106
            "such as '5 minutes'.")
107
    parser.add_option(
108
        "-n", "--limit", dest="limit", type=int,
109
        default=15, metavar="NUM",
110
        help="Display the top NUM items in each category.")
111
    parser.set_defaults(dbuser="database_stats_report")
112
    options, args = parser.parse_args()
113
114
    con = connect(options.dbuser)
115
    cur = con.cursor()
116
117
    tables = get_table_stats(cur, options)
118
    arbitrary_table = list(tables)[0]
119
    interval = arbitrary_table.date_end - arbitrary_table.date_start
7675.686.16 by Stuart Bishop
Correctly calculate number of seconds in a day
120
    per_second = float(interval.days * 24 * 60 * 60 + interval.seconds)
7675.686.9 by Stuart Bishop
Basic database utilization report
121
122
    print "== Most Read Tables =="
123
    print
7675.686.11 by Stuart Bishop
Comments per review feedback
124
    # These match the pg_user_table_stats view. schemaname is the
125
    # namespace (normally 'public'), relname is the table (relation)
126
    # name. total_tup_red is the total number of rows read.
127
    # idx_tup_fetch is the number of rows looked up using an index.
7675.686.9 by Stuart Bishop
Basic database utilization report
128
    tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname']
129
    most_read_tables = sorted(
130
        tables, key=attrgetter(*tables_sort), reverse=True)
131
    for table in most_read_tables[:options.limit]:
7675.686.15 by Stuart Bishop
tweak formatting
132
        print "%40s || %10.2f tuples/sec" % (
133
            table.relname, table.total_tup_read / per_second)
7675.686.9 by Stuart Bishop
Basic database utilization report
134
    print
135
136
    print "== Most Written Tables =="
137
    print
138
    tables_sort = [
139
        'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname']
140
    most_written_tables = sorted(
141
        tables, key=attrgetter(*tables_sort), reverse=True)
142
    for table in most_written_tables[:options.limit]:
7675.686.15 by Stuart Bishop
tweak formatting
143
        print "%40s || %10.2f tuples/sec" % (
144
            table.relname, table.total_tup_written / per_second)
7675.686.9 by Stuart Bishop
Basic database utilization report
145
    print
146
147
    user_cpu = get_cpu_stats(cur, options)
148
    print "== Most Active Users =="
149
    print
150
    for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]:
7675.686.15 by Stuart Bishop
tweak formatting
151
        print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10)
7675.686.9 by Stuart Bishop
Basic database utilization report
152
153
154
if __name__ == '__main__':
155
    main()