~launchpad-pqm/launchpad/devel

7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
1
#!/usr/bin/python -S
7675.686.9 by Stuart Bishop
Basic database utilization report
2
# Copyright 2010 Canonical Ltd.  This software is licensed under the
3
# GNU Affero General Public License version 3 (see the file LICENSE).
4
5
"""Generate the database statistics report."""
6
7
__metaclass__ = type
8
9
import _pythonpath
10
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
11
from datetime import datetime
7675.686.9 by Stuart Bishop
Basic database utilization report
12
from operator import attrgetter
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
13
from textwrap import dedent
7675.686.9 by Stuart Bishop
Basic database utilization report
14
15
from canonical.database.sqlbase import connect, sqlvalues
16
from canonical.launchpad.scripts import db_options
17
from lp.scripts.helpers import LPOptionParser
18
19
20
class Table:
21
    pass
22
23
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
24
def get_where_clause(options):
25
    "Generate a WHERE clause referencing the date_created column."
26
    # We have two of the from timestamp, the until timestamp and an
27
    # interval. The interval is in a format unsuitable for processing in
28
    # Python. If the interval is set, it represents the period before
29
    # the until timestamp or the period after the from timestamp,
30
    # depending on which of these is set. From this information,
31
    # generate the SQL representation of the from timestamp and the
32
    # until timestamp.
33
    if options.from_ts:
34
        from_sql = ("CAST(%s AS timestamp without time zone)"
35
            % sqlvalues(options.from_ts))
36
    elif options.interval and options.until_ts:
37
        from_sql = (
38
            "CAST(%s AS timestamp without time zone) - CAST(%s AS interval)"
39
            % sqlvalues(options.until_ts, options.interval))
40
    elif options.interval:
41
        from_sql = (
42
            "(CURRENT_TIMESTAMP AT TIME ZONE 'UTC') - CAST(%s AS interval)"
43
            % sqlvalues(options.interval))
44
    else:
45
        from_sql = "CAST('1970-01-01' AS timestamp without time zone)"
46
47
    if options.until_ts:
48
        until_sql = (
49
            "CAST(%s AS timestamp without time zone)"
50
            % sqlvalues(options.until_ts))
51
    elif options.interval and options.from_ts:
52
        until_sql = (
53
            "CAST(%s AS timestamp without time zone) + CAST(%s AS interval)"
54
            % sqlvalues(options.from_ts, options.interval))
55
    else:
56
        until_sql = "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'"
57
58
    clause = "date_created BETWEEN (%s) AND (%s)" % (from_sql, until_sql)
59
60
    return clause
61
62
7675.686.9 by Stuart Bishop
Basic database utilization report
63
def get_table_stats(cur, options):
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
64
    params = {'where': get_where_clause(options)}
65
    tablestats_query = dedent("""\
7675.686.9 by Stuart Bishop
Basic database utilization report
66
        SELECT
67
            Earliest.date_created AS date_start,
68
            Latest.date_created AS date_end,
69
            Latest.schemaname,
70
            Latest.relname,
71
            Latest.seq_scan - Earliest.seq_scan AS seq_scan,
72
            Latest.seq_tup_read - Earliest.seq_tup_read AS seq_tup_read,
73
            Latest.idx_scan - Earliest.idx_scan AS idx_scan,
74
            Latest.idx_tup_fetch - Earliest.idx_tup_fetch AS idx_tup_fetch,
75
            Latest.n_tup_ins - Earliest.n_tup_ins AS n_tup_ins,
76
            Latest.n_tup_upd - Earliest.n_tup_upd AS n_tup_upd,
77
            Latest.n_tup_del - Earliest.n_tup_del AS n_tup_del,
78
            Latest.n_tup_hot_upd - Earliest.n_tup_hot_upd AS n_tup_hot_upd,
79
            Latest.n_live_tup,
80
            Latest.n_dead_tup,
81
            Latest.last_vacuum,
82
            Latest.last_autovacuum,
83
            Latest.last_analyze,
84
            Latest.last_autoanalyze
85
        FROM
86
            DatabaseTableStats AS Earliest,
87
            DatabaseTableStats AS Latest
88
        WHERE
89
            Earliest.date_created = (
90
                SELECT min(date_created) FROM DatabaseTableStats
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
91
                WHERE %(where)s)
7675.686.9 by Stuart Bishop
Basic database utilization report
92
            AND Latest.date_created = (
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
93
                SELECT max(date_created) FROM DatabaseTableStats
94
                WHERE %(where)s)
7675.686.9 by Stuart Bishop
Basic database utilization report
95
            AND Earliest.schemaname = Latest.schemaname
96
            AND Earliest.relname = Latest.relname
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
97
        """ % params)
7675.686.9 by Stuart Bishop
Basic database utilization report
98
    cur.execute(tablestats_query)
99
100
    # description[0] is the column name, per PEP-0249
101
    fields = [description[0] for description in cur.description]
102
    tables = set()
103
    for row in cur.fetchall():
104
        table = Table()
105
        for index in range(len(fields)):
106
            setattr(table, fields[index], row[index])
107
        table.total_tup_read = table.seq_tup_read + table.idx_tup_fetch
108
        table.total_tup_written = (
109
            table.n_tup_ins + table.n_tup_upd + table.n_tup_del)
110
        tables.add(table)
111
112
    return tables
113
114
115
def get_cpu_stats(cur, options):
7675.686.14 by Stuart Bishop
Fix bogus CPU utilization report
116
    # This query calculates the averate cpu utilization from the
117
    # samples. It assumes samples are taken at regular intervals over
118
    # the period.
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
119
    # Note that we have to use SUM()/COUNT() instead of AVG() as
120
    # database users not connected when the sample was taken are not
121
    # recorded - we want the average utilization over the time period,
122
    # not the subset of the time period the user was actually connected.
123
    params = {'where': get_where_clause(options)}
124
    query = dedent("""\
7675.686.14 by Stuart Bishop
Fix bogus CPU utilization report
125
        SELECT (
126
            CAST(SUM(cpu) AS float) / (
127
                SELECT COUNT(DISTINCT date_created) FROM DatabaseCpuStats
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
128
                WHERE %(where)s
129
            )) AS avg_cpu, username
7675.686.14 by Stuart Bishop
Fix bogus CPU utilization report
130
        FROM DatabaseCpuStats
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
131
        WHERE %(where)s
7675.686.9 by Stuart Bishop
Basic database utilization report
132
        GROUP BY username
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
133
        """ % params)
7675.686.9 by Stuart Bishop
Basic database utilization report
134
    cur.execute(query)
9893.6.20 by Stuart Bishop
Fold edge database stats into lpnet
135
    cpu_stats = set(cur.fetchall())
136
137
    # Fold edge into lpnet, as they are now running the same code.
138
    # This is a temporary hack until we drop edge entirely. See
139
    # Bug #667883 for details.
140
    lpnet_avg_cpu = 0.0
141
    edge_avg_cpu = 0.0
142
    for stats_tuple in list(cpu_stats):
143
        avg_cpu, username = stats_tuple
144
        if username == 'lpnet':
145
            lpnet_avg_cpu = avg_cpu
146
            cpu_stats.discard(stats_tuple)
147
        elif username == 'edge':
148
            edge_avg_cpu = avg_cpu
149
            cpu_stats.discard(stats_tuple)
150
    cpu_stats.add((lpnet_avg_cpu + edge_avg_cpu, 'lpnet'))
151
152
    return cpu_stats
7675.686.9 by Stuart Bishop
Basic database utilization report
153
154
155
def main():
156
    parser = LPOptionParser()
157
    db_options(parser)
158
    parser.add_option(
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
159
        "-f", "--from", dest="from_ts", type=datetime,
160
        default=None, metavar="TIMESTAMP",
161
        help="Use statistics collected since TIMESTAMP.")
162
    parser.add_option(
163
        "-u", "--until", dest="until_ts", type=datetime,
164
        default=None, metavar="TIMESTAMP",
165
        help="Use statistics collected up until TIMESTAMP.")
166
    parser.add_option(
167
        "-i", "--interval", dest="interval", type=str,
168
        default=None, metavar="INTERVAL",
7675.686.9 by Stuart Bishop
Basic database utilization report
169
        help=
170
            "Use statistics collected over the last INTERVAL period. "
171
            "INTERVAL is a string parsable by PostgreSQL "
172
            "such as '5 minutes'.")
173
    parser.add_option(
174
        "-n", "--limit", dest="limit", type=int,
175
        default=15, metavar="NUM",
176
        help="Display the top NUM items in each category.")
177
    parser.set_defaults(dbuser="database_stats_report")
178
    options, args = parser.parse_args()
179
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
180
    if options.from_ts and options.until_ts and options.interval:
181
        parser.error(
182
            "Only two of --from, --until and --interval may be specified.")
183
7675.686.9 by Stuart Bishop
Basic database utilization report
184
    con = connect(options.dbuser)
185
    cur = con.cursor()
186
7675.686.18 by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods
187
    tables = list(get_table_stats(cur, options))
188
    if len(tables) == 0:
189
        parser.error("No statistics available in that time range.")
190
    arbitrary_table = tables[0]
7675.686.9 by Stuart Bishop
Basic database utilization report
191
    interval = arbitrary_table.date_end - arbitrary_table.date_start
7675.686.16 by Stuart Bishop
Correctly calculate number of seconds in a day
192
    per_second = float(interval.days * 24 * 60 * 60 + interval.seconds)
7675.686.9 by Stuart Bishop
Basic database utilization report
193
194
    print "== Most Read Tables =="
195
    print
7675.686.11 by Stuart Bishop
Comments per review feedback
196
    # These match the pg_user_table_stats view. schemaname is the
197
    # namespace (normally 'public'), relname is the table (relation)
198
    # name. total_tup_red is the total number of rows read.
199
    # idx_tup_fetch is the number of rows looked up using an index.
7675.686.9 by Stuart Bishop
Basic database utilization report
200
    tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname']
201
    most_read_tables = sorted(
202
        tables, key=attrgetter(*tables_sort), reverse=True)
203
    for table in most_read_tables[:options.limit]:
7675.686.15 by Stuart Bishop
tweak formatting
204
        print "%40s || %10.2f tuples/sec" % (
205
            table.relname, table.total_tup_read / per_second)
7675.686.9 by Stuart Bishop
Basic database utilization report
206
    print
207
208
    print "== Most Written Tables =="
209
    print
210
    tables_sort = [
211
        'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname']
212
    most_written_tables = sorted(
213
        tables, key=attrgetter(*tables_sort), reverse=True)
214
    for table in most_written_tables[:options.limit]:
7675.686.15 by Stuart Bishop
tweak formatting
215
        print "%40s || %10.2f tuples/sec" % (
216
            table.relname, table.total_tup_written / per_second)
7675.686.9 by Stuart Bishop
Basic database utilization report
217
    print
218
219
    user_cpu = get_cpu_stats(cur, options)
220
    print "== Most Active Users =="
221
    print
222
    for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]:
7675.686.15 by Stuart Bishop
tweak formatting
223
        print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10)
7675.686.9 by Stuart Bishop
Basic database utilization report
224
225
226
if __name__ == '__main__':
227
    main()