7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
1 |
#!/usr/bin/python -S
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
2 |
# Copyright 2010 Canonical Ltd. This software is licensed under the
|
3 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
|
4 |
||
5 |
"""Generate the database statistics report."""
|
|
6 |
||
7 |
__metaclass__ = type |
|
8 |
||
9 |
import _pythonpath |
|
10 |
||
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
11 |
from datetime import datetime |
7675.686.9
by Stuart Bishop
Basic database utilization report |
12 |
from operator import attrgetter |
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
13 |
from textwrap import dedent |
7675.686.9
by Stuart Bishop
Basic database utilization report |
14 |
|
15 |
from canonical.database.sqlbase import connect, sqlvalues |
|
16 |
from canonical.launchpad.scripts import db_options |
|
17 |
from lp.scripts.helpers import LPOptionParser |
|
18 |
||
19 |
||
20 |
class Table: |
|
21 |
pass
|
|
22 |
||
23 |
||
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
24 |
def get_where_clause(options): |
25 |
"Generate a WHERE clause referencing the date_created column."
|
|
26 |
# We have two of the from timestamp, the until timestamp and an
|
|
27 |
# interval. The interval is in a format unsuitable for processing in
|
|
28 |
# Python. If the interval is set, it represents the period before
|
|
29 |
# the until timestamp or the period after the from timestamp,
|
|
30 |
# depending on which of these is set. From this information,
|
|
31 |
# generate the SQL representation of the from timestamp and the
|
|
32 |
# until timestamp.
|
|
33 |
if options.from_ts: |
|
34 |
from_sql = ("CAST(%s AS timestamp without time zone)" |
|
35 |
% sqlvalues(options.from_ts)) |
|
36 |
elif options.interval and options.until_ts: |
|
37 |
from_sql = ( |
|
38 |
"CAST(%s AS timestamp without time zone) - CAST(%s AS interval)" |
|
39 |
% sqlvalues(options.until_ts, options.interval)) |
|
40 |
elif options.interval: |
|
41 |
from_sql = ( |
|
42 |
"(CURRENT_TIMESTAMP AT TIME ZONE 'UTC') - CAST(%s AS interval)" |
|
43 |
% sqlvalues(options.interval)) |
|
44 |
else: |
|
45 |
from_sql = "CAST('1970-01-01' AS timestamp without time zone)" |
|
46 |
||
47 |
if options.until_ts: |
|
48 |
until_sql = ( |
|
49 |
"CAST(%s AS timestamp without time zone)" |
|
50 |
% sqlvalues(options.until_ts)) |
|
51 |
elif options.interval and options.from_ts: |
|
52 |
until_sql = ( |
|
53 |
"CAST(%s AS timestamp without time zone) + CAST(%s AS interval)" |
|
54 |
% sqlvalues(options.from_ts, options.interval)) |
|
55 |
else: |
|
56 |
until_sql = "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'" |
|
57 |
||
58 |
clause = "date_created BETWEEN (%s) AND (%s)" % (from_sql, until_sql) |
|
59 |
||
60 |
return clause |
|
61 |
||
62 |
||
7675.686.9
by Stuart Bishop
Basic database utilization report |
63 |
def get_table_stats(cur, options): |
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
64 |
params = {'where': get_where_clause(options)} |
65 |
tablestats_query = dedent("""\ |
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
66 |
SELECT
|
67 |
Earliest.date_created AS date_start,
|
|
68 |
Latest.date_created AS date_end,
|
|
69 |
Latest.schemaname,
|
|
70 |
Latest.relname,
|
|
71 |
Latest.seq_scan - Earliest.seq_scan AS seq_scan,
|
|
72 |
Latest.seq_tup_read - Earliest.seq_tup_read AS seq_tup_read,
|
|
73 |
Latest.idx_scan - Earliest.idx_scan AS idx_scan,
|
|
74 |
Latest.idx_tup_fetch - Earliest.idx_tup_fetch AS idx_tup_fetch,
|
|
75 |
Latest.n_tup_ins - Earliest.n_tup_ins AS n_tup_ins,
|
|
76 |
Latest.n_tup_upd - Earliest.n_tup_upd AS n_tup_upd,
|
|
77 |
Latest.n_tup_del - Earliest.n_tup_del AS n_tup_del,
|
|
78 |
Latest.n_tup_hot_upd - Earliest.n_tup_hot_upd AS n_tup_hot_upd,
|
|
79 |
Latest.n_live_tup,
|
|
80 |
Latest.n_dead_tup,
|
|
81 |
Latest.last_vacuum,
|
|
82 |
Latest.last_autovacuum,
|
|
83 |
Latest.last_analyze,
|
|
84 |
Latest.last_autoanalyze
|
|
85 |
FROM
|
|
86 |
DatabaseTableStats AS Earliest,
|
|
87 |
DatabaseTableStats AS Latest
|
|
88 |
WHERE
|
|
89 |
Earliest.date_created = (
|
|
90 |
SELECT min(date_created) FROM DatabaseTableStats
|
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
91 |
WHERE %(where)s) |
7675.686.9
by Stuart Bishop
Basic database utilization report |
92 |
AND Latest.date_created = (
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
93 |
SELECT max(date_created) FROM DatabaseTableStats
|
94 |
WHERE %(where)s) |
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
95 |
AND Earliest.schemaname = Latest.schemaname
|
96 |
AND Earliest.relname = Latest.relname
|
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
97 |
""" % params) |
7675.686.9
by Stuart Bishop
Basic database utilization report |
98 |
cur.execute(tablestats_query) |
99 |
||
100 |
# description[0] is the column name, per PEP-0249
|
|
101 |
fields = [description[0] for description in cur.description] |
|
102 |
tables = set() |
|
103 |
for row in cur.fetchall(): |
|
104 |
table = Table() |
|
105 |
for index in range(len(fields)): |
|
106 |
setattr(table, fields[index], row[index]) |
|
107 |
table.total_tup_read = table.seq_tup_read + table.idx_tup_fetch |
|
108 |
table.total_tup_written = ( |
|
109 |
table.n_tup_ins + table.n_tup_upd + table.n_tup_del) |
|
110 |
tables.add(table) |
|
111 |
||
112 |
return tables |
|
113 |
||
114 |
||
115 |
def get_cpu_stats(cur, options): |
|
7675.686.14
by Stuart Bishop
Fix bogus CPU utilization report |
116 |
# This query calculates the averate cpu utilization from the
|
117 |
# samples. It assumes samples are taken at regular intervals over
|
|
118 |
# the period.
|
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
119 |
# Note that we have to use SUM()/COUNT() instead of AVG() as
|
120 |
# database users not connected when the sample was taken are not
|
|
121 |
# recorded - we want the average utilization over the time period,
|
|
122 |
# not the subset of the time period the user was actually connected.
|
|
123 |
params = {'where': get_where_clause(options)} |
|
124 |
query = dedent("""\ |
|
7675.686.14
by Stuart Bishop
Fix bogus CPU utilization report |
125 |
SELECT (
|
126 |
CAST(SUM(cpu) AS float) / (
|
|
127 |
SELECT COUNT(DISTINCT date_created) FROM DatabaseCpuStats
|
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
128 |
WHERE %(where)s |
129 |
)) AS avg_cpu, username
|
|
7675.686.14
by Stuart Bishop
Fix bogus CPU utilization report |
130 |
FROM DatabaseCpuStats
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
131 |
WHERE %(where)s |
7675.686.9
by Stuart Bishop
Basic database utilization report |
132 |
GROUP BY username
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
133 |
""" % params) |
7675.686.9
by Stuart Bishop
Basic database utilization report |
134 |
cur.execute(query) |
135 |
return set(cur.fetchall()) |
|
136 |
||
137 |
||
138 |
def main(): |
|
139 |
parser = LPOptionParser() |
|
140 |
db_options(parser) |
|
141 |
parser.add_option( |
|
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
142 |
"-f", "--from", dest="from_ts", type=datetime, |
143 |
default=None, metavar="TIMESTAMP", |
|
144 |
help="Use statistics collected since TIMESTAMP.") |
|
145 |
parser.add_option( |
|
146 |
"-u", "--until", dest="until_ts", type=datetime, |
|
147 |
default=None, metavar="TIMESTAMP", |
|
148 |
help="Use statistics collected up until TIMESTAMP.") |
|
149 |
parser.add_option( |
|
150 |
"-i", "--interval", dest="interval", type=str, |
|
151 |
default=None, metavar="INTERVAL", |
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
152 |
help= |
153 |
"Use statistics collected over the last INTERVAL period. "
|
|
154 |
"INTERVAL is a string parsable by PostgreSQL "
|
|
155 |
"such as '5 minutes'.") |
|
156 |
parser.add_option( |
|
157 |
"-n", "--limit", dest="limit", type=int, |
|
158 |
default=15, metavar="NUM", |
|
159 |
help="Display the top NUM items in each category.") |
|
160 |
parser.set_defaults(dbuser="database_stats_report") |
|
161 |
options, args = parser.parse_args() |
|
162 |
||
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
163 |
if options.from_ts and options.until_ts and options.interval: |
164 |
parser.error( |
|
165 |
"Only two of --from, --until and --interval may be specified.") |
|
166 |
||
7675.686.9
by Stuart Bishop
Basic database utilization report |
167 |
con = connect(options.dbuser) |
168 |
cur = con.cursor() |
|
169 |
||
7675.686.18
by Stuart Bishop
Let report-database-stats.py report on arbitrary time periods |
170 |
tables = list(get_table_stats(cur, options)) |
171 |
if len(tables) == 0: |
|
172 |
parser.error("No statistics available in that time range.") |
|
173 |
arbitrary_table = tables[0] |
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
174 |
interval = arbitrary_table.date_end - arbitrary_table.date_start |
7675.686.16
by Stuart Bishop
Correctly calculate number of seconds in a day |
175 |
per_second = float(interval.days * 24 * 60 * 60 + interval.seconds) |
7675.686.9
by Stuart Bishop
Basic database utilization report |
176 |
|
177 |
print "== Most Read Tables ==" |
|
178 |
print
|
|
7675.686.11
by Stuart Bishop
Comments per review feedback |
179 |
# These match the pg_user_table_stats view. schemaname is the
|
180 |
# namespace (normally 'public'), relname is the table (relation)
|
|
181 |
# name. total_tup_red is the total number of rows read.
|
|
182 |
# idx_tup_fetch is the number of rows looked up using an index.
|
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
183 |
tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname'] |
184 |
most_read_tables = sorted( |
|
185 |
tables, key=attrgetter(*tables_sort), reverse=True) |
|
186 |
for table in most_read_tables[:options.limit]: |
|
7675.686.15
by Stuart Bishop
tweak formatting |
187 |
print "%40s || %10.2f tuples/sec" % ( |
188 |
table.relname, table.total_tup_read / per_second) |
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
189 |
print
|
190 |
||
191 |
print "== Most Written Tables ==" |
|
192 |
print
|
|
193 |
tables_sort = [ |
|
194 |
'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname'] |
|
195 |
most_written_tables = sorted( |
|
196 |
tables, key=attrgetter(*tables_sort), reverse=True) |
|
197 |
for table in most_written_tables[:options.limit]: |
|
7675.686.15
by Stuart Bishop
tweak formatting |
198 |
print "%40s || %10.2f tuples/sec" % ( |
199 |
table.relname, table.total_tup_written / per_second) |
|
7675.686.9
by Stuart Bishop
Basic database utilization report |
200 |
print
|
201 |
||
202 |
user_cpu = get_cpu_stats(cur, options) |
|
203 |
print "== Most Active Users ==" |
|
204 |
print
|
|
205 |
for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]: |
|
7675.686.15
by Stuart Bishop
tweak formatting |
206 |
print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10) |
7675.686.9
by Stuart Bishop
Basic database utilization report |
207 |
|
208 |
||
209 |
if __name__ == '__main__': |
|
210 |
main() |