~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
#!/usr/bin/python -S
# Copyright 2010-2011 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""Generate the database statistics report."""

__metaclass__ = type

from datetime import datetime
from operator import attrgetter
from textwrap import (
    dedent,
    fill,
    )

import _pythonpath

from canonical.database.sqlbase import (
    connect,
    sqlvalues,
    )
from canonical.launchpad.scripts import db_options
from lp.scripts.helpers import LPOptionParser
from lp.services.database.namedrow import named_fetchall


class Table:
    pass


def get_where_clause(options, fuzz='0 seconds'):
    "Generate a WHERE clause referencing the date_created column."
    # We have two of the from timestamp, the until timestamp and an
    # interval. The interval is in a format unsuitable for processing in
    # Python. If the interval is set, it represents the period before
    # the until timestamp or the period after the from timestamp,
    # depending on which of these is set. From this information,
    # generate the SQL representation of the from timestamp and the
    # until timestamp.
    if options.from_ts:
        from_sql = ("CAST(%s AS timestamp without time zone)"
            % sqlvalues(options.from_ts))
    elif options.interval and options.until_ts:
        from_sql = (
            "CAST(%s AS timestamp without time zone) - CAST(%s AS interval)"
            % sqlvalues(options.until_ts, options.interval))
    elif options.interval:
        from_sql = (
            "(CURRENT_TIMESTAMP AT TIME ZONE 'UTC') - CAST(%s AS interval)"
            % sqlvalues(options.interval))
    else:
        from_sql = "CAST('1970-01-01' AS timestamp without time zone)"

    if options.until_ts:
        until_sql = (
            "CAST(%s AS timestamp without time zone)"
            % sqlvalues(options.until_ts))
    elif options.interval and options.from_ts:
        until_sql = (
            "CAST(%s AS timestamp without time zone) + CAST(%s AS interval)"
            % sqlvalues(options.from_ts, options.interval))
    else:
        until_sql = "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'"

    fuzz_sql = "CAST(%s AS interval)" % sqlvalues(fuzz)
    clause = "date_created BETWEEN (%s - %s) AND (%s + %s)" % (
        from_sql, fuzz_sql, until_sql, fuzz_sql)

    return clause


def get_table_stats(cur, options):
    params = {'where': get_where_clause(options)}
    tablestats_query = dedent("""\
        SELECT
            Earliest.date_created AS date_start,
            Latest.date_created AS date_end,
            Latest.schemaname,
            Latest.relname,
            Latest.seq_scan - Earliest.seq_scan AS seq_scan,
            Latest.seq_tup_read - Earliest.seq_tup_read AS seq_tup_read,
            Latest.idx_scan - Earliest.idx_scan AS idx_scan,
            Latest.idx_tup_fetch - Earliest.idx_tup_fetch AS idx_tup_fetch,
            Latest.n_tup_ins - Earliest.n_tup_ins AS n_tup_ins,
            Latest.n_tup_upd - Earliest.n_tup_upd AS n_tup_upd,
            Latest.n_tup_del - Earliest.n_tup_del AS n_tup_del,
            Latest.n_tup_hot_upd - Earliest.n_tup_hot_upd AS n_tup_hot_upd,
            Latest.n_live_tup,
            Latest.n_dead_tup,
            Latest.last_vacuum,
            Latest.last_autovacuum,
            Latest.last_analyze,
            Latest.last_autoanalyze
        FROM
            DatabaseTableStats AS Earliest,
            DatabaseTableStats AS Latest
        WHERE
            Earliest.date_created = (
                SELECT min(date_created) FROM DatabaseTableStats
                WHERE %(where)s)
            AND Latest.date_created = (
                SELECT max(date_created) FROM DatabaseTableStats
                WHERE %(where)s)
            AND Earliest.schemaname = Latest.schemaname
            AND Earliest.relname = Latest.relname
        """ % params)
    cur.execute(tablestats_query)

    # description[0] is the column name, per PEP-0249
    fields = [description[0] for description in cur.description]
    tables = set()
    for row in cur.fetchall():
        table = Table()
        for index in range(len(fields)):
            setattr(table, fields[index], row[index])
        table.total_tup_read = table.seq_tup_read + table.idx_tup_fetch
        table.total_tup_written = (
            table.n_tup_ins + table.n_tup_upd + table.n_tup_del)
        tables.add(table)

    return tables


def get_cpu_stats(cur, options):
    # This query calculates the averate cpu utilization from the
    # samples. It assumes samples are taken at regular intervals over
    # the period.
    # Note that we have to use SUM()/COUNT() instead of AVG() as
    # database users not connected when the sample was taken are not
    # recorded - we want the average utilization over the time period,
    # not the subset of the time period the user was actually connected.
    params = {'where': get_where_clause(options)}
    query = dedent("""\
        SELECT (
            CAST(SUM(cpu) AS float) / (
                SELECT COUNT(DISTINCT date_created) FROM DatabaseCpuStats
                WHERE %(where)s
            )) AS avg_cpu, username
        FROM DatabaseCpuStats
        WHERE %(where)s
        GROUP BY username
        """ % params)
    cur.execute(query)
    cpu_stats = set(cur.fetchall())

    # Fold edge into lpnet, as they are now running the same code.
    # This is a temporary hack until we drop edge entirely. See
    # Bug #667883 for details.
    lpnet_avg_cpu = 0.0
    edge_avg_cpu = 0.0
    for stats_tuple in list(cpu_stats):
        avg_cpu, username = stats_tuple
        if username == 'lpnet':
            lpnet_avg_cpu = avg_cpu
            cpu_stats.discard(stats_tuple)
        elif username == 'edge':
            edge_avg_cpu = avg_cpu
            cpu_stats.discard(stats_tuple)
    cpu_stats.add((lpnet_avg_cpu + edge_avg_cpu, 'lpnet'))

    return cpu_stats


def get_bloat_stats(cur, options, kind):
    # Return information on bloated tables and indexes, as of the end of
    # the requested time period.
    params = {
        # We only collect these statistics daily, so add some fuzz
        # to ensure bloat information ends up on the daily reports;
        # we cannot guarantee the disk utilization statistics occur
        # exactly 24 hours apart. Our most recent snapshot could be 1
        # day ago, give or take a few hours.
        'where': get_where_clause(options, fuzz='1 day 6 hours'),
        'bloat': options.bloat,
        'min_bloat': options.min_bloat,
        'kind': kind,
        }
    query = dedent("""
        SELECT * FROM (
            SELECT DISTINCT
                namespace,
                name,
                sub_namespace,
                sub_name,
                count(*) OVER t AS num_samples,
                last_value(table_len) OVER t AS table_len,
                pg_size_pretty(last_value(table_len) OVER t) AS table_size,
                last_value(dead_tuple_len + free_space) OVER t AS bloat_len,
                pg_size_pretty(last_value(dead_tuple_len + free_space) OVER t)
                    AS bloat_size,
                first_value(dead_tuple_percent + free_percent) OVER t
                    AS start_bloat_percent,
                last_value(dead_tuple_percent + free_percent) OVER t
                    AS end_bloat_percent,
                (last_value(dead_tuple_percent + free_percent) OVER t
                    - first_value(dead_tuple_percent + free_percent) OVER t
                    ) AS delta_bloat_percent,
                (last_value(table_len) OVER t
                    - first_value(table_len) OVER t) AS delta_bloat_len,
                pg_size_pretty(
                    last_value(table_len) OVER t
                    - first_value(table_len) OVER t) AS delta_bloat_size
            FROM DatabaseDiskUtilization
            WHERE
                %(where)s
                AND kind = %%(kind)s
            WINDOW t AS (
                PARTITION BY sort ORDER BY date_created
                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
            ) AS whatever
        WHERE
            table_len >= %(min_bloat)s
            AND end_bloat_percent >= %(bloat)s
        ORDER BY bloat_len DESC
        """ % params)
    cur.execute(query, params)
    bloat_stats = named_fetchall(cur)
    return list(bloat_stats)


def main():
    parser = LPOptionParser()
    db_options(parser)
    parser.add_option(
        "-f", "--from", dest="from_ts", type=datetime,
        default=None, metavar="TIMESTAMP",
        help="Use statistics collected since TIMESTAMP.")
    parser.add_option(
        "-u", "--until", dest="until_ts", type=datetime,
        default=None, metavar="TIMESTAMP",
        help="Use statistics collected up until TIMESTAMP.")
    parser.add_option(
        "-i", "--interval", dest="interval", type=str,
        default=None, metavar="INTERVAL",
        help=(
            "Use statistics collected over the last INTERVAL period. "
            "INTERVAL is a string parsable by PostgreSQL "
            "such as '5 minutes'."))
    parser.add_option(
        "-n", "--limit", dest="limit", type=int,
        default=15, metavar="NUM",
        help="Display the top NUM items in each category.")
    parser.add_option(
        "-b", "--bloat", dest="bloat", type=float,
        default=40, metavar="BLOAT",
        help="Display tables and indexes bloated by more than BLOAT%.")
    parser.add_option(
        "--min-bloat", dest="min_bloat", type=int,
        default=10000000, metavar="BLOAT",
        help="Don't report tables bloated less than BLOAT bytes.")
    parser.set_defaults(dbuser="database_stats_report")
    options, args = parser.parse_args()

    if options.from_ts and options.until_ts and options.interval:
        parser.error(
            "Only two of --from, --until and --interval may be specified.")

    con = connect()
    cur = con.cursor()

    tables = list(get_table_stats(cur, options))
    if len(tables) == 0:
        parser.error("No statistics available in that time range.")
    arbitrary_table = tables[0]
    interval = arbitrary_table.date_end - arbitrary_table.date_start
    per_second = float(interval.days * 24 * 60 * 60 + interval.seconds)
    if per_second == 0:
        parser.error("Only one sample in that time range.")

    user_cpu = get_cpu_stats(cur, options)
    print "== Most Active Users =="
    print
    for cpu, username in sorted(user_cpu, reverse=True)[:options.limit]:
        print "%40s || %10.2f%% CPU" % (username, float(cpu) / 10)

    print
    print "== Most Written Tables =="
    print
    tables_sort = [
        'total_tup_written', 'n_tup_upd', 'n_tup_ins', 'n_tup_del', 'relname']
    most_written_tables = sorted(
        tables, key=attrgetter(*tables_sort), reverse=True)
    for table in most_written_tables[:options.limit]:
        print "%40s || %10.2f tuples/sec" % (
            table.relname, table.total_tup_written / per_second)

    print
    print "== Most Read Tables =="
    print
    # These match the pg_user_table_stats view. schemaname is the
    # namespace (normally 'public'), relname is the table (relation)
    # name. total_tup_red is the total number of rows read.
    # idx_tup_fetch is the number of rows looked up using an index.
    tables_sort = ['total_tup_read', 'idx_tup_fetch', 'schemaname', 'relname']
    most_read_tables = sorted(
        tables, key=attrgetter(*tables_sort), reverse=True)
    for table in most_read_tables[:options.limit]:
        print "%40s || %10.2f tuples/sec" % (
            table.relname, table.total_tup_read / per_second)

    table_bloat_stats = get_bloat_stats(cur, options, 'r')

    if not table_bloat_stats:
        print
        print "(There is no bloat information available in this time range.)"

    else:
        print
        print "== Most Bloated Tables =="
        print
        for bloated_table in table_bloat_stats[:options.limit]:
            print "%40s || %2d%% || %s of %s" % (
                bloated_table.name,
                bloated_table.end_bloat_percent,
                bloated_table.bloat_size,
                bloated_table.table_size)

        index_bloat_stats = get_bloat_stats(cur, options, 'i')

        print
        print "== Most Bloated Indexes =="
        print
        for bloated_index in index_bloat_stats[:options.limit]:
            print "%65s || %2d%% || %s of %s" % (
                bloated_index.sub_name,
                bloated_index.end_bloat_percent,
                bloated_index.bloat_size,
                bloated_index.table_size)

        # Order bloat delta report by size of bloat increase.
        # We might want to change this to percentage bloat increase.
        bloating_sort_key = lambda x: x.delta_bloat_len

        table_bloating_stats = sorted(
            table_bloat_stats, key=bloating_sort_key, reverse=True)

        if table_bloating_stats[0].num_samples <= 1:
            print
            print fill(dedent("""\
                (There are not enough samples in this time range to display
                bloat change statistics)
                """))
        else:
            print
            print "== Most Bloating Tables =="
            print

            for bloated_table in table_bloating_stats[:options.limit]:
                # Bloat decreases are uninteresting, and would need to be in
                # a separate table sorted in reverse anyway.
                if bloated_table.delta_bloat_percent > 0:
                    print "%40s || +%4.2f%% || +%s" % (
                        bloated_table.name,
                        bloated_table.delta_bloat_percent,
                        bloated_table.delta_bloat_size)

            index_bloating_stats = sorted(
                index_bloat_stats, key=bloating_sort_key, reverse=True)

            print
            print "== Most Bloating Indexes =="
            print
            for bloated_index in index_bloating_stats[:options.limit]:
                # Bloat decreases are uninteresting, and would need to be in
                # a separate table sorted in reverse anyway.
                if bloated_index.delta_bloat_percent > 0:
                    print "%65s || +%4.2f%% || +%s" % (
                        bloated_index.sub_name,
                        bloated_index.delta_bloat_percent,
                        bloated_index.delta_bloat_size)


if __name__ == '__main__':
    main()