~launchpad-pqm/launchpad/devel

10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
1
# Copyright 2010 Canonical Ltd.  This software is licensed under the
2
# GNU Affero General Public License version 3 (see the file LICENSE).
3
4
"""Page performance report generated from zserver trace logs."""
5
6
__metaclass__ = type
10209.2.11 by Stuart Bishop
Handle real data
7
__all__ = ['main']
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
8
11775.2.19 by Francis J. Lacoste
Save configurable metrics for charting. Use builtin compression module. Fix some lint.
9
import bz2
10209.2.3 by Stuart Bishop
HTML report + JS graphs
10
from cgi import escape as html_quote
7675.916.98 by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes.
11
from ConfigParser import RawConfigParser
7675.991.2 by Jeroen Vermeulen
Roll back lp:~launchpad/launchpad/recife.
12
import copy
7675.916.98 by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes.
13
import cPickle
11775.2.19 by Francis J. Lacoste
Save configurable metrics for charting. Use builtin compression module. Fix some lint.
14
import csv
10209.2.11 by Stuart Bishop
Handle real data
15
from datetime import datetime
11775.2.19 by Francis J. Lacoste
Save configurable metrics for charting. Use builtin compression module. Fix some lint.
16
import gzip
17
import math
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
18
import os.path
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
19
import re
7675.916.98 by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes.
20
import textwrap
7675.991.2 by Jeroen Vermeulen
Roll back lp:~launchpad/launchpad/recife.
21
from textwrap import dedent
10209.2.3 by Stuart Bishop
HTML report + JS graphs
22
import time
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
23
10209.2.3 by Stuart Bishop
HTML report + JS graphs
24
import simplejson as json
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
25
import sre_constants
10209.2.25 by Stuart Bishop
Pageids in addition to url regexp categories
26
import zc.zservertracelog.tracereport
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
27
10209.2.4 by Stuart Bishop
Shuffle files
28
from canonical.config import config
10209.2.11 by Stuart Bishop
Handle real data
29
from canonical.launchpad.scripts.logger import log
10209.2.7 by Stuart Bishop
Date range filter
30
from lp.scripts.helpers import LPOptionParser
10209.2.4 by Stuart Bishop
Shuffle files
31
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
32
10209.2.25 by Stuart Bishop
Pageids in addition to url regexp categories
33
class Request(zc.zservertracelog.tracereport.Request):
    """Trace log request carrying the extra fields used by this report.

    The url, pageid, ticks, sql_statements and sql_seconds attributes
    default to None until a value is assigned to them.
    """
    url = None
    pageid = None
    ticks = None
    sql_statements = None
    sql_seconds = None

    @staticmethod
    def _interval_seconds(interval):
        """Return the length of a timedelta as a float number of seconds.

        timedelta normalizes itself so that only the days field can be
        negative; the days must therefore be included, otherwise a negative
        interval (or one longer than a day) is reported wrongly.
        """
        return (
            interval.days * 86400
            + interval.seconds
            + interval.microseconds / 1000000.0)

    # Override the broken version in our superclass that always
    # returns an integer.
    @property
    def app_seconds(self):
        """Seconds (with fraction) between start_app_time and app_time."""
        return self._interval_seconds(self.app_time - self.start_app_time)

    # Override the broken version in our superclass that always
    # returns an integer.
    @property
    def total_seconds(self):
        """Seconds (with fraction) between start and end."""
        return self._interval_seconds(self.end - self.start)
53
10209.2.25 by Stuart Bishop
Pageids in addition to url regexp categories
54
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
55
class Category:
    """A Category in our report.

    Requests belong to a Category if the URL matches a regular expression.
    The expression is compiled case-insensitively and in verbose mode, so
    whitespace inside the pattern is ignored.
    """

    def __init__(self, title, regexp):
        """Create a category named title matching URLs against regexp."""
        self.title = title
        self.regexp = regexp
        self._compiled_regexp = re.compile(regexp, re.I | re.X)
        self.partition = False

    def match(self, request):
        """Return true when the request match this category."""
        return self._compiled_regexp.search(request.url) is not None

    def __cmp__(self, other):
        """Order categories by their title, ignoring case.

        Returns -1, 0 or 1 like the cmp() builtin would.
        """
        mine = self.title.lower()
        theirs = other.title.lower()
        return (mine > theirs) - (mine < theirs)

    def __deepcopy__(self, memo):
        """Return an independent copy of this category.

        We provide __deepcopy__ because the copy module doesn't handle
        compiled regular expression by default; the pattern is recompiled
        from its source instead.
        """
        duplicate = Category(self.title, self.regexp)
        # The constructor resets partition to False, so carry it over
        # explicitly to keep the copy faithful.
        duplicate.partition = self.partition
        return duplicate
78
10209.2.3 by Stuart Bishop
HTML report + JS graphs
79
11775.2.17 by Francis J. Lacoste
Add object to compute stats (mean, sum, std, median) using on-line algorithm.
80
class OnlineStatsCalculator:
    """Incremental calculator for count, sum, mean, variance and std.

    Values are fed one at a time and only a handful of running totals are
    kept, following the Welford / Knuth algorithm described at
    http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
    """

    def __init__(self):
        self.count = 0
        self.sum = 0
        # Running sum of squared differences from the mean.
        self.M2 = 0.0
        self.mean = 0.0

    def update(self, x):
        """Fold the value x into the running statistics.

        None values are ignored.
        """
        if x is not None:
            previous_mean = self.mean
            self.count += 1
            self.sum += x
            self.mean = float(self.sum)/self.count
            self.M2 += (x - previous_mean)*(x - self.mean)

    @property
    def variance(self):
        """Return the population variance (0 when nothing was added)."""
        return self.M2/self.count if self.count != 0 else 0

    @property
    def std(self):
        """Return the standard deviation (0 when nothing was added)."""
        return math.sqrt(self.variance) if self.count != 0 else 0

    def __add__(self, other):
        """Return a new calculator combining this one with other.

        Neither operand is modified.
        """
        merged = OnlineStatsCalculator()
        merged.count = self.count + other.count
        merged.sum = self.sum + other.sum
        # When one side is empty its M2 is 0 and no correction is needed.
        merged.M2 = self.M2 + other.M2
        if self.count > 0 and other.count > 0:
            # This is 2.1b in Chan, Tony F.; Golub, Gene H.; LeVeque,
            # Randall J. (1979), "Updating Formulae and a Pairwise Algorithm
            # for Computing Sample Variances.",
            # Technical Report STAN-CS-79-773,
            # Department of Computer Science, Stanford University,
            # ftp://reports.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf .
            scale = float(self.count) / (other.count * merged.count)
            offset = (float(other.count) / self.count) * self.sum - other.sum
            merged.M2 += scale * offset**2
        if merged.count > 0:
            merged.mean = float(merged.sum) / merged.count
        return merged
146
11775.2.17 by Francis J. Lacoste
Add object to compute stats (mean, sum, std, median) using on-line algorithm.
147
148
class OnlineApproximateMedian:
    """Approximate the median of a stream of elements.

    Each value is seen only once and only a few small buckets are kept in
    memory (on the order of log bucket_size of n elements).

    The algorithm was described and analysed in
    D. Cantone and  M.Hofri,
    "Analysis of An Approximate Median Selection Algorithm"
    ftp://ftp.cs.wpi.edu/pub/techreports/pdf/06-17.pdf

    It is similar to Tukey's median of medians technique: the median of
    every bucket_size values is computed, then the median among those
    medians, and so on.
    """

    def __init__(self, bucket_size=9):
        """Creates a new estimator.

        Every run of bucket_size successive elements is reduced to its
        median, and those medians feed further rounds of selection.

        The bucket size should be a low odd-integer.
        """
        self.bucket_size = bucket_size
        # Index of the median in a completed (sorted) bucket.
        self.median_idx = (bucket_size-1)//2
        self.buckets = []

    def update(self, x, order=0):
        """Update with x, entering the selection at round `order`.

        None values are ignored.
        """
        if x is None:
            return

        level = order
        value = x
        while True:
            # Create the missing buckets on demand.
            while len(self.buckets) <= level:
                self.buckets.append([])
            bucket = self.buckets[level]
            bucket.append(value)
            if len(bucket) != self.bucket_size:
                return
            # The bucket is full: promote its median to the next round
            # and empty it in place for reuse.
            value = sorted(bucket)[self.median_idx]
            del bucket[:]
            level += 1

    @property
    def median(self):
        """Return the approximate median (0 when no value was seen)."""
        # Find the 'weighted' median: an element that survived i rounds
        # of selection weighs bucket_size ** i.
        weighted = []
        for level, bucket in enumerate(self.buckets):
            weight = self.bucket_size ** level
            weighted.extend([value, weight] for value in bucket)
        if not weighted:
            return 0

        # Each weight is the equivalent of having the candidate appear
        # that number of times in the array; the median of that expanded
        # array is what we look for. The items are not actually expanded,
        # to conserve memory.
        total_weight = sum(weight for _, weight in weighted)
        threshold = (total_weight-1) / 2
        running_weight = 0
        for value, weight in sorted(weighted):
            running_weight += weight
            if running_weight > threshold:
                return value

    def __add__(self, other):
        """Merge two approximators together.

        Every candidate of `other` is fed through the standard algorithm
        at the level it had already reached, so an item that survived two
        rounds of selection competes with items that did the same.
        """
        merged = OnlineApproximateMedian(self.bucket_size)
        merged.buckets = copy.deepcopy(self.buckets)
        for level, bucket in enumerate(other.buckets):
            for value in bucket:
                merged.update(value, level)
        return merged
243
11775.2.17 by Francis J. Lacoste
Add object to compute stats (mean, sum, std, median) using on-line algorithm.
244
10209.2.27 by Stuart Bishop
Total hits
245
class Stats:
    """Plain container of request statistics.

    All times are in seconds.
    """
    # Request counts and rendering times.
    total_hits = 0  # Total hits.
    total_time = 0  # Total time spent rendering.
    mean = 0  # Mean time per hit.
    median = 0  # Median time per hit.
    std = 0  # Standard deviation per hit.
    histogram = None  # Request times histogram.

    # Time spent waiting for SQL to process.
    total_sqltime = 0  # Total.
    mean_sqltime = 0  # Mean.
    median_sqltime = 0  # Median.
    std_sqltime = 0  # Standard deviation.

    # Number of SQL statements issued.
    total_sqlstatements = 0
    mean_sqlstatements = 0
    median_sqlstatements = 0
    std_sqlstatements = 0

    @property
    def ninetyninth_percentile_time(self):
        """Time under which 99% of requests are rendered.

        This is estimated as the mean plus 3 standard deviations. In a
        daily report many URLs or PageIds won't have 100 requests, so this
        estimator is more useful than an exact percentile.
        """
        spread = 3*self.std
        return self.mean + spread

    @property
    def ninetyninth_percentile_sqltime(self):
        """SQL time under which 99% of requests are rendered.

        This is estimated as the mean plus 3 standard deviations.
        """
        spread = 3*self.std_sqltime
        return self.mean_sqltime + spread

    @property
    def ninetyninth_percentile_sqlstatements(self):
        """Number of SQL statements under which 99% of requests are rendered.

        This is estimated as the mean plus 3 standard deviations.
        """
        spread = 3*self.std_sqlstatements
        return self.mean_sqlstatements + spread

    def text(self):
        """Return a textual version of the stats."""
        values = (
            self.total_hits,
            self.total_time, self.mean, self.median, self.std,
            self.total_sqltime, self.mean_sqltime, self.median_sqltime,
            self.std_sqltime,
            self.total_sqlstatements, self.mean_sqlstatements,
            self.median_sqlstatements, self.std_sqlstatements)
        template = """
        <Stats for %d requests:
            Time:     total=%.2f; mean=%.2f; median=%.2f; std=%.2f
            SQL time: total=%.2f; mean=%.2f; median=%.2f; std=%.2f
            SQL stmt: total=%.f;  mean=%.2f; median=%.f; std=%.2f
            >"""
        return textwrap.dedent(template % values)
307
11775.2.4 by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports.
308
11775.2.18 by Francis J. Lacoste
Compute reports using only one pass over the data.
309
class OnlineStats(Stats):
    """Stats whose values are computed incrementally.

    Call update() once per request; the statistics are maintained with
    minimum storage space instead of keeping every request around.
    """

    def __init__(self, histogram_width, histogram_resolution):
        # One calculator / median approximator pair per tracked metric.
        self.time_stats = OnlineStatsCalculator()
        self.time_median_approximate = OnlineApproximateMedian()
        self.sql_time_stats = OnlineStatsCalculator()
        self.sql_time_median_approximate = OnlineApproximateMedian()
        self.sql_statements_stats = OnlineStatsCalculator()
        self.sql_statements_median_approximate = OnlineApproximateMedian()
        self.histogram = Histogram(histogram_width, histogram_resolution)

    # Request time.

    @property
    def total_hits(self):
        return self.time_stats.count

    @property
    def total_time(self):
        return self.time_stats.sum

    @property
    def mean(self):
        return self.time_stats.mean

    @property
    def median(self):
        return self.time_median_approximate.median

    @property
    def std(self):
        return self.time_stats.std

    # SQL time.

    @property
    def total_sqltime(self):
        return self.sql_time_stats.sum

    @property
    def mean_sqltime(self):
        return self.sql_time_stats.mean

    @property
    def median_sqltime(self):
        return self.sql_time_median_approximate.median

    @property
    def std_sqltime(self):
        return self.sql_time_stats.std

    # SQL statements.

    @property
    def total_sqlstatements(self):
        return self.sql_statements_stats.sum

    @property
    def mean_sqlstatements(self):
        return self.sql_statements_stats.mean

    @property
    def median_sqlstatements(self):
        return self.sql_statements_median_approximate.median

    @property
    def std_sqlstatements(self):
        return self.sql_statements_stats.std

    def update(self, request):
        """Update the stats based on request."""
        elapsed = request.app_seconds
        self.time_stats.update(elapsed)
        self.time_median_approximate.update(elapsed)

        sql_elapsed = request.sql_seconds
        self.sql_time_stats.update(sql_elapsed)
        self.sql_time_median_approximate.update(sql_elapsed)

        statements = request.sql_statements
        self.sql_statements_stats.update(statements)
        self.sql_statements_median_approximate.update(statements)

        self.histogram.update(request.app_seconds)

    def __add__(self, other):
        """Merge another OnlineStats with this one.

        A new object is returned; it starts as a deep copy of this one and
        each of its components is replaced by the sum with the matching
        component of other.
        """
        merged = copy.deepcopy(self)
        merged.time_stats = merged.time_stats + other.time_stats
        merged.time_median_approximate = (
            merged.time_median_approximate + other.time_median_approximate)
        merged.sql_time_stats = merged.sql_time_stats + other.sql_time_stats
        merged.sql_time_median_approximate = (
            merged.sql_time_median_approximate +
            other.sql_time_median_approximate)
        merged.sql_statements_stats = (
            merged.sql_statements_stats + other.sql_statements_stats)
        merged.sql_statements_median_approximate = (
            merged.sql_statements_median_approximate +
            other.sql_statements_median_approximate)
        merged.histogram = self.histogram + other.histogram
        return merged
400
11775.2.18 by Francis J. Lacoste
Compute reports using only one pass over the data.
401
12929.6.9 by Francis J. Lacoste
Add Histogram and tests.
402
class Histogram:
    """A simple object to compute histogram of a value.

    Bins are stored in the `bins` list as [lower_bound, frequency] pairs;
    `count` is the total number of values recorded.
    """

    @staticmethod
    def from_bins_data(data):
        """Create an histogram from existing bins data.

        data is a sequence of [lower_bound, frequency] pairs whose first
        lower bound is 0. The bin size is taken from the lower bound of
        the second bin, so at least two bins are required.
        """
        assert data[0][0] == 0, "First bin should start at zero."

        hist = Histogram(len(data), data[1][0])
        for idx, entry in enumerate(data):
            hist.count += entry[1]
            hist.bins[idx][1] = entry[1]

        return hist

    def __init__(self, bins_count, bins_size):
        """Create a new histogram.

        The histogram will count the frequency of values in bins_count bins
        of bins_size each.
        """
        self.count = 0
        self.bins_count = bins_count
        self.bins_size = bins_size
        self.bins = []
        for x in range(bins_count):
            self.bins.append([x*bins_size, 0])

    @property
    def bins_relative(self):
        """Return the bins with the frequency expressed as a ratio.

        An empty histogram yields a ratio of 0.0 for every bin.
        """
        if self.count == 0:
            # Nothing recorded yet: avoid dividing by zero.
            return [[x, 0.0] for x, f in self.bins]
        return [[x, float(f)/self.count] for x, f in self.bins]

    def update(self, value):
        """Update the histogram for this value.

        All values higher than the last bin minimum are counted in that last
        bin. Negative values are counted in the first bin.
        """
        self.count += 1
        idx = int(min(self.bins_count-1, value / self.bins_size))
        # A negative index would silently count from the end of the list.
        idx = max(idx, 0)
        self.bins[idx][1] += 1

    def __repr__(self):
        """A string representation of this histogram."""
        return "<Histogram %s>" % self.bins

    def __eq__(self, other):
        """Two histogram are equals if they have the same bins content."""
        if not isinstance(other, Histogram):
            return False

        if self.bins_count != other.bins_count:
            return False

        if self.bins_size != other.bins_size:
            return False

        for idx, other_bin in enumerate(other.bins):
            if self.bins[idx][1] != other_bin[1]:
                return False

        return True

    def __ne__(self, other):
        """Inverse of __eq__ (Python 2 does not derive it automatically)."""
        return not self.__eq__(other)

    def __add__(self, other):
        """Add the frequency of the other histogram to this one.

        The resulting histogram has the same bins_size than this one.
        If the other one has a bigger bins_size, we'll assume an even
        distribution and distribute the frequency across the smaller bins. If
        it has a lower bin_size, we'll aggregate its bins into the larger
        ones. We only support different bins_size if the ratio can be
        expressed as the ratio between 1 and an integer.

        The resulting histogram is as wide as the widest one. Neither
        operand is modified.
        """
        ratio = float(other.bins_size) / self.bins_size
        if ratio >= 1:
            aligned = int(ratio) == ratio
            other_width = other.bins_count * ratio
        else:
            # Work with the inverse ratio computed directly from the bin
            # sizes: 1/ratio and idx*ratio accumulate rounding errors
            # (e.g. 49 * (1/49.0) is just below 1).
            inverse = float(self.bins_size) / other.bins_size
            aligned = int(inverse) == inverse
            other_width = other.bins_count / inverse

        assert aligned, (
            "We only support different bins size when the ratio is an "
            "integer to 1: %s"
            % ratio)

        bins_count = max(self.bins_count, math.ceil(other_width))
        total = Histogram(int(bins_count), self.bins_size)
        total.count = self.count + other.count

        # Copy our bins into the total
        for idx, own_bin in enumerate(self.bins):
            total.bins[idx][1] = own_bin[1]

        if ratio >= 1:
            # We distribute the frequency across the bins.
            # For example. if the ratio is 3:1, we'll add a third
            # of the lower resolution bin to 3 of the higher one.
            for other_idx, other_bin in enumerate(other.bins):
                f = other_bin[1] / ratio
                start = int(math.floor(other_idx * ratio))
                end = int(start + ratio)
                for idx in range(start, end):
                    total.bins[idx][1] += f
        else:
            # We need to collect the higher resolution bins into the
            # corresponding lower one.
            for other_idx, other_bin in enumerate(other.bins):
                idx = int(other_idx // inverse)
                total.bins[idx][1] += other_bin[1]

        return total
510
511
11775.2.18 by Francis J. Lacoste
Compute reports using only one pass over the data.
512
class RequestTimes:
    """Collect statistics from requests.

    Statistics are updated by calling the add_request() method.

    Statistics for mean/stddev/total/median for request times, SQL times and
    number of SQL statements are collected.

    They are grouped by Category, URL or PageID.
    """

    def __init__(self, categories, options):
        """Create empty statistics for every category.

        :param categories: Iterable of category objects. Each one is
            paired with its own OnlineStats in `category_times`.
        :param options: Object providing the `pageids`, `top_urls`,
            `resolution` and `timeout` attributes.
        """
        self.by_pageids = options.pageids
        self.top_urls = options.top_urls
        # We only keep in memory 50 times the number of URLs we want to
        # return. The number of URLs can go pretty high (because of the
        # distinct query parameters).
        #
        # Keeping all in memory at once is prohibitive. On a small but
        # representative sample, keeping 50 times the possible number of
        # candidates and culling to 90% on overflow, generated an identical
        # report than keeping all the candidates in-memory.
        #
        # Keeping 10 times or culling at 90% generated a near-identical report
        # (it differed a little in the tail.)
        #
        # The size/cull parameters might need to change if the requests
        # distribution become very different than what it currently is.
        self.top_urls_cache_size = self.top_urls * 50

        # Histogram has a bin per resolution up to our timeout
        # (and an extra bin).
        self.histogram_resolution = float(options.resolution)
        self.histogram_width = int(
            options.timeout / self.histogram_resolution) + 1
        self.category_times = [
            (category, self._new_stats()) for category in categories]
        self.url_times = {}
        self.pageid_times = {}

    def _new_stats(self):
        """Return an empty OnlineStats using our histogram settings."""
        return OnlineStats(self.histogram_width, self.histogram_resolution)

    def _top_by_total_time(self, url_times, limit):
        """Return at most `limit` (url, stats) pairs, highest total first."""
        ranked = sorted(
            url_times.items(),
            key=lambda item: item[1].total_time,
            reverse=True)
        return ranked[:limit]

    def add_request(self, request):
        """Add request to the set of requests we collect stats for."""
        matched = []
        for category, stats in self.category_times:
            if category.match(request):
                stats.update(request)
                if category.partition:
                    matched.append(category.title)

        # A request is expected to match exactly one partition category.
        if len(matched) > 1:
            log.warning(
                "Multiple partition categories matched by %s (%s)",
                request.url, ", ".join(matched))
        elif not matched:
            log.warning("%s isn't part of the partition", request.url)

        if self.by_pageids:
            pageid = request.pageid or 'Unknown'
            # Only build a new OnlineStats for pageids not seen before;
            # this method runs once per request.
            stats = self.pageid_times.get(pageid)
            if stats is None:
                stats = self.pageid_times[pageid] = self._new_stats()
            stats.update(request)

        if self.top_urls:
            stats = self.url_times.get(request.url)
            if stats is None:
                stats = self.url_times[request.url] = self._new_stats()
            stats.update(request)
            # Whenever we have more URLs than we need to, discard 10%
            # that is less likely to end up in the top.
            if len(self.url_times) > self.top_urls_cache_size:
                cutoff = int(self.top_urls_cache_size * 0.90)
                self.url_times = dict(
                    self._top_by_total_time(self.url_times, cutoff))

    def get_category_times(self):
        """Return the times for each category."""
        return self.category_times

    def get_top_urls_times(self):
        """Return the times for the Top URL by total time"""
        return self._top_by_total_time(self.url_times, self.top_urls)

    def get_pageid_times(self):
        """Return the times for the pageids."""
        # Sort the result by pageid
        return sorted(self.pageid_times.items())

    def __add__(self, other):
        """Merge two RequestTimes together.

        Neither operand is modified: the result starts as a deep copy of
        `self`. Categories are matched by title; categories only present
        in `other` are appended. The merged URL cache is cropped to
        `top_urls_cache_size` entries.
        """
        results = copy.deepcopy(self)
        for other_category, other_stats in other.category_times:
            for i, (category, stats) in enumerate(self.category_times):
                if category.title == other_category.title:
                    results.category_times[i] = (
                        category, stats + other_stats)
                    break
            else:
                results.category_times.append(
                    (other_category, copy.deepcopy(other_stats)))

        url_times = results.url_times
        for url, stats in other.url_times.items():
            if url in url_times:
                url_times[url] += stats
            else:
                url_times[url] = copy.deepcopy(stats)
        # Only keep top_urls_cache_size entries. The crop must look at and
        # replace the merged mapping on `results`, not the one on `self`.
        if len(url_times) > self.top_urls_cache_size:
            results.url_times = dict(
                self._top_by_total_time(
                    url_times, self.top_urls_cache_size))

        pageid_times = results.pageid_times
        for pageid, stats in other.pageid_times.items():
            if pageid in pageid_times:
                pageid_times[pageid] += stats
            else:
                pageid_times[pageid] = copy.deepcopy(stats)

        return results
643
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
644
645
def main():
    """Generate the page performance reports from the command line.

    Parses the options, loads the categories, partition and metrics
    definitions from the configuration file, then either parses the
    tracelogs given as arguments or (with --merge) aggregates previously
    pickled statistics. The HTML reports, the pickled statistics
    (stats.pck.bz2) and the metrics file (metrics.dat) are written to the
    output directory.

    :return: 0 on success, 1 when a category regexp cannot be compiled.
        Invalid command line usage is reported through parser.error().
    """
    parser = LPOptionParser("%prog [args] tracelog [...]")

    parser.add_option(
        "-c", "--config", dest="config",
        default=os.path.join(
            config.root, "utilities", "page-performance-report.ini"),
        metavar="FILE", help="Load configuration from FILE")
    parser.add_option(
        "--from", dest="from_ts", type="datetime",
        default=None, metavar="TIMESTAMP",
        help="Ignore log entries before TIMESTAMP")
    parser.add_option(
        "--until", dest="until_ts", type="datetime",
        default=None, metavar="TIMESTAMP",
        help="Ignore log entries after TIMESTAMP")
    parser.add_option(
        "--no-partition", dest="partition",
        action="store_false", default=True,
        help="Do not produce partition report")
    parser.add_option(
        "--no-categories", dest="categories",
        action="store_false", default=True,
        help="Do not produce categories report")
    parser.add_option(
        "--no-pageids", dest="pageids",
        action="store_false", default=True,
        help="Do not produce pageids report")
    parser.add_option(
        "--top-urls", dest="top_urls", type=int, metavar="N",
        default=50, help="Generate report for top N urls by hitcount.")
    parser.add_option(
        "--directory", dest="directory",
        default=os.getcwd(), metavar="DIR",
        help="Output reports in DIR directory")
    parser.add_option(
        "--timeout", dest="timeout",
        # Default to 9: our production timeout.
        default=9, type="int", metavar="SECONDS",
        help="The configured timeout value: used to determine high risk " +
        "page ids. That would be pages which 99% under render time is "
        "greater than timeoout - 2s. Default is %defaults.")
    parser.add_option(
        "--histogram-resolution", dest="resolution",
        # Default to 0.5s
        default=0.5, type="float", metavar="SECONDS",
        help="The resolution of the histogram bin width. Detault to "
        "%defaults.")
    parser.add_option(
        "--merge", dest="merge",
        default=False, action='store_true',
        help="Files are interpreted as pickled stats and are aggregated " +
        "for the report.")

    options, args = parser.parse_args()

    if not os.path.isdir(options.directory):
        parser.error("Directory %s does not exist" % options.directory)

    if len(args) == 0:
        parser.error("At least one zserver tracelog file must be provided")

    if options.from_ts is not None and options.until_ts is not None:
        if options.from_ts > options.until_ts:
            parser.error(
                "--from timestamp %s is after --until timestamp %s"
                % (options.from_ts, options.until_ts))
    if options.from_ts is not None or options.until_ts is not None:
        if options.merge:
            parser.error('--from and --until cannot be used with --merge')

    for filename in args:
        if not os.path.exists(filename):
            parser.error("Tracelog file %s not found." % filename)

    if not os.path.exists(options.config):
        parser.error("Config file %s not found." % options.config)

    # Need a better config mechanism as ConfigParser doesn't preserve order.
    script_config = RawConfigParser()
    script_config.optionxform = str  # Make keys case sensitive.
    config_file = open(options.config)
    try:
        script_config.readfp(config_file)
    finally:
        config_file.close()

    categories = []  # A list of Category, in report order.
    for option in script_config.options('categories'):
        regexp = script_config.get('categories', option)
        try:
            categories.append(Category(option, regexp))
        except sre_constants.error as x:
            log.fatal("Unable to compile regexp %r (%s)" % (regexp, x))
            return 1
    categories.sort()

    if len(categories) == 0:
        parser.error("No data in [categories] section of configuration.")

    # Determine the categories making a partition of the requests
    for option in script_config.options('partition'):
        for category in categories:
            if category.title == option:
                category.partition = True
                break
        else:
            log.warning(
                "In partition definition: %s isn't a defined category",
                option)

    times = RequestTimes(categories, options)

    if options.merge:
        for filename in args:
            log.info('Merging %s...' % filename)
            # NOTE: unpickling can execute arbitrary code. Only merge
            # stats files produced by a trusted run of this script.
            f = bz2.BZ2File(filename, 'r')
            try:
                times += cPickle.load(f)
            finally:
                f.close()
    else:
        parse(args, times, options)

    category_times = times.get_category_times()

    pageid_times = []
    url_times = []
    if options.top_urls:
        url_times = times.get_top_urls_times()
    if options.pageids:
        pageid_times = times.get_pageid_times()

    def _report_filename(filename):
        return os.path.join(options.directory, filename)

    # Partition report
    if options.partition:
        report_filename = _report_filename('partition.html')
        log.info("Generating %s", report_filename)
        partition_times = [
            category_time
            for category_time in category_times
            if category_time[0].partition]
        html_report(
            open(report_filename, 'w'), partition_times, None, None,
            histogram_resolution=options.resolution,
            category_name='Partition')

    # Category only report.
    if options.categories:
        report_filename = _report_filename('categories.html')
        log.info("Generating %s", report_filename)
        html_report(
            open(report_filename, 'w'), category_times, None, None,
            histogram_resolution=options.resolution)

    # Pageid only report.
    if options.pageids:
        report_filename = _report_filename('pageids.html')
        log.info("Generating %s", report_filename)
        html_report(
            open(report_filename, 'w'), None, pageid_times, None,
            histogram_resolution=options.resolution)

    # Top URL only report.
    if options.top_urls:
        report_filename = _report_filename('top%d.html' % options.top_urls)
        log.info("Generating %s", report_filename)
        html_report(
            open(report_filename, 'w'), None, None, url_times,
            histogram_resolution=options.resolution)

    # Combined report.
    if options.categories and options.pageids:
        report_filename = _report_filename('combined.html')
        html_report(
            open(report_filename, 'w'),
            category_times, pageid_times, url_times,
            histogram_resolution=options.resolution)

    # Report of likely timeout candidates
    report_filename = _report_filename('timeout-candidates.html')
    log.info("Generating %s", report_filename)
    html_report(
        open(report_filename, 'w'), None, pageid_times, None,
        options.timeout - 2,
        histogram_resolution=options.resolution)

    # Save the times cache for later merging.
    report_filename = _report_filename('stats.pck.bz2')
    log.info("Saving times database in %s", report_filename)
    stats_file = bz2.BZ2File(report_filename, 'w')
    try:
        cPickle.dump(times, stats_file, protocol=cPickle.HIGHEST_PROTOCOL)
    finally:
        stats_file.close()

    # Output metrics for selected categories.
    report_filename = _report_filename('metrics.dat')
    log.info('Saving category_metrics %s', report_filename)
    metrics_file = open(report_filename, 'w')
    try:
        writer = csv.writer(metrics_file, delimiter=':')
        date = options.until_ts or options.from_ts or datetime.utcnow()
        date = time.mktime(date.timetuple())

        # In the [metrics] section the option is the metric name and the
        # value is the title of the category it is computed from.
        for option in script_config.options('metrics'):
            name = script_config.get('metrics', option)
            for category, stats in category_times:
                if category.title == name:
                    writer.writerows([
                        ("%s_99" % option, "%f@%d" % (
                            stats.ninetyninth_percentile_time, date)),
                        ("%s_hits" % option,
                         "%d@%d" % (stats.total_hits, date))])
                    break
            else:
                log.warning("Can't find category %s for metric %s" % (
                    name, option))
    finally:
        metrics_file.close()

    return 0
858
859
10209.2.11 by Stuart Bishop
Handle real data
860
def smart_open(filename, mode='r'):
    """Open a file, transparently handling compressed files.

    Compressed files are detected by file extension: '.bz2' files are
    opened with bz2.BZ2File, '.gz' files with gzip.GzipFile and anything
    else with the builtin open().

    :param filename: Path of the file to open.
    :param mode: Mode passed to the underlying opener. Defaults to 'r'.
    :return: An open file-like object.
    """
    ext = os.path.splitext(filename)[1]
    # Honour the requested mode for compressed files too, instead of
    # silently opening them for reading.
    if ext == '.bz2':
        return bz2.BZ2File(filename, mode)
    elif ext == '.gz':
        return gzip.GzipFile(filename, mode)
    else:
        return open(filename, mode)
872
873
874
class MalformedLine(Exception):
    """Raised when a trace log line cannot be interpreted."""
876
877
878
# Matches 'YYYY-MM-DD HH:MM:SS' with an optional '.ffffff' microseconds
# suffix. The separator before the microseconds must be a literal dot.
_ts_re = re.compile(
    r'^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?:\.(\d{6}))?$')


def parse_timestamp(ts_string):
    """Convert a tracelog timestamp string into a datetime.

    :param ts_string: A string formatted as 'YYYY-MM-DD HH:MM:SS',
        optionally followed by '.' and six digits of microseconds.
    :return: The corresponding naive datetime.
    :raises ValueError: If the string doesn't have the expected format, or
        if its fields are rejected by the datetime constructor.
    """
    match = _ts_re.search(ts_string)
    if match is None:
        raise ValueError("Invalid timestamp")
    # The microseconds group is None when the suffix is absent.
    fields = [int(elem) for elem in match.groups() if elem is not None]
    return datetime(*fields)
888
889
11775.2.4 by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports.
890
def _require_args(args, count):
    """Raise MalformedLine unless at least `count` arguments are present."""
    if len(args) < count:
        raise MalformedLine()


def parse(tracefiles, times, options):
    """Parse tracelog files and feed the completed requests to `times`.

    Each line is split into a record type, a request id, a date, a time
    and the remaining arguments. Requests are tracked by request id from
    their 'B' record until their 'E' record, at which point they are
    passed to times.add_request(). Malformed lines are logged and skipped.

    :param tracefiles: Iterable of tracelog file names; compressed files
        are handled by smart_open().
    :param times: Object whose add_request() method receives every
        completed request.
    :param options: Object providing the `from_ts` and `until_ts`
        attributes used to filter records by timestamp (None to disable).
    """
    # In-flight requests, keyed by request id. This is shared across files
    # so that a request can span two consecutive tracelogs.
    requests = {}
    total_requests = 0
    for tracefile in tracefiles:
        log.info('Processing %s', tracefile)
        tracelog = smart_open(tracefile)
        try:
            for line in tracelog:
                line = line.rstrip()
                try:
                    record = line.split(' ', 7)
                    try:
                        record_type, request_id, date, time_ = record[:4]
                    except ValueError:
                        raise MalformedLine()

                    if record_type == 'S':
                        # Short circuit - we don't care about these
                        # entries.
                        continue

                    # Parse the timestamp.
                    ts_string = '%s %s' % (date, time_)
                    try:
                        dt = parse_timestamp(ts_string)
                    except ValueError:
                        raise MalformedLine(
                            'Invalid timestamp %s' % repr(ts_string))

                    # Filter entries by command line date range.
                    if options.from_ts is not None and dt < options.from_ts:
                        continue  # Skip to next line.
                    if options.until_ts is not None and dt > options.until_ts:
                        break  # Skip to next log file.

                    args = record[4:]

                    if record_type == 'B':  # Request begins.
                        _require_args(args, 2)
                        requests[request_id] = Request(dt, args[0], args[1])
                        continue

                    request = requests.get(request_id, None)
                    if request is None:  # Just ignore partial records.
                        continue

                    # Old style extension record from Launchpad. Just
                    # contains the URL.
                    if (record_type == '-' and len(args) == 1
                        and args[0].startswith('http')):
                        request.url = args[0]

                    # New style extension record with a prefix.
                    elif record_type == '-':
                        # Launchpad outputs several things as tracelog
                        # extension records. We include a prefix to tell
                        # them apart.
                        _require_args(args, 1)

                        parse_extension_record(request, args)

                    elif record_type == 'I':  # Got request input.
                        _require_args(args, 1)
                        request.I(dt, args[0])

                    elif record_type == 'C':  # Entered application thread.
                        request.C(dt)

                    elif record_type == 'A':  # Application done.
                        _require_args(args, 2)
                        request.A(dt, args[0], args[1])

                    elif record_type == 'E':  # Request done.
                        del requests[request_id]
                        request.E(dt)
                        total_requests += 1
                        if total_requests % 10000 == 0:
                            log.debug("Parsed %d requests", total_requests)

                        # Add the request to any matching categories.
                        times.add_request(request)
                    else:
                        raise MalformedLine(
                            'Unknown record type %s' % record_type)
                except MalformedLine as x:
                    log.error(
                        "Malformed line %s (%s)" % (repr(line), x))
        finally:
            # Also reached when we break out early on --until.
            tracelog.close()
10209.2.2 by Stuart Bishop
Basic page performance stats, although they are currently wrong
977
10209.2.7 by Stuart Bishop
Date range filter
978
10209.2.25 by Stuart Bishop
Pageids in addition to url regexp categories
979
def parse_extension_record(request, args):
    """Decode a ZServer extension record and annotate the request.

    :param request: The request object to annotate. Depending on the
        record prefix, its `url`, `pageid`, or `sql_statements` and
        `sql_seconds` attributes are set.
    :param args: The arguments of the extension record. args[0] is the
        extension prefix: 'u' (url), 'p' (pageid) or 't' (SQL statistics).
    :raises MalformedLine: If the prefix is unknown, or if a 't' record
        has the wrong number of arguments or non-numeric values.
    """
    prefix = args[0]

    if prefix == 'u':
        # The value may contain spaces and so be split over several
        # arguments; an empty value is stored as None rather than ''.
        request.url = ' '.join(args[1:]) or None
    elif prefix == 'p':
        request.pageid = ' '.join(args[1:]) or None
    elif prefix == 't':
        if len(args) != 4:
            raise MalformedLine("Wrong number of arguments %s" % (args,))
        # args[1] is deliberately ignored here.
        # Convert both values before touching the request, and report
        # garbage as a MalformedLine so the caller logs the line and
        # carries on instead of aborting on a stray ValueError.
        try:
            sql_statements = int(args[2])
            # NOTE(review): the division suggests the trace log records
            # this value in milliseconds -- confirm against the emitter.
            sql_seconds = float(args[3]) / 1000
        except ValueError:
            raise MalformedLine(
                "Non-numeric SQL statistics %s" % (args,))
        request.sql_statements = sql_statements
        request.sql_seconds = sql_seconds
    else:
        raise MalformedLine(
            "Unknown extension prefix %s" % prefix)
995
996
10209.3.5 by Stuart Bishop
Merge lp:~lifeless/launchpad/foundations, resolving conflicts
997
def html_report(
    outf, category_times, pageid_times, url_times,
    ninetyninth_percentile_threshold=None, histogram_resolution=0.5,
    category_name='Category'):
    """Write an html report to outf.

    The report has up to three sortable tables (categories, pageids and
    top urls), each preceded by an entry in a table of contents, followed
    by the javascript that draws one time-distribution histogram per
    table row. A section is only emitted when its input is non-empty.

    :param outf: A file object to write the report to.
    :param category_times: The time statistics for categories, as a
        sequence of (category, stats) pairs. The category's `title` and
        `regexp` are shown in the row title.
    :param pageid_times: The time statistics for pageids, as a sequence
        of (pageid, stats) pairs.
    :param url_times: The time statistics for the top XXX urls, as a
        sequence of (url, stats) pairs.
    :param ninetyninth_percentile_threshold: Lower threshold for inclusion of
        pages in the pageid section; pages where 99 percent of the requests are
        served under this threshold will not be included.
    :param histogram_resolution: used as the histogram bar width
    :param category_name: The name to use for category report. Defaults to
        'Category'. It is inserted into the page without HTML quoting.
    """

    def emit(text):
        # One chunk of output followed by a newline.
        outf.write(text + '\n')

    emit(dedent('''\
        <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
        <html>
        <head>
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
        <title>Launchpad Page Performance Report %(date)s</title>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.min.js"
            ></script>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/jquery.appear-1.1.1.min.js"
            ></script>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.flot.min.js"
            ></script>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/sorttable.js"></script>
        <style type="text/css">
            h3 { font-weight: normal; font-size: 1em; }
            thead th { padding-left: 1em; padding-right: 1em; }
            .category-title { text-align: right; padding-right: 2em;
                              max-width: 25em; }
            .regexp { font-size: x-small; font-weight: normal; }
            .mean { text-align: right; padding-right: 1em; }
            .median { text-align: right; padding-right: 1em; }
            .standard-deviation { text-align: right; padding-right: 1em; }
            .histogram { padding: 0.5em 1em; width:400px; height:250px; }
            .odd-row { background-color: #eeeeff; }
            .even-row { background-color: #ffffee; }
            table.sortable thead {
                background-color:#eee;
                color:#666666;
                font-weight: bold;
                cursor: default;
                }
            td.numeric {
                font-family: monospace;
                text-align: right;
                padding: 1em;
                }
            .clickable { cursor: pointer; }
            .total-hits, .histogram, .median-sqltime,
            .median-sqlstatements { border-right: 1px dashed #000000; }
        </style>
        </head>
        <body>
        <h1>Launchpad Page Performance Report</h1>
        <h3>%(date)s</h3>
        ''' % {'date': time.ctime()}))

    table_header = dedent('''\
        <table class="sortable page-performance-report">
        <caption align="top">Click on column headings to sort.</caption>
        <thead>
            <tr>
            <th class="clickable">Name</th>

            <th class="clickable">Total Hits</th>

            <th class="clickable">99% Under Time (secs)</th>

            <th class="clickable">Mean Time (secs)</th>
            <th class="clickable">Time Standard Deviation</th>
            <th class="clickable">Median Time (secs)</th>
            <th class="sorttable_nosort">Time Distribution</th>

            <th class="clickable">99% Under SQL Time (secs)</th>
            <th class="clickable">Mean SQL Time (secs)</th>
            <th class="clickable">SQL Time Standard Deviation</th>
            <th class="clickable">Median SQL Time (secs)</th>

            <th class="clickable">99% Under SQL Statements</th>
            <th class="clickable">Mean SQL Statements</th>
            <th class="clickable">SQL Statement Standard Deviation</th>
            <th class="clickable">Median SQL Statements</th>

            <th class="clickable">Hits * 99% Under SQL Statement</th>
            </tr>
        </thead>
        <tbody>
        ''')
    table_footer = "</tbody></table>"

    # Store our generated histograms to output Javascript later.
    histograms = []

    def handle_times(html_title, stats):
        """Write one table row for stats and queue its histogram.

        html_title is inserted as-is, so callers must already have
        HTML-quoted it.
        """
        histograms.append(stats.histogram)
        # The position in `histograms` doubles as the id of the
        # placeholder <div> that the javascript emitted at the end of
        # the report fills in.
        histogram_id = len(histograms) - 1
        emit(dedent("""\
            <tr>
            <th class="category-title">%s</th>
            <td class="numeric total-hits">%d</td>
            <td class="numeric 99pc-under-time">%.2f</td>
            <td class="numeric mean-time">%.2f</td>
            <td class="numeric std-time">%.2f</td>
            <td class="numeric median-time">%.2f</td>
            <td>
                <div class="histogram" id="histogram%d"></div>
            </td>
            <td class="numeric 99pc-under-sqltime">%.2f</td>
            <td class="numeric mean-sqltime">%.2f</td>
            <td class="numeric std-sqltime">%.2f</td>
            <td class="numeric median-sqltime">%.2f</td>

            <td class="numeric 99pc-under-sqlstatement">%.f</td>
            <td class="numeric mean-sqlstatements">%.2f</td>
            <td class="numeric std-sqlstatements">%.2f</td>
            <td class="numeric median-sqlstatements">%.2f</td>

            <td class="numeric high-db-usage">%.f</td>
            </tr>
            """ % (
                html_title,
                stats.total_hits, stats.ninetyninth_percentile_time,
                stats.mean, stats.std, stats.median,
                histogram_id,
                stats.ninetyninth_percentile_sqltime, stats.mean_sqltime,
                stats.std_sqltime, stats.median_sqltime,
                stats.ninetyninth_percentile_sqlstatements,
                stats.mean_sqlstatements,
                stats.std_sqlstatements, stats.median_sqlstatements,
                stats.ninetyninth_percentile_sqlstatements * stats.total_hits,
                )))

    # Table of contents
    emit('<ol>')
    if category_times:
        emit('<li><a href="#catrep">%s Report</a></li>' % (
            category_name))
    if pageid_times:
        emit('<li><a href="#pageidrep">Pageid Report</a></li>')
    if url_times:
        emit('<li><a href="#topurlrep">Top URL Report</a></li>')
    emit('</ol>')

    if category_times:
        emit('<h2 id="catrep">%s Report</h2>' % (
            category_name))
        emit(table_header)
        for category, times in category_times:
            html_title = '%s<br/><span class="regexp">%s</span>' % (
                html_quote(category.title), html_quote(category.regexp))
            handle_times(html_title, times)
        emit(table_footer)

    if pageid_times:
        emit('<h2 id="pageidrep">Pageid Report</h2>')
        emit(table_header)
        for pageid, times in pageid_times:
            # Leave out pages that are comfortably under the threshold.
            if (ninetyninth_percentile_threshold is not None and
                    times.ninetyninth_percentile_time <
                    ninetyninth_percentile_threshold):
                continue
            handle_times(html_quote(pageid), times)
        emit(table_footer)

    if url_times:
        emit('<h2 id="topurlrep">Top URL Report</h2>')
        emit(table_header)
        for url, times in url_times:
            handle_times(html_quote(url), times)
        emit(table_footer)

    # Output the javascript to render our histograms nicely, replacing
    # the placeholder <div> tags output earlier.
    # inverseTransform must be the exact inverse of transform:
    #   t = sqrt(log2(100 * v + 1))  <=>  v = (2 ** (t * t) - 1) / 100
    emit(dedent("""\
        <script language="javascript" type="text/javascript">
        $(function () {
            var options = {
                series: {
                    bars: {show: true, barWidth: %s}
                    },
                xaxis: {
                    tickFormatter: function (val, axis) {
                        return val.toFixed(axis.tickDecimals) + "s";
                        }
                    },
                yaxis: {
                    min: 0,
                    max: 1,
                    transform: function (v) {
                        return Math.pow(Math.log(v*100+1)/Math.LN2, 0.5);
                        },
                    inverseTransform: function (v) {
                        return (Math.pow(2, v*v) - 1) / 100;
                        },
                    tickDecimals: 1,
                    tickFormatter: function (val, axis) {
                        return (val * 100).toFixed(axis.tickDecimals) + "%%";
                        },
                    ticks: [0.001,0.01,0.10,0.50,1.0]
                    },
                grid: {
                    aboveData: true,
                    labelMargin: 15
                    }
                };
        """ % histogram_resolution))

    for i, histogram in enumerate(histograms):
        # Nothing to plot for an empty histogram; its placeholder <div>
        # simply stays empty.
        if histogram.count == 0:
            continue
        # Plotting is deferred until the placeholder is reported as
        # having appeared (jquery.appear plugin).
        emit(dedent("""\
            function plot_histogram_%(id)d() {
                var d = %(data)s;

                $.plot(
                    $("#histogram%(id)d"),
                    [{data: d}], options);
            }
            $('#histogram%(id)d').appear(function() {
                plot_histogram_%(id)d();
            });

            """ % {'id': i, 'data': json.dumps(histogram.bins_relative)}))

    emit(dedent("""\
            });
        </script>
        </body>
        </html>
        """))