10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
1 |
# Copyright 2010 Canonical Ltd. This software is licensed under the
|
2 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
|
3 |
||
4 |
"""Page performance report generated from zserver trace logs."""
|
|
5 |
||
6 |
__metaclass__ = type |
|
10209.2.11
by Stuart Bishop
Handle real data |
7 |
__all__ = ['main'] |
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
8 |
|
11775.2.19
by Francis J. Lacoste
Save configurable metrics for charting. Use builtin compression module. Fix some lint. |
9 |
import bz2 |
10209.2.3
by Stuart Bishop
HTML report + JS graphs |
10 |
from cgi import escape as html_quote |
7675.916.98
by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes. |
11 |
from ConfigParser import RawConfigParser |
7675.991.2
by Jeroen Vermeulen
Roll back lp:~launchpad/launchpad/recife. |
12 |
import copy |
7675.916.98
by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes. |
13 |
import cPickle |
11775.2.19
by Francis J. Lacoste
Save configurable metrics for charting. Use builtin compression module. Fix some lint. |
14 |
import csv |
10209.2.11
by Stuart Bishop
Handle real data |
15 |
from datetime import datetime |
11775.2.19
by Francis J. Lacoste
Save configurable metrics for charting. Use builtin compression module. Fix some lint. |
16 |
import gzip |
17 |
import math |
|
11403.1.4
by Henning Eggers
Reformatted imports using format-imports script r32. |
18 |
import os.path |
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
19 |
import re |
7675.916.98
by Henning Eggers
Merged db-stable at r10026 (recife roll-back) but without accepting the changes. |
20 |
import textwrap |
7675.991.2
by Jeroen Vermeulen
Roll back lp:~launchpad/launchpad/recife. |
21 |
from textwrap import dedent |
10209.2.3
by Stuart Bishop
HTML report + JS graphs |
22 |
import time |
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
23 |
|
10209.2.3
by Stuart Bishop
HTML report + JS graphs |
24 |
import simplejson as json |
11403.1.4
by Henning Eggers
Reformatted imports using format-imports script r32. |
25 |
import sre_constants |
10209.2.25
by Stuart Bishop
Pageids in addition to url regexp categories |
26 |
import zc.zservertracelog.tracereport |
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
27 |
|
10209.2.4
by Stuart Bishop
Shuffle files |
28 |
from canonical.config import config |
10209.2.11
by Stuart Bishop
Handle real data |
29 |
from canonical.launchpad.scripts.logger import log |
10209.2.7
by Stuart Bishop
Date range filter |
30 |
from lp.scripts.helpers import LPOptionParser |
10209.2.4
by Stuart Bishop
Shuffle files |
31 |
|
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
32 |
|
10209.2.25
by Stuart Bishop
Pageids in addition to url regexp categories |
33 |
class Request(zc.zservertracelog.tracereport.Request):
    """Trace log request extended with the fields used by this report.

    The extra attributes default to None at class level; they are expected
    to be filled in by whatever parses the trace log.
    """

    url = None
    pageid = None
    ticks = None
    sql_statements = None
    sql_seconds = None

    # Override the broken version in our superclass that always
    # returns an integer.
    @property
    def app_seconds(self):
        """Seconds (with fraction) between start_app_time and app_time."""
        interval = self.app_time - self.start_app_time
        # timedelta.seconds only holds the remainder after whole days, so
        # the days component must be added explicitly; otherwise intervals
        # of a day or more (and negative intervals) are misreported.
        return (
            interval.days * 86400 + interval.seconds +
            interval.microseconds / 1000000.0)

    # Override the broken version in our superclass that always
    # returns an integer.
    @property
    def total_seconds(self):
        """Seconds (with fraction) between start and end."""
        interval = self.end - self.start
        # See app_seconds for why the days component is included.
        return (
            interval.days * 86400 + interval.seconds +
            interval.microseconds / 1000000.0)
|
53 |
||
10209.2.25
by Stuart Bishop
Pageids in addition to url regexp categories |
54 |
|
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
55 |
class Category:
    """A Category in our report.

    Requests belong to a Category if the URL matches a regular expression.
    The expression is compiled case-insensitively and in verbose mode
    (re.I | re.X), so unescaped whitespace in the pattern is ignored.
    """

    def __init__(self, title, regexp):
        """Create a category named title that matches URLs against regexp.

        The partition flag starts out False; it is set by code outside
        this class.
        """
        self.title = title
        self.regexp = regexp
        self._compiled_regexp = re.compile(regexp, re.I | re.X)
        self.partition = False

    def match(self, request):
        """Return true when the request matches this category."""
        return self._compiled_regexp.search(request.url) is not None

    def __cmp__(self, other):
        """Order categories by case-insensitive title."""
        return cmp(self.title.lower(), other.title.lower())

    def __deepcopy__(self, memo):
        """Return an independent copy of this category.

        We provide __deepcopy__ because the copy module doesn't handle
        compiled regular expressions by default; the copy recompiles the
        expression from its source text instead.
        """
        duplicate = Category(self.title, self.regexp)
        # The constructor resets partition to False, so carry it over
        # explicitly or the copy silently stops being a partition category.
        duplicate.partition = self.partition
        return duplicate
|
78 |
||
10209.2.3
by Stuart Bishop
HTML report + JS graphs |
79 |
|
11775.2.17
by Francis J. Lacoste
Add object to compute stats (mean, sum, std, median) using on-line algorithm. |
80 |
class OnlineStatsCalculator:
    """Incremental calculator for count, sum, mean, variance and std.

    Values are folded in one at a time using the Welford / Knuth on-line
    algorithm described at
    http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
    so only a handful of running totals are kept.
    """

    def __init__(self):
        self.count = 0
        self.sum = 0
        # Running sum of squared differences from the mean.
        self.M2 = 0.0
        self.mean = 0.0

    def update(self, x):
        """Fold x into the running statistics.

        None values are ignored.
        """
        if x is not None:
            previous_mean = self.mean
            self.count += 1
            self.sum += x
            self.mean = float(self.sum) / self.count
            self.M2 += (x - previous_mean) * (x - self.mean)

    @property
    def variance(self):
        """Return the population variance (0 when nothing was added)."""
        if self.count == 0:
            return 0
        return self.M2 / self.count

    @property
    def std(self):
        """Return the standard deviation (0 when nothing was added)."""
        if self.count == 0:
            return 0
        return math.sqrt(self.variance)

    def __add__(self, other):
        """Return a new calculator combining the stats of self and other.

        Neither operand is modified.
        """
        merged = OnlineStatsCalculator()
        merged.count = self.count + other.count
        merged.sum = self.sum + other.sum
        # When one side is empty its M2 is 0, so the plain sum is enough.
        merged.M2 = self.M2 + other.M2
        if self.count > 0 and other.count > 0:
            # This is 2.1b in Chan, Tony F.; Golub, Gene H.; LeVeque,
            # Randall J. (1979), "Updating Formulae and a Pairwise Algorithm
            # for Computing Sample Variances.",
            # Technical Report STAN-CS-79-773,
            # Department of Computer Science, Stanford University,
            # ftp://reports.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf .
            scale = float(self.count) / (other.count * merged.count)
            spread = (float(other.count) / self.count) * self.sum - other.sum
            merged.M2 += scale * spread ** 2
        if merged.count > 0:
            merged.mean = float(merged.sum) / merged.count
        return merged
146 |
||
11775.2.17
by Francis J. Lacoste
Add object to compute stats (mean, sum, std, median) using on-line algorithm. |
147 |
|
148 |
class OnlineApproximateMedian:
    """Approximate the median of a stream of elements.

    This implements a space-efficient algorithm which only sees each value
    once. (It will hold in memory log bucket_size of n elements.)

    It was described and analysed in
    D. Cantone and M.Hofri,
    "Analysis of An Approximate Median Selection Algorithm"
    ftp://ftp.cs.wpi.edu/pub/techreports/pdf/06-17.pdf

    This algorithm is similar to Tukey's median of medians technique.
    It computes the median among bucket_size values, and then the median
    among those medians.
    """

    def __init__(self, bucket_size=9):
        """Create a new estimator.

        The median is approximated by taking the median of each successive
        group of bucket_size elements, and then feeding those medians into
        further rounds of selection.

        The bucket size should be a low odd integer.
        """
        self.bucket_size = bucket_size
        # Position of the median inside a full, sorted bucket.
        self.median_idx = (bucket_size - 1) // 2
        # buckets[k] holds values that survived k rounds of selection.
        self.buckets = []

    def update(self, x, order=0):
        """Add x, treating it as having survived `order` selection rounds.

        None values are ignored.
        """
        if x is None:
            return

        level = order
        candidate = x
        while True:
            # Grow the bucket list on demand up to the requested level.
            while len(self.buckets) <= level:
                self.buckets.append([])
            current = self.buckets[level]
            current.append(candidate)
            if len(current) != self.bucket_size:
                return
            # The bucket is full: promote its median to the next level and
            # empty it for the next round.
            candidate = sorted(current)[self.median_idx]
            del current[:]
            level += 1

    @property
    def median(self):
        """Return the approximate median (0 when no value was added)."""
        # Find the 'weighted' median: each element weighs bucket_size to
        # the power of the number of selection rounds it went through.
        weighted = []
        total_weight = 0
        for level, bucket in enumerate(self.buckets):
            weight = self.bucket_size ** level
            for value in bucket:
                weighted.append([value, weight])
                total_weight += weight
        if not weighted:
            return 0

        # Each weight is the equivalent of having the candidate appear
        # that number of times in the array.
        # So candidates like [[1, 2], [2, 3], [4, 2]] behave like the
        # expanded list [1, 1, 2, 2, 2, 4, 4]; we find the median of that
        # list without actually expanding it, to conserve memory.
        halfway = (total_weight - 1) / 2
        running = 0
        for value, weight in sorted(weighted):
            running += weight
            if running > halfway:
                return value

    def __add__(self, other):
        """Merge two approximators into a new one.

        All candidates from the other are merged through the standard
        algorithm, starting at the same level. So an item that went through
        two rounds of selection will be compared with other items having
        gone through the same number of rounds.
        """
        merged = OnlineApproximateMedian(self.bucket_size)
        merged.buckets = copy.deepcopy(self.buckets)
        for level, bucket in enumerate(other.buckets):
            for value in bucket:
                merged.update(value, level)
        return merged
|
243 |
||
11775.2.17
by Francis J. Lacoste
Add object to compute stats (mean, sum, std, median) using on-line algorithm. |
244 |
|
10209.2.27
by Stuart Bishop
Total hits |
245 |
class Stats:
    """Plain holder for request statistics.

    All times are in seconds. Every figure defaults to 0 at class level
    (histogram defaults to None).
    """

    # Overall request timing.
    total_hits = 0  # Total hits.
    total_time = 0  # Total time spent rendering.
    mean = 0  # Mean time per hit.
    median = 0  # Median time per hit.
    std = 0  # Standard deviation per hit.
    histogram = None  # Request times histogram.

    # Time spent waiting for SQL to process.
    total_sqltime = 0  # Total SQL time.
    mean_sqltime = 0  # Mean SQL time.
    median_sqltime = 0  # Median SQL time.
    std_sqltime = 0  # Standard deviation of SQL time.

    # Number of SQL statements issued.
    total_sqlstatements = 0  # Total number of SQL statements issued.
    mean_sqlstatements = 0
    median_sqlstatements = 0
    std_sqlstatements = 0

    @property
    def ninetyninth_percentile_time(self):
        """Time under which 99% of requests are rendered.

        This is estimated as 3 std deviations from the mean. Given that
        in a daily report, many URLs or PageIds won't have 100 requests,
        it's more useful to use this estimator.
        """
        spread = 3 * self.std
        return self.mean + spread

    @property
    def ninetyninth_percentile_sqltime(self):
        """SQL time under which 99% of requests are rendered.

        This is estimated as 3 std deviations from the mean.
        """
        spread = 3 * self.std_sqltime
        return self.mean_sqltime + spread

    @property
    def ninetyninth_percentile_sqlstatements(self):
        """Number of SQL statements under which 99% of requests are rendered.

        This is estimated as 3 std deviations from the mean.
        """
        spread = 3 * self.std_sqlstatements
        return self.mean_sqlstatements + spread

    def text(self):
        """Return a multi-line textual summary of the stats."""
        values = (
            self.total_hits,
            self.total_time, self.mean, self.median, self.std,
            self.total_sqltime, self.mean_sqltime,
            self.median_sqltime, self.std_sqltime,
            self.total_sqlstatements, self.mean_sqlstatements,
            self.median_sqlstatements, self.std_sqlstatements,
            )
        template = """
        <Stats for %d requests:
            Time: total=%.2f; mean=%.2f; median=%.2f; std=%.2f
            SQL time: total=%.2f; mean=%.2f; median=%.2f; std=%.2f
            SQL stmt: total=%.f; mean=%.2f; median=%.f; std=%.2f
            >"""
        # Fill in the numbers first, then strip the common indentation.
        return textwrap.dedent(template % values)
|
307 |
||
11775.2.4
by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports. |
308 |
|
11775.2.18
by Francis J. Lacoste
Compute reports using only one pass over the data. |
309 |
class OnlineStats(Stats):
    """Stats whose figures are computed incrementally.

    Call update() once per request; every figure is maintained by an
    on-line calculator so that little storage is needed.
    """

    def __init__(self, histogram_width, histogram_resolution):
        """Create empty stats.

        histogram_width and histogram_resolution are passed to Histogram
        as its bins count and bins size respectively.
        """
        # One calculator plus one median approximator per tracked metric.
        self.time_stats = OnlineStatsCalculator()
        self.time_median_approximate = OnlineApproximateMedian()
        self.sql_time_stats = OnlineStatsCalculator()
        self.sql_time_median_approximate = OnlineApproximateMedian()
        self.sql_statements_stats = OnlineStatsCalculator()
        self.sql_statements_median_approximate = OnlineApproximateMedian()
        self.histogram = Histogram(histogram_width, histogram_resolution)

    # Request time figures.

    @property
    def total_hits(self):
        """Number of requests with a recorded request time."""
        return self.time_stats.count

    @property
    def total_time(self):
        """Sum of the request times."""
        return self.time_stats.sum

    @property
    def mean(self):
        """Mean request time."""
        return self.time_stats.mean

    @property
    def median(self):
        """Approximate median request time."""
        return self.time_median_approximate.median

    @property
    def std(self):
        """Standard deviation of the request time."""
        return self.time_stats.std

    # SQL time figures.

    @property
    def total_sqltime(self):
        """Sum of the SQL times."""
        return self.sql_time_stats.sum

    @property
    def mean_sqltime(self):
        """Mean SQL time."""
        return self.sql_time_stats.mean

    @property
    def median_sqltime(self):
        """Approximate median SQL time."""
        return self.sql_time_median_approximate.median

    @property
    def std_sqltime(self):
        """Standard deviation of the SQL time."""
        return self.sql_time_stats.std

    # SQL statement count figures.

    @property
    def total_sqlstatements(self):
        """Sum of the SQL statement counts."""
        return self.sql_statements_stats.sum

    @property
    def mean_sqlstatements(self):
        """Mean number of SQL statements."""
        return self.sql_statements_stats.mean

    @property
    def median_sqlstatements(self):
        """Approximate median number of SQL statements."""
        return self.sql_statements_median_approximate.median

    @property
    def std_sqlstatements(self):
        """Standard deviation of the number of SQL statements."""
        return self.sql_statements_stats.std

    def update(self, request):
        """Fold one request into every tracked metric.

        Reads app_seconds, sql_seconds and sql_statements from request.
        """
        elapsed = request.app_seconds
        self.time_stats.update(elapsed)
        self.time_median_approximate.update(elapsed)
        self.sql_time_stats.update(request.sql_seconds)
        self.sql_time_median_approximate.update(request.sql_seconds)
        self.sql_statements_stats.update(request.sql_statements)
        self.sql_statements_median_approximate.update(request.sql_statements)
        self.histogram.update(elapsed)

    def __add__(self, other):
        """Return new stats merging this OnlineStats with another one."""
        merged = copy.deepcopy(self)
        component_names = (
            'time_stats',
            'time_median_approximate',
            'sql_time_stats',
            'sql_time_median_approximate',
            'sql_statements_stats',
            'sql_statements_median_approximate',
            )
        for name in component_names:
            combined = getattr(merged, name) + getattr(other, name)
            setattr(merged, name, combined)
        merged.histogram = self.histogram + other.histogram
        return merged
400 |
||
11775.2.18
by Francis J. Lacoste
Compute reports using only one pass over the data. |
401 |
|
12929.6.9
by Francis J. Lacoste
Add Histogram and tests. |
402 |
class Histogram:
    """A simple object to compute histogram of a value.

    Bins are stored in self.bins as [bin_start, frequency] pairs, where
    bin_start is the bin index multiplied by bins_size.
    """

    @staticmethod
    def from_bins_data(data):
        """Create an histogram from existing bins data.

        data is a sequence of [bin_start, frequency] pairs whose first bin
        starts at zero. The bin size is read from the start of the second
        bin, so data needs at least two entries.
        """
        assert data[0][0] == 0, "First bin should start at zero."

        hist = Histogram(len(data), data[1][0])
        for idx, entry in enumerate(data):
            hist.count += entry[1]
            hist.bins[idx][1] = entry[1]

        return hist

    def __init__(self, bins_count, bins_size):
        """Create a new histogram.

        The histogram will count the frequency of values in bins_count bins
        of bins_size each.
        """
        self.count = 0
        self.bins_count = bins_count
        self.bins_size = bins_size
        self.bins = [[x * bins_size, 0] for x in range(bins_count)]

    @property
    def bins_relative(self):
        """Return the bins with the frequency expressed as a ratio.

        An empty histogram reports a ratio of 0.0 for every bin.
        """
        if self.count == 0:
            # Nothing was recorded; avoid dividing by zero.
            return [[x, 0.0] for x, f in self.bins]
        return [[x, float(f) / self.count] for x, f in self.bins]

    def update(self, value):
        """Update the histogram for this value.

        All values higher than the last bin minimum are counted in that last
        bin.
        """
        self.count += 1
        idx = int(min(self.bins_count - 1, value / self.bins_size))
        self.bins[idx][1] += 1

    def __repr__(self):
        """A string representation of this histogram."""
        return "<Histogram %s>" % self.bins

    def __eq__(self, other):
        """Two histograms are equal if they have the same bins content."""
        if not isinstance(other, Histogram):
            return False

        if self.bins_count != other.bins_count:
            return False

        if self.bins_size != other.bins_size:
            return False

        for idx, other_bin in enumerate(other.bins):
            if self.bins[idx][1] != other_bin[1]:
                return False

        return True

    def __ne__(self, other):
        """Inverse of __eq__.

        Python 2 does not derive != from __eq__, so it is spelled out to
        keep both operators consistent.
        """
        return not self.__eq__(other)

    def __add__(self, other):
        """Add the frequency of the other histogram to this one.

        The resulting histogram has the same bins_size as this one.
        If the other one has a bigger bins_size, we'll assume an even
        distribution and distribute the frequency across the smaller bins. If
        it has a lower bins_size, we'll aggregate its bins into the larger
        ones. We only support different bins_size if the ratio can be
        expressed as the ratio between 1 and an integer.

        The resulting histogram is as wide as the widest one. Neither
        operand is modified.
        """
        ratio = float(other.bins_size) / self.bins_size
        bins_count = max(self.bins_count, math.ceil(other.bins_count * ratio))
        total = Histogram(int(bins_count), self.bins_size)
        total.count = self.count + other.count

        # Copy our bins into the total
        for idx, own_bin in enumerate(self.bins):
            total.bins[idx][1] = own_bin[1]

        # The message needs a %s placeholder: without it, a failing assert
        # raised TypeError while formatting instead of AssertionError.
        assert int(ratio) == ratio or int(1 / ratio) == 1 / ratio, (
            "We only support different bins size when the ratio is an "
            "integer to 1: %s"
            % ratio)

        if ratio >= 1:
            # We distribute the frequency across the bins.
            # For example. if the ratio is 3:1, we'll add a third
            # of the lower resolution bin to 3 of the higher one.
            for other_idx, other_bin in enumerate(other.bins):
                f = other_bin[1] / ratio
                start = int(math.floor(other_idx * ratio))
                end = int(start + ratio)
                for idx in range(start, end):
                    total.bins[idx][1] += f
        else:
            # We need to collect the higher resolution bins into the
            # corresponding lower one.
            for other_idx, other_bin in enumerate(other.bins):
                idx = int(other_idx * ratio)
                total.bins[idx][1] += other_bin[1]

        return total
|
510 |
||
511 |
||
11775.2.18
by Francis J. Lacoste
Compute reports using only one pass over the data. |
512 |
class RequestTimes: |
11775.4.2
by Francis J. Lacoste
Typos and docstring updates. |
513 |
"""Collect statistics from requests.
|
514 |
||
515 |
Statistics are updated by calling the add_request() method.
|
|
516 |
||
517 |
Statistics for mean/stddev/total/median for request times, SQL times and
|
|
518 |
number of SQL statements are collected.
|
|
519 |
||
520 |
They are grouped by Category, URL or PageID.
|
|
521 |
"""
|
|
11775.2.4
by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports. |
522 |
|
523 |
def __init__(self, categories, options): |
|
11775.2.18
by Francis J. Lacoste
Compute reports using only one pass over the data. |
524 |
self.by_pageids = options.pageids |
11775.2.21
by Francis J. Lacoste
Ensure constant memory usage by limiting the size of the url_times cache. |
525 |
self.top_urls = options.top_urls |
11775.4.2
by Francis J. Lacoste
Typos and docstring updates. |
526 |
# We only keep in memory 50 times the number of URLs we want to
|
11775.2.21
by Francis J. Lacoste
Ensure constant memory usage by limiting the size of the url_times cache. |
527 |
# return. The number of URLs can go pretty high (because of the
|
528 |
# distinct query parameters).
|
|
529 |
#
|
|
530 |
# Keeping all in memory at once is prohibitive. On a small but
|
|
531 |
# representative sample, keeping 50 times the possible number of
|
|
532 |
# candidates and culling to 90% on overflow, generated an identical
|
|
533 |
# report than keeping all the candidates in-memory.
|
|
534 |
#
|
|
535 |
# Keeping 10 times or culling at 90% generated a near-identical report
|
|
536 |
# (it differed a little in the tail.)
|
|
11775.4.2
by Francis J. Lacoste
Typos and docstring updates. |
537 |
#
|
538 |
# The size/cull parameters might need to change if the requests
|
|
539 |
# distribution become very different than what it currently is.
|
|
11775.2.21
by Francis J. Lacoste
Ensure constant memory usage by limiting the size of the url_times cache. |
540 |
self.top_urls_cache_size = self.top_urls * 50 |
11775.2.8
by Francis J. Lacoste
Implement get_category_times() using raw SQL. Drop median computation for now. |
541 |
|
12929.6.10
by Francis J. Lacoste
Add histogram_resolution option. Uses Histogram class. |
542 |
# Histogram has a bin per resolution up to our timeout
|
543 |
#(and an extra bin).
|
|
544 |
self.histogram_resolution = float(options.resolution) |
|
545 |
self.histogram_width = int( |
|
546 |
options.timeout / self.histogram_resolution) + 1 |
|
11775.2.18
by Francis J. Lacoste
Compute reports using only one pass over the data. |
547 |
self.category_times = [ |
12929.6.10
by Francis J. Lacoste
Add histogram_resolution option. Uses Histogram class. |
548 |
(category, OnlineStats( |
549 |
self.histogram_width, self.histogram_resolution)) |
|
11775.2.18
by Francis J. Lacoste
Compute reports using only one pass over the data. |
550 |
for category in categories] |
551 |
self.url_times = {} |
|
552 |
self.pageid_times = {} |
|
11775.2.6
by Francis J. Lacoste
Add initial unit tests. Create a histogram join table and store missing values as Null. |
553 |
|
11775.2.4
by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports. |
554 |
def add_request(self, request):
    """Add request to the set of requests we collect stats for.

    The request is folded into the stats of every category matching it
    and, when enabled, into the stats of its pageid and of its URL.

    :param request: The completed request. It must provide `url` and
        `pageid` attributes and be accepted by `category.match()` and
        by the `update()` method of the stats objects.
    """
    matched = []
    for category, stats in self.category_times:
        if category.match(request):
            stats.update(request)
            if category.partition:
                matched.append(category.title)

    # Partition categories are expected to match each request exactly
    # once; report both overlaps and holes.
    if len(matched) > 1:
        log.warning(
            "Multiple partition categories matched by %s (%s)",
            request.url, ", ".join(matched))
    elif not matched:
        log.warning("%s isn't part of the partition", request.url)

    if self.by_pageids:
        pageid = request.pageid or 'Unknown'
        # Only build a new stats object when the pageid is first seen:
        # setdefault() would construct (and discard) one per request.
        stats = self.pageid_times.get(pageid)
        if stats is None:
            stats = self.pageid_times[pageid] = OnlineStats(
                self.histogram_width, self.histogram_resolution)
        stats.update(request)

    if self.top_urls:
        stats = self.url_times.get(request.url)
        if stats is None:
            stats = self.url_times[request.url] = OnlineStats(
                self.histogram_width, self.histogram_resolution)
        stats.update(request)
        # Whenever we have more URLs than we need to, discard 10%
        # that is less likely to end up in the top.
        if len(self.url_times) > self.top_urls_cache_size:
            cutoff = int(self.top_urls_cache_size * 0.90)
            self.url_times = dict(
                sorted(self.url_times.items(),
                       key=lambda item: item[1].total_time,
                       reverse=True)[:cutoff])
590 |
||
11775.2.4
by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports. |
591 |
def get_category_times(self):
    """Return the list of (category, stats) pairs being collected."""
    category_times = self.category_times
    return category_times
11775.2.4
by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports. |
594 |
|
11775.2.21
by Francis J. Lacoste
Ensure constant memory usage by limiting the size of the url_times cache. |
595 |
def get_top_urls_times(self):
    """Return the (url, stats) pairs of the top URLs by total time.

    At most `self.top_urls` pairs are returned, the most time-consuming
    URL first.
    """
    ranked = list(self.url_times.items())
    ranked.sort(key=lambda pair: pair[1].total_time, reverse=True)
    return ranked[:self.top_urls]
|
11775.2.4
by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports. |
602 |
|
603 |
def get_pageid_times(self):
    """Return the (pageid, stats) pairs, ordered by pageid."""
    pairs = list(self.pageid_times.items())
    pairs.sort()
    return pairs
10209.2.48
by Stuart Bishop
Top N URLs by hit count |
607 |
|
11775.2.26
by Francis J. Lacoste
Implement __add__ for RequestTimes. |
608 |
def __add__(self, other):
    """Merge two RequestTimes together.

    The result is built on a deep copy of self, to which the category,
    URL and pageid stats of `other` are added.

    :param other: The RequestTimes to merge with this one.
    :return: A new object holding the combined stats.
    """
    results = copy.deepcopy(self)
    for other_category, other_stats in other.category_times:
        for i, (category, stats) in enumerate(self.category_times):
            if category.title == other_category.title:
                results.category_times[i] = (
                    category, stats + other_stats)
                break
        else:
            # Category only known to other: carry it over.
            results.category_times.append(
                (other_category, copy.deepcopy(other_stats)))

    url_times = results.url_times
    for url, stats in other.url_times.items():
        if url in url_times:
            url_times[url] += stats
        else:
            url_times[url] = copy.deepcopy(stats)
    # Only keep top_urls_cache_size entries. The crop must be applied
    # to the merged result, not to self: self is an operand and must
    # keep its own URLs.
    cache_size = results.top_urls_cache_size
    if len(url_times) > cache_size:
        results.url_times = dict(
            sorted(
                url_times.items(),
                key=lambda item: item[1].total_time,
                reverse=True)[:cache_size])

    pageid_times = results.pageid_times
    for pageid, stats in other.pageid_times.items():
        if pageid in pageid_times:
            pageid_times[pageid] += stats
        else:
            pageid_times[pageid] = copy.deepcopy(stats)

    return results
|
643 |
||
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
644 |
|
645 |
def main():
    """Generate the page performance reports from the command line.

    Parses the command line options, loads the categories, partition and
    metrics definitions from the configuration file, collects the request
    times (either by parsing trace logs or by merging previously pickled
    stats), then writes the HTML reports, the pickled stats and the
    metrics file into the output directory.

    :return: 0 on success, 1 when a category regexp does not compile.
        Invalid command line arguments are reported through
        `parser.error()`.
    """
    parser = LPOptionParser("%prog [args] tracelog [...]")

    parser.add_option(
        "-c", "--config", dest="config",
        default=os.path.join(
            config.root, "utilities", "page-performance-report.ini"),
        metavar="FILE", help="Load configuration from FILE")
    parser.add_option(
        "--from", dest="from_ts", type="datetime",
        default=None, metavar="TIMESTAMP",
        help="Ignore log entries before TIMESTAMP")
    parser.add_option(
        "--until", dest="until_ts", type="datetime",
        default=None, metavar="TIMESTAMP",
        help="Ignore log entries after TIMESTAMP")
    parser.add_option(
        "--no-partition", dest="partition",
        action="store_false", default=True,
        help="Do not produce partition report")
    parser.add_option(
        "--no-categories", dest="categories",
        action="store_false", default=True,
        help="Do not produce categories report")
    parser.add_option(
        "--no-pageids", dest="pageids",
        action="store_false", default=True,
        help="Do not produce pageids report")
    parser.add_option(
        "--top-urls", dest="top_urls", type=int, metavar="N",
        default=50, help="Generate report for top N urls by total time.")
    parser.add_option(
        "--directory", dest="directory",
        default=os.getcwd(), metavar="DIR",
        help="Output reports in DIR directory")
    # In the help strings below, optparse expands "%default"; the
    # trailing "s" is the unit (seconds).
    parser.add_option(
        "--timeout", dest="timeout",
        # Default to 9: our production timeout.
        default=9, type="int", metavar="SECONDS",
        help="The configured timeout value: used to determine high risk " +
        "page ids. That would be pages which 99% under render time is "
        "greater than timeout - 2s. Default is %defaults.")
    parser.add_option(
        "--histogram-resolution", dest="resolution",
        # Default to 0.5s
        default=0.5, type="float", metavar="SECONDS",
        help="The resolution of the histogram bin width. Default to "
        "%defaults.")
    parser.add_option(
        "--merge", dest="merge",
        default=False, action='store_true',
        help="Files are interpreted as pickled stats and are aggregated " +
        "for the report.")

    options, args = parser.parse_args()

    if not os.path.isdir(options.directory):
        parser.error("Directory %s does not exist" % options.directory)

    if not args:
        parser.error("At least one zserver tracelog file must be provided")

    if options.from_ts is not None and options.until_ts is not None:
        if options.from_ts > options.until_ts:
            parser.error(
                "--from timestamp %s is after --until timestamp %s"
                % (options.from_ts, options.until_ts))
    if options.from_ts is not None or options.until_ts is not None:
        if options.merge:
            parser.error('--from and --until cannot be used with --merge')

    for filename in args:
        if not os.path.exists(filename):
            parser.error("Tracelog file %s not found." % filename)

    if not os.path.exists(options.config):
        parser.error("Config file %s not found." % options.config)

    # Need a better config mechanism as ConfigParser doesn't preserve order.
    script_config = RawConfigParser()
    script_config.optionxform = str # Make keys case sensitive.
    config_file = open(options.config)
    try:
        script_config.readfp(config_file)
    finally:
        config_file.close()

    categories = [] # A list of Category, in report order.
    for option in script_config.options('categories'):
        regexp = script_config.get('categories', option)
        try:
            categories.append(Category(option, regexp))
        except sre_constants.error as x:
            log.fatal("Unable to compile regexp %r (%s)" % (regexp, x))
            return 1
    categories.sort()

    if not categories:
        parser.error("No data in [categories] section of configuration.")

    # Determine the categories making a partition of the requests
    for option in script_config.options('partition'):
        for category in categories:
            if category.title == option:
                category.partition = True
                break
        else:
            log.warning(
                "In partition definition: %s isn't a defined category",
                option)

    times = RequestTimes(categories, options)

    if options.merge:
        for filename in args:
            log.info('Merging %s...' % filename)
            f = bz2.BZ2File(filename, 'r')
            try:
                # NOTE: unpickling can execute arbitrary code; only merge
                # stats files produced by a trusted run of this script.
                times += cPickle.load(f)
            finally:
                f.close()
    else:
        parse(args, times, options)

    category_times = times.get_category_times()

    pageid_times = []
    url_times = []
    if options.top_urls:
        url_times = times.get_top_urls_times()
    if options.pageids:
        pageid_times = times.get_pageid_times()

    def _report_filename(filename):
        return os.path.join(options.directory, filename)

    def _write_html_report(basename, *report_args, **report_kwargs):
        """Write one HTML report, closing the output file in all cases."""
        report_filename = _report_filename(basename)
        log.info("Generating %s", report_filename)
        outf = open(report_filename, 'w')
        try:
            html_report(outf, *report_args, **report_kwargs)
        finally:
            outf.close()

    # Partition report
    if options.partition:
        partition_times = [
            category_time
            for category_time in category_times
            if category_time[0].partition]
        _write_html_report(
            'partition.html', partition_times, None, None,
            histogram_resolution=options.resolution,
            category_name='Partition')

    # Category only report.
    if options.categories:
        _write_html_report(
            'categories.html', category_times, None, None,
            histogram_resolution=options.resolution)

    # Pageid only report.
    if options.pageids:
        _write_html_report(
            'pageids.html', None, pageid_times, None,
            histogram_resolution=options.resolution)

    # Top URL only report.
    if options.top_urls:
        _write_html_report(
            'top%d.html' % options.top_urls, None, None, url_times,
            histogram_resolution=options.resolution)

    # Combined report.
    if options.categories and options.pageids:
        _write_html_report(
            'combined.html', category_times, pageid_times, url_times,
            histogram_resolution=options.resolution)

    # Report of likely timeout candidates
    _write_html_report(
        'timeout-candidates.html', None, pageid_times, None,
        options.timeout - 2,
        histogram_resolution=options.resolution)

    # Save the times cache for later merging.
    report_filename = _report_filename('stats.pck.bz2')
    log.info("Saving times database in %s", report_filename)
    stats_file = bz2.BZ2File(report_filename, 'w')
    try:
        cPickle.dump(times, stats_file, protocol=cPickle.HIGHEST_PROTOCOL)
    finally:
        stats_file.close()

    # Output metrics for selected categories.
    report_filename = _report_filename('metrics.dat')
    log.info('Saving category_metrics %s', report_filename)
    metrics_file = open(report_filename, 'w')
    try:
        writer = csv.writer(metrics_file, delimiter=':')
        date = options.until_ts or options.from_ts or datetime.utcnow()
        date = time.mktime(date.timetuple())

        # Each option of the [metrics] section is a metric name whose
        # value is the title of the category it is computed from.
        for option in script_config.options('metrics'):
            name = script_config.get('metrics', option)
            for category, stats in category_times:
                if category.title == name:
                    writer.writerows([
                        ("%s_99" % option, "%f@%d" % (
                            stats.ninetyninth_percentile_time, date)),
                        ("%s_hits" % option, "%d@%d" % (
                            stats.total_hits, date))])
                    break
            else:
                log.warning(
                    "Can't find category %s for metric %s", name, option)
    finally:
        metrics_file.close()

    return 0
858 |
||
859 |
||
10209.2.11
by Stuart Bishop
Handle real data |
860 |
def smart_open(filename, mode='r'):
    """Open a file, transparently handling compressed files.

    Compressed files are detected by file extension: `.bz2` files are
    opened with bz2 and `.gz` files with gzip. Any other file is opened
    with the builtin open().

    :param filename: Path of the file to open.
    :param mode: The mode to open the file with. It is honoured for
        compressed files too, so it must be one the compression module
        accepts.
    :return: An open file-like object.
    """
    ext = os.path.splitext(filename)[1]
    if ext == '.bz2':
        return bz2.BZ2File(filename, mode)
    elif ext == '.gz':
        return gzip.GzipFile(filename, mode)
    else:
        return open(filename, mode)
|
872 |
||
873 |
||
874 |
class MalformedLine(Exception):
    """Raised when a line of the trace log cannot be interpreted."""
    pass
|
|
876 |
||
877 |
||
878 |
# YYYY-MM-DD HH:MM:SS with optional .UUUUUU microseconds. The pattern is
# a raw string and the separator before the microseconds is an escaped,
# literal dot.
_ts_re = re.compile(
    r'^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?:\.(\d{6}))?$')


def parse_timestamp(ts_string):
    """Convert a trace log timestamp into a datetime.

    :param ts_string: A 'YYYY-MM-DD HH:MM:SS' string, optionally followed
        by '.UUUUUU' microseconds.
    :return: The matching datetime.
    :raises ValueError: if the string isn't in that format or doesn't
        designate a valid date and time.
    """
    match = _ts_re.search(ts_string)
    if match is None:
        raise ValueError("Invalid timestamp %r" % (ts_string,))
    # The microseconds group is None when absent: leave it out so that
    # datetime() uses its default.
    return datetime(
        *(int(elem) for elem in match.groups() if elem is not None))
|
888 |
||
889 |
||
11775.2.4
by Francis J. Lacoste
Use a two-pass algorithm to use less memory. Records are first parsed into a SQLite3 DB before generating the reports. |
890 |
def parse(tracefiles, times, options):
    """Parse the trace logs and collect the stats of completed requests.

    Records are matched to their request by request id. A request is
    handed to `times.add_request()` once its 'E' (request done) record
    has been read. Malformed lines are logged and skipped.

    :param tracefiles: The paths of the trace logs, possibly compressed.
    :param times: The object collecting the stats, through its
        `add_request()` method.
    :param options: The command line options; `from_ts` and `until_ts`
        restrict the records taken into account.
    """

    def require_args(args, count):
        """Raise MalformedLine unless the record has `count` arguments."""
        if len(args) < count:
            raise MalformedLine(
                "Expected at least %d arguments" % count)

    requests = {}
    total_requests = 0
    for tracefile in tracefiles:
        log.info('Processing %s', tracefile)
        tracelog = smart_open(tracefile)
        try:
            for line in tracelog:
                line = line.rstrip()
                try:
                    record = line.split(' ', 7)
                    try:
                        record_type, request_id, date, time_ = record[:4]
                    except ValueError:
                        raise MalformedLine("Missing fields")

                    if record_type == 'S':
                        # Short circuit - we don't care about these
                        # entries.
                        continue

                    # Parse the timestamp.
                    ts_string = '%s %s' % (date, time_)
                    try:
                        dt = parse_timestamp(ts_string)
                    except ValueError:
                        raise MalformedLine(
                            'Invalid timestamp %s' % repr(ts_string))

                    # Filter entries by command line date range.
                    if (options.from_ts is not None
                        and dt < options.from_ts):
                        continue # Skip to next line.
                    if (options.until_ts is not None
                        and dt > options.until_ts):
                        break # Skip to next log file.

                    args = record[4:]

                    if record_type == 'B': # Request begins.
                        require_args(args, 2)
                        requests[request_id] = Request(
                            dt, args[0], args[1])
                        continue

                    request = requests.get(request_id, None)
                    if request is None: # Just ignore partial records.
                        continue

                    # Old style extension record from Launchpad. Just
                    # contains the URL.
                    if (record_type == '-' and len(args) == 1
                        and args[0].startswith('http')):
                        request.url = args[0]

                    # New style extension record with a prefix.
                    elif record_type == '-':
                        # Launchpad outputs several things as tracelog
                        # extension records. We include a prefix to tell
                        # them apart.
                        require_args(args, 1)

                        parse_extension_record(request, args)

                    elif record_type == 'I': # Got request input.
                        require_args(args, 1)
                        request.I(dt, args[0])

                    elif record_type == 'C': # Entered application thread.
                        request.C(dt)

                    elif record_type == 'A': # Application done.
                        require_args(args, 2)
                        request.A(dt, args[0], args[1])

                    elif record_type == 'E': # Request done.
                        del requests[request_id]
                        request.E(dt)
                        total_requests += 1
                        if total_requests % 10000 == 0:
                            log.debug("Parsed %d requests", total_requests)

                        # Add the request to any matching categories.
                        times.add_request(request)
                    else:
                        raise MalformedLine(
                            'Unknown record type %s' % record_type)
                except MalformedLine as x:
                    log.error(
                        "Malformed line %s (%s)" % (repr(line), x))
        finally:
            tracelog.close()
10209.2.2
by Stuart Bishop
Basic page performance stats, although they are currently wrong |
977 |
|
10209.2.7
by Stuart Bishop
Date range filter |
978 |
|
10209.2.25
by Stuart Bishop
Pageids in addition to url regexp categories |
979 |
def parse_extension_record(request, args):
    """Decode a ZServer extension record and annotate request.

    The first argument is a prefix telling the kind of record:

    * 'u': the remaining arguments are the URL of the request;
    * 'p': the remaining arguments are the pageid of the request;
    * 't': the third and fourth arguments are the number of SQL
      statements and the SQL time, which is divided by 1000 before being
      stored in `request.sql_seconds`.

    :param request: The request to annotate.
    :param args: The arguments of the extension record, prefix first.
    :raises MalformedLine: if the prefix is unknown or if a 't' record
        doesn't have exactly 4 arguments or has non-numeric values.
    """
    prefix = args[0]

    if prefix == 'u':
        request.url = ' '.join(args[1:]) or None
    elif prefix == 'p':
        request.pageid = ' '.join(args[1:]) or None
    elif prefix == 't':
        if len(args) != 4:
            raise MalformedLine("Wrong number of arguments %s" % (args,))
        # A non-numeric value is a malformed line like any other: report
        # it as such rather than letting ValueError abort the parsing.
        try:
            sql_statements = int(args[2])
            sql_seconds = float(args[3]) / 1000
        except ValueError:
            raise MalformedLine("Invalid SQL statistics %s" % (args,))
        request.sql_statements = sql_statements
        request.sql_seconds = sql_seconds
    else:
        raise MalformedLine(
            "Unknown extension prefix %s" % prefix)
|
995 |
||
996 |
||
10209.3.5
by Stuart Bishop
Merge lp:~lifeless/launchpad/foundations, resolving conflicts |
997 |
def html_report(
    outf, category_times, pageid_times, url_times,
    ninetyninth_percentile_threshold=None, histogram_resolution=0.5,
    category_name='Category'):
    """Write an html report to outf.

    Each of the three `*_times` arguments is a sequence of
    (key, stats) pairs. A section is only written when its sequence is
    non-empty. Every row gets a placeholder <div> which is filled in by
    the histogram plotting Javascript emitted at the end of the page.

    :param outf: A file object to write the report to.
    :param category_times: The time statistics for categories, as
        (category, stats) pairs. Each category provides `title` and
        `regexp` attributes.
    :param pageid_times: The time statistics for pageids, as
        (pageid, stats) pairs.
    :param url_times: The time statistics for the top XXX urls, as
        (url, stats) pairs.
    :param ninetyninth_percentile_threshold: Lower threshold for inclusion of
        pages in the pageid section; pages where 99 percent of the requests are
        served under this threshold will not be included.
    :param histogram_resolution: used as the histogram bar width
    :param category_name: The name to use for category report. Defaults to
        'Category'.
    """

    print >> outf, dedent('''\
        <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
            "http://www.w3.org/TR/html4/loose.dtd">
        <html>
        <head>
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
        <title>Launchpad Page Performance Report %(date)s</title>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.min.js"
            ></script>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/jquery.appear-1.1.1.min.js"
            ></script>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/flot/jquery.flot.min.js"
            ></script>
        <script language="javascript" type="text/javascript"
            src="https://devpad.canonical.com/~lpqateam/ppr/js/sorttable.js"></script>
        <style type="text/css">
            h3 { font-weight: normal; font-size: 1em; }
            thead th { padding-left: 1em; padding-right: 1em; }
            .category-title { text-align: right; padding-right: 2em;
                              max-width: 25em; }
            .regexp { font-size: x-small; font-weight: normal; }
            .mean { text-align: right; padding-right: 1em; }
            .median { text-align: right; padding-right: 1em; }
            .standard-deviation { text-align: right; padding-right: 1em; }
            .histogram { padding: 0.5em 1em; width:400px; height:250px; }
            .odd-row { background-color: #eeeeff; }
            .even-row { background-color: #ffffee; }
            table.sortable thead {
                background-color:#eee;
                color:#666666;
                font-weight: bold;
                cursor: default;
                }
            td.numeric {
                font-family: monospace;
                text-align: right;
                padding: 1em;
                }
            .clickable { cursor: pointer; }
            .total-hits, .histogram, .median-sqltime,
            .median-sqlstatements { border-right: 1px dashed #000000; }
        </style>
        </head>
        <body>
        <h1>Launchpad Page Performance Report</h1>
        <h3>%(date)s</h3>
        ''' % {'date': time.ctime()})

    # Note: this template is not %-formatted, so the literal '%' signs
    # in the column headings must not be doubled.
    table_header = dedent('''\
        <table class="sortable page-performance-report">
        <caption align="top">Click on column headings to sort.</caption>
        <thead>
            <tr>
            <th class="clickable">Name</th>

            <th class="clickable">Total Hits</th>

            <th class="clickable">99% Under Time (secs)</th>

            <th class="clickable">Mean Time (secs)</th>
            <th class="clickable">Time Standard Deviation</th>
            <th class="clickable">Median Time (secs)</th>
            <th class="sorttable_nosort">Time Distribution</th>

            <th class="clickable">99% Under SQL Time (secs)</th>
            <th class="clickable">Mean SQL Time (secs)</th>
            <th class="clickable">SQL Time Standard Deviation</th>
            <th class="clickable">Median SQL Time (secs)</th>

            <th class="clickable">99% Under SQL Statements</th>
            <th class="clickable">Mean SQL Statements</th>
            <th class="clickable">SQL Statement Standard Deviation</th>
            <th class="clickable">Median SQL Statements</th>

            <th class="clickable">Hits * 99% Under SQL Statement</th>
            </tr>
        </thead>
        <tbody>
        ''')
    table_footer = "</tbody></table>"

    # Store our generated histograms to output Javascript later.
    histograms = []

    def handle_times(html_title, stats):
        """Write one table row for stats and remember its histogram.

        :param html_title: Already HTML-quoted markup for the row title.
        :param stats: The statistics object to render.
        """
        histograms.append(stats.histogram)
        # The index of the histogram just appended ties the placeholder
        # <div> id to the plotting function generated further down.
        print >> outf, dedent("""\
            <tr>
            <th class="category-title">%s</th>
            <td class="numeric total-hits">%d</td>
            <td class="numeric 99pc-under-time">%.2f</td>
            <td class="numeric mean-time">%.2f</td>
            <td class="numeric std-time">%.2f</td>
            <td class="numeric median-time">%.2f</td>
            <td>
                <div class="histogram" id="histogram%d"></div>
            </td>
            <td class="numeric 99pc-under-sqltime">%.2f</td>
            <td class="numeric mean-sqltime">%.2f</td>
            <td class="numeric std-sqltime">%.2f</td>
            <td class="numeric median-sqltime">%.2f</td>

            <td class="numeric 99pc-under-sqlstatement">%.f</td>
            <td class="numeric mean-sqlstatements">%.2f</td>
            <td class="numeric std-sqlstatements">%.2f</td>
            <td class="numeric median-sqlstatements">%.2f</td>

            <td class="numeric high-db-usage">%.f</td>
            </tr>
            """ % (
                html_title,
                stats.total_hits, stats.ninetyninth_percentile_time,
                stats.mean, stats.std, stats.median,
                len(histograms) - 1,
                stats.ninetyninth_percentile_sqltime, stats.mean_sqltime,
                stats.std_sqltime, stats.median_sqltime,
                stats.ninetyninth_percentile_sqlstatements,
                stats.mean_sqlstatements,
                stats.std_sqlstatements, stats.median_sqlstatements,
                stats.ninetyninth_percentile_sqlstatements * stats.total_hits,
                ))

    # The category name ends up in markup, so quote it like every other
    # title written to the report.
    category_heading = html_quote(category_name)

    # Table of contents
    print >> outf, '<ol>'
    if category_times:
        print >> outf, '<li><a href="#catrep">%s Report</a></li>' % (
            category_heading)
    if pageid_times:
        print >> outf, '<li><a href="#pageidrep">Pageid Report</a></li>'
    if url_times:
        print >> outf, '<li><a href="#topurlrep">Top URL Report</a></li>'
    print >> outf, '</ol>'

    if category_times:
        print >> outf, '<h2 id="catrep">%s Report</h2>' % (
            category_heading)
        print >> outf, table_header
        for category, times in category_times:
            html_title = '%s<br/><span class="regexp">%s</span>' % (
                html_quote(category.title), html_quote(category.regexp))
            handle_times(html_title, times)
        print >> outf, table_footer

    if pageid_times:
        print >> outf, '<h2 id="pageidrep">Pageid Report</h2>'
        print >> outf, table_header
        for pageid, times in pageid_times:
            # Only the pageid section honours the threshold.
            if (ninetyninth_percentile_threshold is not None and
                (times.ninetyninth_percentile_time <
                ninetyninth_percentile_threshold)):
                continue
            handle_times(html_quote(pageid), times)
        print >> outf, table_footer

    if url_times:
        print >> outf, '<h2 id="topurlrep">Top URL Report</h2>'
        print >> outf, table_header
        for url, times in url_times:
            handle_times(html_quote(url), times)
        print >> outf, table_footer

    # Output the javascript to render our histograms nicely, replacing
    # the placeholder <div> tags output earlier.
    #
    # The y axis uses transform(v) = sqrt(log2(100 * v + 1)), so
    # inverseTransform must be (2 ** (v * v) - 1) / 100. This template
    # is %-formatted, hence the doubled '%%' in the tick formatter.
    print >> outf, dedent("""\
        <script language="javascript" type="text/javascript">
        $(function () {
            var options = {
                series: {
                    bars: {show: true, barWidth: %s}
                    },
                xaxis: {
                    tickFormatter: function (val, axis) {
                        return val.toFixed(axis.tickDecimals) + "s";
                        }
                    },
                yaxis: {
                    min: 0,
                    max: 1,
                    transform: function (v) {
                        return Math.pow(Math.log(v*100+1)/Math.LN2, 0.5);
                        },
                    inverseTransform: function (v) {
                        return (Math.pow(2, v*v) - 1) / 100;
                        },
                    tickDecimals: 1,
                    tickFormatter: function (val, axis) {
                        return (val * 100).toFixed(axis.tickDecimals) + "%%";
                        },
                    ticks: [0.001,0.01,0.10,0.50,1.0]
                    },
                grid: {
                    aboveData: true,
                    labelMargin: 15
                    }
                };
        """ % histogram_resolution)

    for i, histogram in enumerate(histograms):
        # No plotting code is generated for empty histograms; their
        # placeholder <div> is left empty.
        if histogram.count == 0:
            continue
        print >> outf, dedent("""\
            function plot_histogram_%(id)d() {
                var d = %(data)s;

                $.plot(
                    $("#histogram%(id)d"),
                    [{data: d}], options);
            }
            $('#histogram%(id)d').appear(function() {
                plot_histogram_%(id)d();
            });

            """ % {'id': i, 'data': json.dumps(histogram.bins_relative)})

    print >> outf, dedent("""\
            });
        </script>
        </body>
        </html>
        """)