~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python -S
8687.15.7 by Karl Fogel
Add the copyright header block to more files.
2
#
14027.3.2 by Jeroen Vermeulen
Merge devel, resolve conflicts.
3
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
8687.15.7 by Karl Fogel
Add the copyright header block to more files.
4
# GNU Affero General Public License version 3 (see the file LICENSE).
5
4935.3.7 by Curtis Hovey
Added bad name suppression to cronscripts.
6
# pylint: disable-msg=C0103,W0403
2002 by Canonical.com Patch Queue Manager
Implements the karma framework (according to the KarmaImplementation spec) and hook it into Malone events. r=spiv,stub
7
2125 by Canonical.com Patch Queue Manager
[r=bjornt] Cronscript refactorings
8
import _pythonpath
3691.9.40 by Guilherme Salgado
Fix a bug in the karmacache updater which was causing the KarmaCache table to bloat absurdly
9
from zope.component import getUtility
10
2002 by Canonical.com Patch Queue Manager
Implements the karma framework (according to the KarmaImplementation spec) and hook it into Malone events. r=spiv,stub
11
from canonical.config import config
7675.357.20 by Stuart Bishop
flush database updates before issuing raw SQL deletes to avoid losing data
12
from canonical.database.sqlbase import (
12415.1.2 by William Grant
Fix foaf-update-karma-cache.py to get a cursor without using the condemned ZTM.
13
    cursor,
14
    flush_database_updates,
15
    )
11270.1.3 by Tim Penhey
Changed NotFoundError imports - gee there were a lot of them.
16
from lp.app.errors import NotFoundError
11882.2.2 by Jonathan Lange
Clear up a heck of a lot of imports from canonical.launchpad.interfaces.
17
from lp.registry.interfaces.karma import IKarmaCacheManager
8356.1.1 by Leonard Richardson
Partial move.
18
from lp.services.scripts.base import LaunchpadCronScript
4264.2.1 by James Henstridge
add a LaunchpadCronScript subclass, and make cronscripts/*.py use it
19
20
21
class KarmaCacheUpdater(LaunchpadCronScript):
3691.336.8 by Guilherme Salgado
Convert karma script updater to LaunchpadScript, kill all the bizarre vars(), factor ugliness into nice little boxes, and keep one priceless comment.
22
    def main(self):
        """Recalculate all of the karma cache tables.

        A database cursor and the IKarmaCacheManager utility are obtained
        and stored on the instance for the step methods to use. The three
        steps then run in a fixed order:

        A. Refresh the individual KarmaCache entries from the Karma table
           and delete the summed rows left over from the previous run.
        B. Rebuild KarmaTotalCache from the rows now in KarmaCache.
        C. Insert the summed rows into KarmaCache.
        """
        self.logger.info("Updating Launchpad karma caches")

        self.cur = cursor()
        self.karmacachemanager = getUtility(IKarmaCacheManager)

        # This ordering needs to be preserved. Step B totals every
        # KarmaCache row of a person, so the summed rows that
        # C_add_karmacache_sums inserts must not be present yet when B
        # runs; A_update_karmacache is what clears out the old ones.
        steps = (
            self.A_update_karmacache,
            self.B_update_karmatotalcache,
            self.C_add_karmacache_sums,
            )
        for run_step in steps:
            run_step()

        self.logger.info("Finished updating Launchpad karma caches")
48
49
    def A_update_karmacache(self):
        """Step A: recalculate the individual (non-summed) KarmaCache rows.

        Sums the time-decayed karma points per (person, category, product,
        distribution), scales them per category, and writes each result via
        update_one_karma_cache_entry. Afterwards the summed rows and the
        rows whose karmavalue is zero or less are deleted, and the table is
        vacuumed.
        """
        self.logger.info("Step A: Calculating individual KarmaCache entries")

        # Karma loses value linearly as it ages and is worth nothing once it
        # is older than karma_expires_after. The query gives odd results for
        # karma dated in the future, but an extra WHEN clause to handle that
        # was judged not worth adding.
        karma_expires_after = '1 year'
        decayed_karma_query = """
            SELECT person, category, product, distribution,
                ROUND(SUM(
                CASE WHEN karma.datecreated + %s::interval
                    <= CURRENT_TIMESTAMP AT TIME ZONE 'UTC' THEN 0
                ELSE points * (1 - extract(
                    EPOCH FROM CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
                    karma.datecreated
                    ) / extract(EPOCH FROM %s::interval))
                END
                ))
            FROM Karma
            JOIN KarmaAction ON action = KarmaAction.id
            GROUP BY person, category, product, distribution
            """
        self.cur.execute(
            decayed_karma_query, (karma_expires_after, karma_expires_after))

        # Pull everything into RAM so the database is not tied up while the
        # rows are being processed.
        karma_rows = list(self.cur.fetchall())
        self.logger.debug(
            "Got %d (person, category) scores", len(karma_rows))

        # There is no commit per iteration: the script runs in autocommit
        # mode.
        scaling_by_category = self.calculate_scaling(karma_rows)
        for karma_row in karma_rows:
            self.update_one_karma_cache_entry(karma_row, scaling_by_category)
        # Flush pending updates before the raw SQL deletes below are issued.
        flush_database_updates()

        cleanup_statements = (
            # The summed rows; these are the entries that get replaced.
            "DELETE FROM KarmaCache WHERE category IS NULL",
            """
            DELETE FROM KarmaCache
            WHERE project IS NOT NULL AND product IS NULL""",
            """
            DELETE FROM KarmaCache
            WHERE category IS NOT NULL AND project IS NULL AND product IS NULL
                  AND distribution IS NULL AND sourcepackagename IS NULL""",
            # Keep inactive users from bloating the table.
            "DELETE FROM KarmaCache WHERE karmavalue <= 0",
            # Every record in the table has just been touched, so VACUUM it.
            """VACUUM KarmaCache""",
            )
        for statement in cleanup_statements:
            self.cur.execute(statement)
99
100
    def B_update_karmatotalcache(self):
        """Step B: bring KarmaTotalCache in line with KarmaCache.

        Rows for people without any KarmaCache entry are deleted, existing
        rows get the person's current SUM(karmavalue), the table is
        vacuumed, and finally rows are inserted for people that have
        KarmaCache entries but no KarmaTotalCache row yet.
        """
        self.logger.info("Step B: Rebuilding KarmaTotalCache")

        # Trash the records of people who no longer have any karma cached.
        delete_orphans = """
            DELETE FROM KarmaTotalCache
            WHERE person NOT IN (SELECT person FROM KarmaCache)
            """
        # Refresh the totals of the records that remain.
        refresh_totals = """
            UPDATE KarmaTotalCache SET karma_total=sum_karmavalue
            FROM (
                SELECT person AS sum_person, SUM(karmavalue) AS sum_karmavalue
                FROM KarmaCache
                GROUP BY person
                ) AS sums
            WHERE KarmaTotalCache.person = sum_person
            """
        # Every row in the table has just been touched, so VACUUM it.
        vacuum_totals = """VACUUM KarmaTotalCache"""
        # Add records for people who are new to the KarmaTotalCache table.
        #
        # XXX: salgado 2007-02-06:
        # If deadlocks ever become a problem, first LOCK the corresponding
        # rows in the Person table so the bulk insert cannot fail, i.e. wrap
        # the insert in BEGIN ... COMMIT and start with
        #     SELECT * FROM Person
        #     WHERE id NOT IN (SELECT person FROM KarmaTotalCache)
        #     FOR UPDATE
        # We don't bother at the moment as this would involve granting
        # UPDATE rights on the Person table to the karmacacheupdater user.
        insert_missing = """
            INSERT INTO KarmaTotalCache (person, karma_total)
            SELECT person, SUM(karmavalue) FROM KarmaCache
            WHERE person NOT IN (SELECT person FROM KarmaTotalCache)
            GROUP BY person
            """

        run = self.cur.execute
        run(delete_orphans)
        run(refresh_totals)
        run(vacuum_totals)
        run(insert_missing)
143
144
    def C_add_karmacache_sums(self):
        """Step C: insert the summed rows into KarmaCache.

        One INSERT ... SELECT is issued per kind of sum, each aggregating
        rows that are already in KarmaCache:

        - All actions with a specific category of a person.
        - All actions of a person on a given product.
        - All actions of a person on a given distribution.
        - All actions of a person on a given project.
        - All actions with a specific category of a person on a given
          project.
        """
        self.logger.info("Step C: Calculating KarmaCache sums")

        # The statements are executed in exactly the order listed here.
        sum_statements = (
            # - All actions with a specific category of a person.
            """
            INSERT INTO KarmaCache
                (person, category, karmavalue, product, distribution,
                 sourcepackagename, project)
            SELECT person, category, SUM(karmavalue), NULL, NULL, NULL, NULL
            FROM KarmaCache
            WHERE category IS NOT NULL
            GROUP BY person, category
            """,
            # - All actions of a person on a given product.
            """
            INSERT INTO KarmaCache
                (person, category, karmavalue, product, distribution,
                 sourcepackagename, project)
            SELECT person, NULL, SUM(karmavalue), product, NULL, NULL, NULL
            FROM KarmaCache
            WHERE product IS NOT NULL
            GROUP BY person, product
            """,
            # - All actions of a person on a given distribution.
            """
            INSERT INTO KarmaCache
                (person, category, karmavalue, product, distribution,
                 sourcepackagename, project)
            SELECT
                person, NULL, SUM(karmavalue), NULL, distribution, NULL, NULL
            FROM KarmaCache
            WHERE distribution IS NOT NULL
            GROUP BY person, distribution
            """,
            # - All actions of a person on a given project.
            """
            INSERT INTO KarmaCache
                (person, category, karmavalue, product, distribution,
                 sourcepackagename, project)
            SELECT person, NULL, SUM(karmavalue), NULL, NULL, NULL,
                   Product.project
            FROM KarmaCache
            JOIN Product ON product = Product.id
            WHERE Product.project IS NOT NULL AND product IS NOT NULL
                  AND category IS NOT NULL
            GROUP BY person, Product.project
            """,
            # - All actions with a specific category of a person on a given
            #   project.
            # IMPORTANT: This has to be the latest step; otherwise the rows
            # inserted here will be included in the calculation of the
            # overall karma of a person on a given project.
            """
            INSERT INTO KarmaCache
                (person, category, karmavalue, product, distribution,
                 sourcepackagename, project)
            SELECT person, category, SUM(karmavalue), NULL, NULL, NULL,
                   Product.project
            FROM KarmaCache
            JOIN Product ON product = Product.id
            WHERE Product.project IS NOT NULL AND product IS NOT NULL
                  AND category IS NOT NULL
            GROUP BY person, category, Product.project
            """,
            )
        for statement in sum_statements:
            self.cur.execute(statement)
219
220
    def calculate_scaling(self, results):
        """Return a dict of scaling factors keyed on category ID.

        The points of all rows are totalled per category. A category's
        factor is the largest of those totals divided by the category's own
        total, so that no single category dominates the rankings. A category
        whose total is 0 gets a factor of 1, and no factor is allowed to
        exceed config.karmacacheupdater.max_scaling. Each factor is logged.

        :param results: a sequence of (person, category, product,
            distribution, points) rows.
        :return: a dict mapping category ID to scaling factor. It is empty
            when `results` is empty.
        """
        # Without any rows there is nothing to scale, and max() further down
        # would raise ValueError on the empty totals.
        if not results:
            return {}

        # The category names are only needed for the log messages.
        self.cur.execute("SELECT id, name from KarmaCategory")
        categories = dict(
            (category_id, name)
            for category_id, name in self.cur.fetchall())

        # Calculate normalization factor for each category. We currently have
        # category bloat, where translators dominate the top karma rankings.
        # By calculating a scaling factor automatically, this slant will be
        # removed even as more events are added or scoring tweaked.
        points_per_category = {}
        for _person, category, _product, _distribution, points in results:
            points_per_category[category] = (
                points_per_category.get(category, 0) + points)
        largest_total = max(points_per_category.values())

        # The cap is the same for every category; read it only once.
        max_scaling = config.karmacacheupdater.max_scaling

        scaling = {}
        for category, points in points_per_category.items():
            if points == 0:
                factor = 1
            else:
                factor = float(largest_total) / float(points)
            if factor > max_scaling:
                # Log the uncapped factor next to the cap that replaces it.
                self.logger.info(
                    'Scaling %s by a factor of %0.4f (capped to %0.4f)',
                    categories[category], factor, max_scaling)
                factor = max_scaling
            else:
                self.logger.info(
                    'Scaling %s by a factor of %0.4f',
                    categories[category], factor)
            scaling[category] = factor
        return scaling
10303.1.1 by Gary Poster
use newest version of zc.buildout
257
3691.336.8 by Guilherme Salgado
Convert karma script updater to LaunchpadScript, kill all the bizarre vars(), factor ugliness into nice little boxes, and keep one priceless comment.
258
    def update_one_karma_cache_entry(self, entry, scaling):
        """Update, or create, an individual (non-summed) KarmaCache entry.

        KarmaCache has individual entries, and then it has the summed entries
        that correspond to overall contributions across all categories. Look
        at C_add_karmacache_sums to see how the summed entries are generated.

        :param entry: a (person_id, category_id, product_id,
            distribution_id, points) tuple.
        :param scaling: a dict mapping category ID to the factor the points
            are multiplied by.
        """
        (person_id, category_id, product_id, distribution_id, points) = entry
        points *= scaling[category_id]
        # The arguments are handed to the logger unformatted so that no
        # string is built per row when debug logging is switched off.
        self.logger.debug(
            "Setting person_id=%d, category_id=%d, points=%d",
            person_id, category_id, points)

        # Only whole points are stored; the fraction is dropped.
        points = int(points)
        context = {'product_id': product_id,
                   'distribution_id': distribution_id}

        try:
            self.karmacachemanager.updateKarmaValue(
                points, person_id, category_id, **context)
        except NotFoundError:
            # Row didn't exist; do an insert.
            self.karmacachemanager.new(
                points, person_id, category_id, **context)
            self.logger.debug(
                "Created karmacache for person=%s, points=%s, category=%s, "
                "context=%s", person_id, points, category_id, context)
        else:
            self.logger.debug(
                "Updated karmacache for person=%s, points=%s, category=%s, "
                "context=%s", person_id, points, category_id, context)
3691.336.7 by Guilherme Salgado
Change KarmaCache to store other levels of caches so that we don't need to issue SUM queries on it.
287
2976.4.2 by Stuart Bishop
Drop person.karma, replacing with new KarmaTotalCache table
288
2002 by Canonical.com Patch Queue Manager
Implements the karma framework (according to the KarmaImplementation spec) and hook it into Malone events. r=spiv,stub
289
if __name__ == '__main__':
    # The script connects as the database user configured for the karma
    # cache updater.
    karma_dbuser = config.karmacacheupdater.dbuser
    updater = KarmaCacheUpdater('karma-update', dbuser=karma_dbuser)
    # The autocommit isolation level keeps contention to a minimum and
    # saves us from calling COMMIT explicitly all the time. The price is
    # that a run which gets interrupted leaves the data inconsistent (only
    # part of the caches will have been recalculated), so the script then
    # has to be run again.
    updater.lock_and_run(isolation='autocommit')