~launchpad-pqm/launchpad/devel

14027.3.7 by Jeroen Vermeulen
Conflicts.
1
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
8687.15.18 by Karl Fogel
Add the copyright header block to files under lib/canonical/.
2
# GNU Affero General Public License version 3 (see the file LICENSE).
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
3
4
"""Database garbage collection."""
5
6
__metaclass__ = type
11703.1.3 by Tim Penhey
Format the __all__ export declaration.
7
__all__ = [
8
    'DailyDatabaseGarbageCollector',
9
    'HourlyDatabaseGarbageCollector',
10
    ]
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
11
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
12
from datetime import (
13
    datetime,
14
    timedelta,
15
    )
8758.2.58 by Stuart Bishop
Add a prefix to garbo task log messages to help decode interleaved logs
16
import logging
14027.3.7 by Jeroen Vermeulen
Conflicts.
17
import multiprocessing
12854.1.1 by Gavin Panella
Fix missing import.
18
import os
8758.2.51 by Stuart Bishop
Multithreaded garbo
19
import threading
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
20
import time
21
12854.1.2 by Gavin Panella
Format imports, fix lint.
22
from contrib.glock import (
23
    GlobalLock,
24
    LockAlreadyAcquired,
25
    )
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
26
from psycopg2 import IntegrityError
7675.169.6 by Tim Penhey
Fix the pruning.
27
import pytz
14027.3.7 by Jeroen Vermeulen
Conflicts.
28
from storm.expr import In
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
29
from storm.locals import (
30
    Max,
31
    Min,
32
    SQL,
33
    )
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
34
import transaction
35
from zope.component import getUtility
7675.706.18 by Graham Binns
Added chunking to the BugHeatUpdater DBLoopTuner.
36
from zope.security.proxy import removeSecurityProxy
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
37
8377.9.10 by Michael Hudson
untested introduction of a config value for failure limit
38
from canonical.config import config
8697.25.1 by Guilherme Salgado
New task ran as part of garbo-daily to delete unlinked person entries.
39
from canonical.database import postgresql
9893.6.54 by Stuart Bishop
Work around Bug #820290
40
from canonical.database.constants import UTC_NOW
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
41
from canonical.database.sqlbase import (
42
    cursor,
7675.809.14 by Robert Collins
Merge trunk resolving conflicts, fingers crossed.
43
    session_store,
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
44
    sqlvalues,
45
    )
8303.10.1 by James Henstridge
Garbo jobs to link people to RevisionAuthors and HWSubmissions as new
46
from canonical.launchpad.database.emailaddress import EmailAddress
12854.1.2 by Gavin Panella
Format imports, fix lint.
47
from canonical.launchpad.database.librarian import TimeLimitedToken
8758.4.18 by Stuart Bishop
Remove LoginToken rows older than 1 year
48
from canonical.launchpad.database.logintoken import LoginToken
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
49
from canonical.launchpad.database.oauth import OAuthNonce
7675.88.8 by Stuart Bishop
Stop ShipIt OpenID consumer sharing tables with the SSO server, dev replication setup fixes and test fixes
50
from canonical.launchpad.database.openidconsumer import OpenIDConsumerNonce
13635.1.1 by Ian Booth
Add garbo job to remove old answer contacts
51
from canonical.launchpad.interfaces.account import AccountStatus
8303.10.1 by James Henstridge
Garbo jobs to link people to RevisionAuthors and HWSubmissions as new
52
from canonical.launchpad.interfaces.emailaddress import EmailAddressStatus
11787.1.6 by Curtis Hovey
Deglobbed scripts.
53
from canonical.launchpad.interfaces.lpstorm import IMasterStore
7675.395.83 by Stuart Bishop
Foreign keys make MailingListSubscriptionPruner unnecessary
54
from canonical.launchpad.utilities.looptuner import TunableLoop
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
55
from canonical.launchpad.webapp.interfaces import (
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
56
    IStoreSelector,
57
    MAIN_STORE,
58
    MASTER_FLAVOR,
59
    )
13635.1.1 by Ian Booth
Add garbo job to remove old answer contacts
60
from lp.answers.model.answercontact import AnswerContact
10124.2.13 by Graham Binns
Moved the BugHeatUpdater tunable loop into garbo-daily.
61
from lp.bugs.interfaces.bug import IBugSet
7675.706.17 by Graham Binns
Updated the bug heat garbo job to use calculate_bug_heat().
62
from lp.bugs.model.bug import Bug
10606.5.2 by Abel Deuring
new garbo job: delete bug attachments that don't have a LibraryFileContent record
63
from lp.bugs.model.bugattachment import BugAttachment
8758.2.6 by Stuart Bishop
BugNotificationPruner
64
from lp.bugs.model.bugnotification import BugNotification
8758.2.46 by Stuart Bishop
Switch BugWatchActivityPruner to use BulkPruner
65
from lp.bugs.model.bugwatch import BugWatchActivity
7675.606.6 by Graham Binns
Linked everything to scheduler.MAX_SAMPLE_SIZE.
66
from lp.bugs.scripts.checkwatches.scheduler import (
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
67
    BugWatchScheduler,
68
    MAX_SAMPLE_SIZE,
69
    )
8590.2.6 by Tim Penhey
More import fixes.
70
from lp.code.interfaces.revision import IRevisionSet
11703.1.2 by Tim Penhey
Add a CodeImportEventPruner.
71
from lp.code.model.codeimportevent import CodeImportEvent
8303.10.7 by James Henstridge
Add a garbo task to remove stale mailing list subscriptions when the
72
from lp.code.model.codeimportresult import CodeImportResult
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
73
from lp.code.model.revision import (
74
    RevisionAuthor,
75
    RevisionCache,
76
    )
77
from lp.hardwaredb.model.hwdb import HWSubmission
8697.25.4 by Stuart Bishop
Optimize PersonPruner
78
from lp.registry.model.person import Person
7675.440.3 by Paul Hummer
Committing at a WTF moment to go take a walk
79
from lp.services.job.model.job import Job
8758.2.58 by Stuart Bishop
Add a prefix to garbo task log messages to help decode interleaved logs
80
from lp.services.log.logger import PrefixFilter
13581.1.1 by Danilo Segan
Merge gmb's fix for 814576.
81
from lp.services.propertycache import cachedproperty
8758.2.9 by Stuart Bishop
Tests and fixes for BugNotificationPruner
82
from lp.services.scripts.base import (
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
83
    LaunchpadCronScript,
8758.6.1 by Stuart Bishop
Lock per garbo task allowing multiple garbo scripts to run simultaneously.
84
    LOCK_PATH,
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
85
    SilentLaunchpadScriptFailure,
86
    )
4953.7.7 by Stuart Bishop
Session model classes and session pruner tests
87
from lp.services.session.model import SessionData
13646.11.1 by Steve Kowalik
First shot changes at garbo script.
88
from lp.soyuz.model.publishing import (
89
    BinaryPackagePublishingHistory,
90
    SourcePackagePublishingHistory,
91
    )
7675.758.8 by Jeroen Vermeulen
Okay, okay, running it through garbo instead of cron.
92
from lp.translations.interfaces.potemplate import IPOTemplateSet
14027.3.7 by Jeroen Vermeulen
Conflicts.
93
from lp.translations.model.potmsgset import POTMsgSet
8758.2.27 by Stuart Bishop
POTranslationPruner
94
from lp.translations.model.potranslation import POTranslation
13581.1.1 by Danilo Segan
Merge gmb's fix for 814576.
95
from lp.translations.model.translationmessage import TranslationMessage
96
from lp.translations.model.translationtemplateitem import (
97
    TranslationTemplateItem,
98
    )
7675.169.6 by Tim Penhey
Fix the pruning.
99
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
100
13635.1.2 by Ian Booth
Lint
101
# Number of seconds in one day (seconds/minute * minutes/hour * hours/day).
ONE_DAY_IN_SECONDS = 60 * 60 * 24
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
102
103
8758.2.28 by Stuart Bishop
Breakout POTranslationPruner logic into an abstract base class for reuse
104
class BulkPruner(TunableLoop):
    """An abstract ITunableLoop base class for simple pruners.

    This is designed for the case where calculating the list of items
    is expensive, and this list may be huge. For this use case, it
    is impractical to calculate a batch of ids to remove each
    iteration.

    One approach is using a temporary table, populating it
    with the set of items to remove at the start. However, this
    approach can perform badly as you either need to prune the
    temporary table as you go, or using OFFSET to skip to the next
    batch to remove which gets slower as we progress further through
    the list.

    Instead, this implementation declares a CURSOR that can be used
    across multiple transactions, allowing us to calculate the set
    of items to remove just once and iterate over it, avoiding the
    seek-to-batch issues with a temporary table and OFFSET yet
    deleting batches of rows in separate transactions.
    """

    # The Storm database class for the table we are removing records
    # from. Must be overridden.
    target_table_class = None

    # The column name (or comma separated column names) in target_table
    # we use as the key. The type must match that returned by the
    # ids_to_prune_query and the target_table_key_type. May be overridden.
    target_table_key = 'id'

    # SQL column definition list ("name type[, name type...]") describing
    # the rows produced by ids_to_prune_query. May be overridden.
    target_table_key_type = 'id integer'

    # An SQL query returning the keys of the rows to remove from
    # target_table. Its columns must line up with target_table_key and
    # target_table_key_type (by default a single 'id' column) and it
    # should not return duplicates. Must be overridden.
    ids_to_prune_query = None

    # See `TunableLoop`. May be overridden.
    maximum_chunk_size = 10000

    # Class-wide counter used to give every pruner instance its own
    # cursor name; guarded by a lock because pruners may be created
    # from several threads.
    _unique_counter = 0
    _unique_counter_lock = threading.Lock()

    # Number of rows deleted by the most recent __call__; None until the
    # first chunk has been processed.
    _num_removed = None

    def getStore(self):
        """The master Store for the table we are pruning.

        May be overridden.
        """
        return IMasterStore(self.target_table_class)

    def __init__(self, log, abort_time=None):
        """Set up the store and open the cursor over ids_to_prune_query.

        :param log: Logger handed on to `TunableLoop`.
        :param abort_time: Optional abort time handed on to `TunableLoop`.
        """
        super(BulkPruner, self).__init__(log, abort_time)

        self.store = self.getStore()
        self.target_table_name = self.target_table_class.__storm_table__

        # Increment the counter on the class itself. Writing
        # `self._unique_counter += 1` would only create an instance
        # attribute, leaving the class counter at 0 and giving every
        # instance the same suffix.
        with BulkPruner._unique_counter_lock:
            BulkPruner._unique_counter += 1
            unique_id = BulkPruner._unique_counter
        self.cursor_name = (
            'bulkprunerid_%s_%d'
            % (self.__class__.__name__, unique_id)).lower()

        # Open the cursor. WITH HOLD keeps it usable after the
        # per-chunk commits done in __call__.
        self.store.execute(
            "DECLARE %s NO SCROLL CURSOR WITH HOLD FOR %s"
            % (self.cursor_name, self.ids_to_prune_query))

    def isDone(self):
        """See `ITunableLoop`."""
        # Finished once a chunk deleted nothing. Before the first call
        # _num_removed is None, so this is False.
        return self._num_removed == 0

    def __call__(self, chunk_size):
        """See `ITunableLoop`.

        Deletes the rows whose key matches the next chunk_size rows read
        from the cursor, records how many rows were removed and commits.
        """
        # NOTE(review): cursor_fetch() is a database-side function not
        # defined in this file; presumably it fetches the next
        # chunk_size rows from the named cursor.
        result = self.store.execute("""
            DELETE FROM %s
            WHERE (%s) IN (
                SELECT * FROM
                cursor_fetch('%s', %d) AS f(%s))
            """
            % (
                self.target_table_name, self.target_table_key,
                self.cursor_name, chunk_size, self.target_table_key_type))
        self._num_removed = result.rowcount
        transaction.commit()

    def cleanUp(self):
        """See `ITunableLoop`."""
        # Release the held cursor opened in __init__.
        self.store.execute("CLOSE %s" % self.cursor_name)
8758.2.35 by Stuart Bishop
Add optional cleanUp method to ITunableLoop
194
8758.2.28 by Stuart Bishop
Breakout POTranslationPruner logic into an abstract base class for reuse
195
8758.4.18 by Stuart Bishop
Remove LoginToken rows older than 1 year
196
class LoginTokenPruner(BulkPruner):
    """Prune LoginToken rows created more than a year ago.

    Tokens that old are of no use any more, not even for archaeology.
    """
    # Selects the ids of every LoginToken older than one year.
    ids_to_prune_query = """
        SELECT id FROM LoginToken WHERE
        created < CURRENT_TIMESTAMP - CAST('1 year' AS interval)
        """
    target_table_class = LoginToken
206
207
8758.2.28 by Stuart Bishop
Breakout POTranslationPruner logic into an abstract base class for reuse
208
class POTranslationPruner(BulkPruner):
    """Prune POTranslation rows no TranslationMessage refers to.

    XXX bug=723596 StuartBishop: This job only needs to run once per month.
    """
    # Every POTranslation id, minus the ids referenced from any of the
    # msgstr0..msgstr5 columns of TranslationMessage.
    ids_to_prune_query = """
        SELECT POTranslation.id AS id FROM POTranslation
        EXCEPT (
            SELECT msgstr0 FROM TranslationMessage
                WHERE msgstr0 IS NOT NULL

            UNION ALL SELECT msgstr1 FROM TranslationMessage
                WHERE msgstr1 IS NOT NULL

            UNION ALL SELECT msgstr2 FROM TranslationMessage
                WHERE msgstr2 IS NOT NULL

            UNION ALL SELECT msgstr3 FROM TranslationMessage
                WHERE msgstr3 IS NOT NULL

            UNION ALL SELECT msgstr4 FROM TranslationMessage
                WHERE msgstr4 IS NOT NULL

            UNION ALL SELECT msgstr5 FROM TranslationMessage
                WHERE msgstr5 IS NOT NULL
            )
        """
    target_table_class = POTranslation
236
4953.7.6 by Stuart Bishop
Move session garbage collection into garbo
237
238
class SessionPruner(BulkPruner):
    """Common configuration for pruners that delete SessionData rows."""

    # Cursor rows carry a single text column named 'id' that is matched
    # against SessionData.client_id.
    target_table_key_type = 'id text'
    target_table_key = 'client_id'
    target_table_class = SessionData
4953.7.6 by Stuart Bishop
Move session garbage collection into garbo
244
245
246
class AntiqueSessionPruner(SessionPruner):
    """Prune sessions whose last access is more than 60 days ago."""

    # client_id is aliased to 'id' to match target_table_key_type.
    ids_to_prune_query = """
        SELECT client_id AS id FROM SessionData
        WHERE last_accessed < CURRENT_TIMESTAMP - CAST('60 days' AS interval)
        """
253
254
255
class UnusedSessionPruner(SessionPruner):
    """Prune day-old sessions that never recorded a login.

    A session qualifies when it was last accessed more than 1 day ago
    and has no 'logintime' entry for the 'launchpad.authenticateduser'
    product in SessionPkgData.
    """

    ids_to_prune_query = """
        SELECT client_id AS id FROM SessionData
        WHERE
            last_accessed < CURRENT_TIMESTAMP - CAST('1 day' AS interval)
            AND client_id NOT IN (
                SELECT client_id
                FROM SessionPkgData
                WHERE
                    product_id = 'launchpad.authenticateduser'
                    AND key='logintime')
        """
269
4953.7.17 by Stuart Bishop
Keep only last 6 authenticated sessions for a user
270
271
class DuplicateSessionPruner(SessionPruner):
    """Keep only the 6 most recent authenticated sessions per user.

    Some users accumulate dozens or thousands of authenticated
    sessions. To limit exposure to replay attacks, every session ranked
    beyond the 6 most recently accessed for a user is removed, provided
    it has not been accessed within the last hour.
    """

    # Sessions are ranked by last_accessed (newest first) within each
    # SessionPkgData.pickle value of the 'accountid' key.
    ids_to_prune_query = """
        SELECT client_id AS id
        FROM (
            SELECT
                sessiondata.client_id,
                last_accessed,
                rank() OVER pickle AS rank
            FROM SessionData, SessionPkgData
            WHERE
                SessionData.client_id = SessionPkgData.client_id
                AND product_id = 'launchpad.authenticateduser'
                AND key='accountid'
            WINDOW pickle AS (PARTITION BY pickle ORDER BY last_accessed DESC)
            ) AS whatever
        WHERE
            rank > 6
            AND last_accessed < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                - CAST('1 hour' AS interval)
        """
298
8758.2.28 by Stuart Bishop
Breakout POTranslationPruner logic into an abstract base class for reuse
299
8758.2.40 by Stuart Bishop
Switch OAuthNoncePruner to use BulkPruner
300
class OAuthNoncePruner(BulkPruner):
    """An ITunableLoop that deletes stale OAuthNonce records.

    Every OAuthNonce whose request_timestamp is more than 1 day old is
    removed.
    """
    target_table_class = OAuthNonce

    # OAuthNonce rows are matched on three columns rather than a single
    # id, so both the key and its SQL column definitions list all three.
    target_table_key = 'access_token, request_timestamp, nonce'
    target_table_key_type = ', '.join([
        'access_token integer',
        'request_timestamp timestamp without time zone',
        'nonce text',
        ])

    ids_to_prune_query = """
        SELECT access_token, request_timestamp, nonce FROM OAuthNonce
        WHERE request_timestamp
            < CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - CAST('1 day' AS interval)
        """
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
315
316
8758.3.20 by Stuart Bishop
Add garbo job to rollup BugSummaryJournal
317
class BugSummaryJournalRollup(TunableLoop):
    """Fold pending BugSummaryJournal rows into BugSummary."""

    maximum_chunk_size = 5000

    def __init__(self, log, abort_time=None):
        """Initialise the loop and grab the main master store."""
        super(BugSummaryJournalRollup, self).__init__(log, abort_time)
        selector = getUtility(IStoreSelector)
        self.store = selector.get(MAIN_STORE, MASTER_FLAVOR)

    def isDone(self):
        """See `ITunableLoop`. Done once BugSummaryJournal is empty."""
        row = self.store.execute(
            "SELECT EXISTS (SELECT TRUE FROM BugSummaryJournal LIMIT 1)"
            ).get_one()
        journal_has_rows = row[0]
        return not journal_has_rows

    def __call__(self, chunk_size):
        """See `ITunableLoop`.

        Calls the bugsummary_rollup_journal() database function with
        chunk_size rounded to the nearest integer, then commits.
        """
        batch_size = int(chunk_size + 0.5)
        self.store.execute(
            "SELECT bugsummary_rollup_journal(%s)", (batch_size,),
            noresult=True)
        self.store.commit()
337
338
7675.88.8 by Stuart Bishop
Stop ShipIt OpenID consumer sharing tables with the SSO server, dev replication setup fixes and test fixes
339
class OpenIDConsumerNoncePruner(TunableLoop):
    """An ITunableLoop to prune old OpenIDConsumerNonce records.

    We remove all OpenIDConsumerNonce records older than 1 day.

    The chunk size is measured in seconds of nonce timestamps: each
    iteration moves the deletion cutoff forward by chunk_size seconds
    until it reaches "one day ago".
    """
    maximum_chunk_size = 6 * 60 * 60  # 6 hours in seconds.

    def __init__(self, log, abort_time=None):
        """Find the oldest nonce and compute the cutoff to prune up to.

        :param log: Logger handed on to `TunableLoop`.
        :param abort_time: Optional abort time handed on to `TunableLoop`.
        """
        super(OpenIDConsumerNoncePruner, self).__init__(log, abort_time)
        self.store = getUtility(IStoreSelector).get(MAIN_STORE, MASTER_FLAVOR)
        # None when the table is empty; isDone() handles that case.
        self.earliest_timestamp = self.store.find(
            Min(OpenIDConsumerNonce.timestamp)).one()
        # time.time() is already seconds since the epoch in UTC.
        # time.mktime(time.gmtime()) would interpret the UTC struct as
        # local time and skew the cutoff by the host's UTC offset.
        utc_now = int(time.time())
        self.earliest_wanted_timestamp = utc_now - ONE_DAY_IN_SECONDS

    def isDone(self):
        """See `ITunableLoop`.

        Done when there are no nonces at all, or the cutoff has reached
        the oldest timestamp we want to keep.
        """
        return (
            self.earliest_timestamp is None
            or self.earliest_timestamp >= self.earliest_wanted_timestamp)

    def __call__(self, chunk_size):
        """See `ITunableLoop`.

        Advance the cutoff by chunk_size seconds (never beyond the
        wanted cutoff), delete all older nonces and commit.
        """
        self.earliest_timestamp = min(
            self.earliest_wanted_timestamp,
            self.earliest_timestamp + chunk_size)

        self.log.debug(
            "Removing OpenIDConsumerNonce rows older than %s"
            % self.earliest_timestamp)

        self.store.find(
            OpenIDConsumerNonce,
            OpenIDConsumerNonce.timestamp < self.earliest_timestamp).remove()
        transaction.commit()
372
373
10556.4.1 by Guilherme Salgado
Get rid of the AUTH store and ditch c-i-p
374
class OpenIDConsumerAssociationPruner(TunableLoop):
    """An ITunableLoop that deletes expired OpenID consumer associations.

    An association is removed once issued + lifetime lies before the
    current database time.
    """
    maximum_chunk_size = 50000
    minimum_chunk_size = 3500

    table_name = 'OpenIDConsumerAssociation'

    # Rows deleted by the latest chunk; None before the first chunk.
    _num_removed = None

    def __init__(self, log, abort_time=None):
        """Initialise the loop and grab the main master store."""
        super(OpenIDConsumerAssociationPruner, self).__init__(log, abort_time)
        selector = getUtility(IStoreSelector)
        self.store = selector.get(MAIN_STORE, MASTER_FLAVOR)

    def isDone(self):
        """See `ITunableLoop`. Done once a chunk removed no rows."""
        return self._num_removed == 0

    def __call__(self, chunksize):
        """See `ITunableLoop`.

        Delete at most chunksize expired associations and commit.
        """
        delete_expired = """
            DELETE FROM %s
            WHERE (server_url, handle) IN (
                SELECT server_url, handle FROM %s
                WHERE issued + lifetime <
                    EXTRACT(EPOCH FROM CURRENT_TIMESTAMP)
                LIMIT %d
                )
            """ % (self.table_name, self.table_name, int(chunksize))
        outcome = self.store.execute(delete_expired)
        self._num_removed = outcome.rowcount
        transaction.commit()
401
402
7675.169.6 by Tim Penhey
Fix the pruning.
403
class RevisionCachePruner(TunableLoop):
    """A tunable loop that drops old entries from the revision cache."""

    maximum_chunk_size = 100

    def isDone(self):
        """Report whether any RevisionCache row older than 30 days is left."""
        cutoff = datetime.now(pytz.UTC) - timedelta(days=30)
        stale_entries = IMasterStore(RevisionCache).find(
            RevisionCache, RevisionCache.revision_date < cutoff)
        return stale_entries.count() == 0

    def __call__(self, chunk_size):
        """Let the `IRevisionSet` utility prune one chunk, then commit."""
        revision_set = getUtility(IRevisionSet)
        revision_set.pruneRevisionCache(chunk_size)
        transaction.commit()
7675.169.3 by Tim Penhey
Add methods to populate and prune the RevisionCache.
420
421
8758.2.42 by Stuart Bishop
Switch CodeImportEventPruner to use BulkPruner
422
class CodeImportEventPruner(BulkPruner):
    """Prune `CodeImportEvent`s created more than 30 days ago.

    Events that old are no longer of any interest to us.
    """
    # Ids of every CodeImportEvent older than the 30 day cutoff.
    ids_to_prune_query = """
        SELECT id FROM CodeImportEvent
        WHERE date_created < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            - CAST('30 days' AS interval)
        """
    target_table_class = CodeImportEvent
11703.1.2 by Tim Penhey
Add a CodeImportEventPruner.
434
435
8758.2.43 by Stuart Bishop
Switch CodeImportResultPruner to use BulkPruner
436
class CodeImportResultPruner(BulkPruner):
    """A TunableLoop that removes unwanted CodeImportResult rows.

    A CodeImportResult is removed when it is more than 30 days old and
    is not among the most recent results of its CodeImport. The number
    of recent results kept per import is
    config.codeimport.consecutive_failure_limit - 1.
    """
    # Results are ranked newest-first per code_import. The rank
    # threshold is interpolated once, when the class body is executed.
    ids_to_prune_query = """
        SELECT id FROM (
            SELECT id, date_created, rank() OVER w AS rank
            FROM CodeImportResult
            WINDOW w AS (PARTITION BY code_import ORDER BY date_created DESC)
            ) AS whatever
        WHERE
            rank > %s
            AND date_created < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                - CAST('30 days' AS interval)
            """ % sqlvalues(config.codeimport.consecutive_failure_limit - 1)
    target_table_class = CodeImportResult
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
455
456
8303.10.5 by James Henstridge
Update tests that expect RevisionAuthors or HWSubmissions to be linked
457
class RevisionAuthorEmailLinker(TunableLoop):
    """A TunableLoop that attaches `RevisionAuthor`s to `Person`s.

    For each `RevisionAuthor` without a person, the author's email is
    looked up among `EmailAddress` records. When a matching address
    belongs to a person, the `RevisionAuthor` is linked to that same
    person.
    """

    maximum_chunk_size = 1000

    def __init__(self, log, abort_time=None):
        """Grab the stores and the range of RevisionAuthor ids to walk."""
        super(RevisionAuthorEmailLinker, self).__init__(log, abort_time)
        self.email_store = IMasterStore(EmailAddress)
        self.author_store = IMasterStore(RevisionAuthor)

        # Both bounds are None when there are no RevisionAuthor rows.
        id_bounds = self.author_store.find(
            (Min(RevisionAuthor.id), Max(RevisionAuthor.id))).one()
        self.min_author_id, self.max_author_id = id_bounds

        self.next_author_id = self.min_author_id

    def isDone(self):
        """See `ITunableLoop`. Done once we have walked past the last id."""
        if self.min_author_id is None:
            return True
        return self.next_author_id > self.max_author_id

    def __call__(self, chunk_size):
        """See `ITunableLoop`.

        Link up to chunk_size unlinked authors, starting at
        next_author_id, then commit and remember where to resume.
        """
        # `== None` / `!= None` are deliberate: they build Storm SQL
        # expressions (IS NULL / IS NOT NULL), not Python comparisons.
        unlinked = self.author_store.find(
            RevisionAuthor,
            RevisionAuthor.id >= self.next_author_id,
            RevisionAuthor.personID == None,
            RevisionAuthor.email != None)
        unlinked.order_by(RevisionAuthor.id)
        authors = list(unlinked[:chunk_size])

        if not authors:
            # Nothing left to link: jump past the end so isDone() is true.
            self.next_author_id = self.max_author_id + 1
            transaction.commit()
            return

        wanted_addresses = [author.email.lower() for author in authors]
        usable_statuses = [
            EmailAddressStatus.PREFERRED,
            EmailAddressStatus.VALIDATED,
            ]
        # Map of lower-cased email address -> owning person id.
        person_by_email = dict(self.email_store.find(
            (EmailAddress.email.lower(), EmailAddress.personID),
            EmailAddress.email.lower().is_in(wanted_addresses),
            EmailAddress.status.is_in(usable_statuses),
            EmailAddress.personID != None))

        if person_by_email:
            for author in authors:
                person_id = person_by_email.get(author.email.lower())
                if person_id is not None:
                    author.personID = person_id

        self.next_author_id = authors[-1].id + 1
        transaction.commit()
515
516
8303.10.5 by James Henstridge
Update tests that expect RevisionAuthors or HWSubmissions to be linked
517
class HWSubmissionEmailLinker(TunableLoop):
    """A TunableLoop that links `HWSubmission` objects to `Person` objects.

    `EmailAddress` objects are looked up for `HWSubmission` objects
    that have not yet been linked to a `Person`.  If the
    `EmailAddress` is linked to a person, then the `HWSubmission` is
    linked to the same.

    All matches are computed once, in `__init__`, into the temporary
    table NewlyMatchedSubmission.  Each call then applies one window of
    that table, ordered by submission id, to HWSubmission.
    """
    maximum_chunk_size = 50000

    def __init__(self, log, abort_time=None):
        """Build and index the temporary table of matched submissions.

        :param log: Logger handed on to `TunableLoop`.
        :param abort_time: Optional abort time handed on to `TunableLoop`.
        """
        super(HWSubmissionEmailLinker, self).__init__(log, abort_time)
        self.submission_store = IMasterStore(HWSubmission)
        # Clear out any table left over from an earlier loop that used
        # the same connection.
        self.submission_store.execute(
            "DROP TABLE IF EXISTS NewlyMatchedSubmission")
        # The join with the Person table is to avoid any replication
        # lag issues - EmailAddress.person might reference a Person
        # that does not yet exist.
        self.submission_store.execute("""
            CREATE TEMPORARY TABLE NewlyMatchedSubmission AS
            SELECT
                HWSubmission.id AS submission,
                EmailAddress.person AS owner
            FROM HWSubmission, EmailAddress, Person
            WHERE HWSubmission.owner IS NULL
                AND EmailAddress.person = Person.id
                AND EmailAddress.status IN %s
                AND lower(HWSubmission.raw_emailaddress)
                    = lower(EmailAddress.email)
            """ % sqlvalues(
                [EmailAddressStatus.VALIDATED, EmailAddressStatus.PREFERRED]),
            noresult=True)
        self.submission_store.execute("""
            CREATE INDEX newlymatchsubmission__submission__idx
            ON NewlyMatchedSubmission(submission)
            """, noresult=True)
        self.matched_submission_count = self.submission_store.execute("""
            SELECT COUNT(*) FROM NewlyMatchedSubmission
            """).get_one()[0]
        # Number of NewlyMatchedSubmission rows already applied.
        self.offset = 0

    def isDone(self):
        """Return True once every matched submission has been applied."""
        return self.offset >= self.matched_submission_count

    def __call__(self, chunk_size):
        """Set the owner on the next `chunk_size` matched submissions.

        :param chunk_size: Number of rows to update.  It may be fractional
            (other loops in this module note that the tuner passes a
            float), so it is reduced to a whole number of at least one
            before use.
        """
        # "%d" truncates floats.  Truncating a fractional offset and a
        # fractional limit independently leaves gaps between consecutive
        # windows (e.g. 2.7 gives [0,2), [2,4), [5,7)), so rows would be
        # silently skipped.  Working in whole rows keeps the windows
        # contiguous, and the floor of one guarantees progress.
        chunk_size = max(1, int(chunk_size))
        self.submission_store.execute("""
            UPDATE HWSubmission
            SET owner=NewlyMatchedSubmission.owner
            FROM (
                SELECT submission, owner
                FROM NewlyMatchedSubmission
                ORDER BY submission
                OFFSET %d
                LIMIT %d
                ) AS NewlyMatchedSubmission
            WHERE HWSubmission.id = NewlyMatchedSubmission.submission
            """ % (self.offset, chunk_size), noresult=True)
        self.offset += chunk_size
        transaction.commit()
576
577
8697.25.4 by Stuart Bishop
Optimize PersonPruner
578
class PersonPruner(TunableLoop):
    """Delete `Person` rows that nothing else in the database links to.

    `__init__` builds two temporary tables: LinkedPeople (person ids that
    must be kept) and UnlinkedPeople (non-team person ids that are not in
    LinkedPeople, numbered by a serial id).  Each call then deletes the
    people in one window of UnlinkedPeople ids.
    """

    maximum_chunk_size = 1000

    def __init__(self, log, abort_time=None):
        """Work out which Person rows are candidates for removal.

        :param log: Logger handed on to `TunableLoop`.
        :param abort_time: Optional abort time handed on to `TunableLoop`.
        """
        super(PersonPruner, self).__init__(log, abort_time)
        # Next UnlinkedPeople.id to process; the windows in __call__ are
        # expressed in terms of that serial column.
        self.offset = 1
        self.store = IMasterStore(Person)
        self.log.debug("Creating LinkedPeople temporary table.")
        self.store.execute(
            "CREATE TEMPORARY TABLE LinkedPeople(person integer primary key)")
        # Prefill with Person entries created after our OpenID provider
        # started creating personless accounts on signup.
        self.log.debug(
            "Populating LinkedPeople with post-OpenID created Person.")
        self.store.execute("""
            INSERT INTO LinkedPeople
            SELECT id FROM Person
            WHERE datecreated > '2009-04-01'
            """)
        transaction.commit()
        for (from_table, from_column, to_table, to_column, uflag, dflag) in (
                postgresql.listReferences(cursor(), 'person', 'id')):
            # Skip things that don't link to Person.id or that link to it from
            # TeamParticipation or EmailAddress, as all Person entries will be
            # linked to from these tables.  Similarly, PersonSettings can
            # simply be deleted if it exists, because it has a 1 (or 0) to 1
            # relationship with Person.
            if (to_table != 'person' or to_column != 'id'
                or from_table in ('teamparticipation', 'emailaddress',
                                  'personsettings')):
                continue
            self.log.debug(
                "Populating LinkedPeople from %s.%s"
                % (from_table, from_column))
            # EXCEPT ALL filters out ids already recorded, so the primary
            # key on LinkedPeople is never violated.
            self.store.execute("""
                INSERT INTO LinkedPeople
                SELECT DISTINCT %(from_column)s AS person
                FROM %(from_table)s
                WHERE %(from_column)s IS NOT NULL
                EXCEPT ALL
                SELECT person FROM LinkedPeople
                """ % dict(from_table=from_table, from_column=from_column))
            transaction.commit()

        self.log.debug("Creating UnlinkedPeople temporary table.")
        self.store.execute("""
            CREATE TEMPORARY TABLE UnlinkedPeople(
                id serial primary key, person integer);
            """)
        self.log.debug("Populating UnlinkedPeople.")
        self.store.execute("""
            INSERT INTO UnlinkedPeople (person) (
                SELECT id AS person FROM Person
                WHERE teamowner IS NULL
                EXCEPT ALL
                SELECT person FROM LinkedPeople);
            """)
        transaction.commit()
        self.log.debug("Indexing UnlinkedPeople.")
        self.store.execute("""
            CREATE UNIQUE INDEX unlinkedpeople__person__idx ON
                UnlinkedPeople(person);
            """)
        self.log.debug("Analyzing UnlinkedPeople.")
        self.store.execute("""
            ANALYZE UnlinkedPeople;
            """)
        self.log.debug("Counting UnlinkedPeople.")
        self.max_offset = self.store.execute(
            "SELECT MAX(id) FROM UnlinkedPeople").get_one()[0]
        if self.max_offset is None:
            self.max_offset = -1  # Trigger isDone() now.
            self.log.debug("No Person records to remove.")
        else:
            self.log.info("%d Person records to remove." % self.max_offset)
        # Don't keep any locks open - we might block.
        transaction.commit()

    def isDone(self):
        """Return True once the cursor has passed the last UnlinkedPeople id."""
        return self.offset > self.max_offset

    def __call__(self, chunk_size):
        """Delete the people in the next window of UnlinkedPeople ids.

        :param chunk_size: Width of the id window to process.

        An `IntegrityError` (a person became linked to something during
        the run) aborts the transaction for this window only; those rows
        are left for a later run.
        """
        subquery = """
            SELECT person FROM UnlinkedPeople
            WHERE id BETWEEN %d AND %d
            """ % (self.offset, self.offset + chunk_size - 1)
        people_ids = ",".join(
            str(item[0]) for item in self.store.execute(subquery).get_all())
        self.offset += chunk_size
        if not people_ids:
            # An empty window would produce "IN ()", which is not valid
            # SQL and would abort the whole loop with a non-integrity
            # error.  There is nothing to delete, so just release the
            # transaction opened by the SELECT and move on.
            transaction.commit()
            return
        try:
            # This would be dangerous if we were deleting a
            # team, so join with Person to ensure it isn't one
            # even in the rare case a person is converted to
            # a team during this run.
            self.store.execute("""
                DELETE FROM TeamParticipation
                USING Person
                WHERE TeamParticipation.person = Person.id
                    AND Person.teamowner IS NULL
                    AND Person.id IN (%s)
                """ % people_ids)
            self.store.execute("""
                UPDATE EmailAddress SET person=NULL
                WHERE person IN (%s)
                """ % people_ids)
            # This cascade deletes any PersonSettings records.
            self.store.execute("""
                DELETE FROM Person
                WHERE id IN (%s)
                """ % people_ids)
            transaction.commit()
            self.log.debug(
                "Deleted the following unlinked people: %s" % people_ids)
        except IntegrityError:
            # This case happens when a Person is linked to something
            # during the run.  It is unlikely to occur, so the whole
            # window is simply abandoned; everything will clear up on
            # the next run.
            transaction.abort()
            self.log.warning(
                "Failed to delete %d Person records. Left for next time."
                % chunk_size)
700
701
8758.2.44 by Stuart Bishop
Switch BugNotificationPruner to use BulkPruner
702
class BugNotificationPruner(BulkPruner):
    """Prune `BugNotification` rows that are no longer of interest.

    Every notification emailed more than 30 days ago is selected for
    removal.  The most recent 30 days of sent notifications are kept to
    help diagnose email delivery issues.  Rows with no `date_emailed`
    never match the query, so unsent notifications are left alone.
    """
    # Table the selected ids belong to.
    target_table_class = BugNotification
    # Selects the ids of the rows to prune.
    ids_to_prune_query = """
        SELECT BugNotification.id FROM BugNotification
        WHERE date_emailed < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
            - CAST('30 days' AS interval)
        """
8758.2.6 by Stuart Bishop
BugNotificationPruner
714
715
13635.1.1 by Ian Booth
Add garbo job to remove old answer contacts
716
class AnswerContactPruner(BulkPruner):
    """Drop answer contacts whose accounts are no longer usable.

    An `AnswerContact` row is selected for removal when the account of
    its person has been:
      DEACTIVATED for more than one day, or
      SUSPENDED for more than seven days.
    """
    # Table the selected ids belong to.
    target_table_class = AnswerContact
    # Selects the ids of the rows to prune; the two placeholders are
    # filled with the DEACTIVATED and SUSPENDED status values, in that
    # order.
    ids_to_prune_query = """
        SELECT DISTINCT AnswerContact.id
        FROM AnswerContact, Person, Account
        WHERE
            AnswerContact.person = Person.id
            AND Person.account = Account.id
            AND (
                (Account.date_status_set <
                CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                - CAST('1 day' AS interval)
                AND Account.status = %s)
                OR
                (Account.date_status_set <
                CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                - CAST('7 days' AS interval)
                AND Account.status = %s)
            )
        """ % (AccountStatus.DEACTIVATED.value, AccountStatus.SUSPENDED.value)
742
743
8758.2.45 by Stuart Bishop
Switch BranchJobPruner to use BulkPruner
744
class BranchJobPruner(BulkPruner):
    """Prune `Job` rows of `BranchJob`s that finished over 30 days ago.

    A job is selected when it is referenced by a BranchJob and its
    `date_finished` is more than 30 days in the past.  Jobs that have not
    finished have no `date_finished` and so never match.
    """
    # Ids are taken from Job, not BranchJob.
    target_table_class = Job
    # Selects the ids of the rows to prune.
    ids_to_prune_query = """
        SELECT DISTINCT Job.id
        FROM Job, BranchJob
        WHERE
            Job.id = BranchJob.job
            AND Job.date_finished < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                - CAST('30 days' AS interval)
        """
7675.440.1 by Paul Hummer
Added JobPruner to garbo daily with accompanying tests
759
760
10124.2.13 by Graham Binns
Moved the BugHeatUpdater tunable loop into garbo-daily.
761
class BugHeatUpdater(TunableLoop):
    """A `TunableLoop` that recalculates the heat of out-of-date bugs."""

    maximum_chunk_size = 5000

    def __init__(self, log, abort_time=None, max_heat_age=None):
        """Set up the loop.

        :param log: Logger handed on to `TunableLoop`.
        :param abort_time: Optional abort time handed on to `TunableLoop`.
        :param max_heat_age: Age limit passed to
            `IBugSet.getBugsWithOutdatedHeat`.  When None, the value of
            `config.calculate_bug_heat.max_heat_age` is used.
        """
        super(BugHeatUpdater, self).__init__(log, abort_time)
        self.transaction = transaction
        self.total_processed = 0
        self.is_done = False
        self.offset = 0
        self.max_heat_age = (
            config.calculate_bug_heat.max_heat_age
            if max_heat_age is None else max_heat_age)

        self.store = IMasterStore(Bug)

    @property
    def _outdated_bugs(self):
        """The bugs whose heat is older than `max_heat_age`.

        A fresh result set is requested on every access.
        """
        # The security proxy is stripped so that callers get at the
        # underlying result set's own methods.
        return removeSecurityProxy(
            getUtility(IBugSet).getBugsWithOutdatedHeat(self.max_heat_age))

    def isDone(self):
        """See `ITunableLoop`."""
        # The loop is finished as soon as no bug with outdated heat is
        # left; the question is re-asked of the database on every call.
        return self._outdated_bugs.is_empty()

    def __call__(self, chunk_size):
        """Recalculate the heat of one batch of outdated bugs.

        See `ITunableLoop`.
        """
        # Round the (possibly fractional) chunk size to the nearest int.
        chunk_size = int(chunk_size + 0.5)
        # The ids are collected first and the update is issued through a
        # separate find(), rather than calling set() on the sliced result
        # set, to work around Storm Bug #820290.
        outdated_bug_ids = [
            bug.id for bug in self._outdated_bugs[:chunk_size]]
        self.log.debug("Updating heat for %s bugs", len(outdated_bug_ids))
        bugs_to_update = IMasterStore(Bug).find(
            Bug, Bug.id.is_in(outdated_bug_ids))
        bugs_to_update.set(
            heat=SQL('calculate_bug_heat(Bug.id)'),
            heat_last_updated=UTC_NOW)
        transaction.commit()
10124.2.13 by Graham Binns
Moved the BugHeatUpdater tunable loop into garbo-daily.
809
810
8758.2.46 by Stuart Bishop
Switch BugWatchActivityPruner to use BulkPruner
811
class BugWatchActivityPruner(BulkPruner):
    """Trim the `BugWatchActivity` history kept for each bug watch."""
    # Table the selected ids belong to.
    target_table_class = BugWatchActivity
    # Rows are ranked per bug_watch from newest to oldest (highest id
    # first); anything ranked beyond MAX_SAMPLE_SIZE is selected, so only
    # the most recent MAX_SAMPLE_SIZE entries of each watch survive.
    ids_to_prune_query = """
        SELECT id FROM (
            SELECT id, rank() OVER w AS rank
            FROM BugWatchActivity
            WINDOW w AS (PARTITION BY bug_watch ORDER BY id DESC)
            ) AS whatever
        WHERE rank > %s
        """ % sqlvalues(MAX_SAMPLE_SIZE)
7675.606.3 by Graham Binns
Added tests for isDone() and __call__().
824
7675.606.1 by Graham Binns
Added tests for getting prunable BugWatches.
825
8758.2.47 by Stuart Bishop
Switch ObsoleteBugAttachmentPruner to use BulkPruner
826
class ObsoleteBugAttachmentPruner(BulkPruner):
    """Delete bug attachments whose file has no content.

    The database schema permits a LibraryFileAlias (LFA) row without a
    matching LibraryFileContent row.  A bug attachment pointing at such a
    "content free" LFA is useless, so every `BugAttachment` whose
    library file has a NULL `content` is selected for removal.
    """
    # Table the selected ids belong to.
    target_table_class = BugAttachment
    # Selects the ids of the rows to prune.
    ids_to_prune_query = """
        SELECT BugAttachment.id
        FROM BugAttachment, LibraryFileAlias
        WHERE
            BugAttachment.libraryfile = LibraryFileAlias.id
            AND LibraryFileAlias.content IS NULL
        """
10606.5.2 by Abel Deuring
new garbo job: delete bug attachments that don't have a LibraryFileContent record
843
844
7675.809.6 by Robert Collins
Create a garbo daily task to clean up file access tokens.
845
class OldTimeLimitedTokenDeleter(TunableLoop):
    """Delete expired url access tokens from the session DB.

    Tokens older than one day are removed.  The chunk size of this loop
    is a number of seconds: each call lowers the age threshold by
    `chunk_size` seconds (never below one day) and removes the tokens
    older than that threshold, so the backlog is deleted oldest first in
    bounded steps.
    """

    maximum_chunk_size = 24 * 60 * 60  # 24 hours in seconds.

    def __init__(self, log, abort_time=None):
        """Connect to the session store and measure the oldest token.

        :param log: Logger handed on to `TunableLoop`.
        :param abort_time: Optional abort time handed on to `TunableLoop`.
        """
        super(OldTimeLimitedTokenDeleter, self).__init__(log, abort_time)
        self.store = session_store()
        self._update_oldest()

    def _update_oldest(self):
        """Record the age, in seconds, of the oldest remaining token.

        The age is 0 when the table is empty.
        """
        self.oldest_age = self.store.execute("""
            SELECT COALESCE(EXTRACT(EPOCH FROM
                CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                - MIN(created)), 0)
            FROM TimeLimitedToken
            """).get_one()[0]

    def isDone(self):
        """Return True once no token older than one day remains."""
        return self.oldest_age <= ONE_DAY_IN_SECONDS

    def __call__(self, chunk_size):
        """Remove the tokens in the oldest `chunk_size` seconds of backlog.

        :param chunk_size: Number of seconds by which to lower the age
            threshold for this call.
        """
        self.oldest_age = max(
            ONE_DAY_IN_SECONDS, self.oldest_age - chunk_size)

        self.log.debug(
            "Removed TimeLimitedToken rows older than %d seconds"
            % self.oldest_age)
        # Delete against the stepped threshold computed above.  Using the
        # fixed one-day limit here would remove the entire backlog in the
        # first call, defeating the chunking and contradicting the log
        # message.
        self.store.find(
            TimeLimitedToken,
            TimeLimitedToken.created < SQL(
                "CURRENT_TIMESTAMP AT TIME ZONE 'UTC' - interval '%d seconds'"
                % self.oldest_age)).remove()
        transaction.commit()
        self._update_oldest()
880
881
7675.758.8 by Jeroen Vermeulen
Okay, okay, running it through garbo instead of cron.
882
class SuggestiveTemplatesCacheUpdater(TunableLoop):
    """Rebuild the SuggestivePOTemplate cache.

    There is nothing to tune here: the whole job happens in a single
    call.  It is dressed up as a TunableLoop only so that it can run
    alongside the other garbo tasks.
    """
    maximum_chunk_size = 1

    # Flipped to True on the instance by the first (and only) call.
    done = False

    def isDone(self):
        """See `TunableLoop`."""
        return self.done

    def __call__(self, chunk_size):
        """See `TunableLoop`."""
        template_set = getUtility(IPOTemplateSet)
        # Empty the cache, then fill it again from scratch.
        template_set.wipeSuggestivePOTemplatesCache()
        template_set.populateSuggestivePOTemplatesCache()
        transaction.commit()
        self.done = True
903
904
13581.1.1 by Danilo Segan
Merge gmb's fix for 814576.
905
class UnusedPOTMsgSetPruner(TunableLoop):
    """Cleans up unused POTMsgSets.

    The ids to remove are computed once, on first use, and then deleted
    in slices of `chunk_size` ids together with the
    TranslationTemplateItem and TranslationMessage rows that refer to
    them.
    """

    done = False
    # Index into `msgset_ids_to_remove` of the next id to process.
    offset = 0
    maximum_chunk_size = 50000

    def isDone(self):
        """See `TunableLoop`."""
        return self.offset >= len(self.msgset_ids_to_remove)

    @cachedproperty
    def msgset_ids_to_remove(self):
        """Return the IDs of the POTMsgSets to remove."""
        query = """
            -- Get all POTMsgSet IDs which are obsolete (sequence == 0)
            -- and are not used (sequence != 0) in any other template.
            SELECT POTMsgSet
              FROM TranslationTemplateItem tti
              WHERE sequence=0 AND
              NOT EXISTS(
                SELECT id
                  FROM TranslationTemplateItem
                  WHERE potmsgset = tti.potmsgset AND sequence != 0)
            UNION
            -- Get all POTMsgSet IDs which are not referenced
            -- by any of the templates (they must have TTI rows for that).
            (SELECT POTMsgSet.id
               FROM POTMsgSet
               LEFT OUTER JOIN TranslationTemplateItem
                 ON TranslationTemplateItem.potmsgset = POTMsgSet.id
               WHERE
                 TranslationTemplateItem.potmsgset IS NULL);
            """
        store = IMasterStore(POTMsgSet)
        results = store.execute(query)
        # Each row is a one-element tuple holding a POTMsgSet id.
        return [msgset_id for (msgset_id,) in results.get_all()]

    def __call__(self, chunk_size):
        """See `TunableLoop`."""
        # We cast chunk_size to an int to avoid issues with slicing
        # (DBLoopTuner passes in a float).  The floor of one makes sure
        # the offset always advances: a chunk size below 1 would truncate
        # to 0 and the loop would never finish.
        chunk_size = max(1, int(chunk_size))
        # One bounded slice, rather than copying the whole remaining tail
        # of the list and then slicing that.
        msgset_ids_to_remove = (
            self.msgset_ids_to_remove[self.offset:self.offset + chunk_size])
        store = IMasterStore(POTMsgSet)
        # Remove related TranslationTemplateItems.
        related_ttis = store.find(
            TranslationTemplateItem,
            In(TranslationTemplateItem.potmsgsetID, msgset_ids_to_remove))
        related_ttis.remove()
        # Remove related TranslationMessages.
        related_translation_messages = store.find(
            TranslationMessage,
            In(TranslationMessage.potmsgsetID, msgset_ids_to_remove))
        related_translation_messages.remove()
        # Finally remove the POTMsgSets themselves.
        store.find(
            POTMsgSet, In(POTMsgSet.id, msgset_ids_to_remove)).remove()
        self.offset = self.offset + chunk_size
        transaction.commit()
966
967
13646.11.18 by Steve Kowalik
Review fix-ups, collapse to one test.
968
# XXX: StevenK 2011-09-14 bug=849683: This can be removed when done.
13646.11.1 by Steve Kowalik
First shot changes at garbo script.
969
class SourcePackagePublishingHistorySPNPopulator(TunableLoop):
    """Fill in the sourcepackagename column of SPPH rows that lack it."""

    # Set on the instance after each call; True once nothing is left.
    done = False
    maximum_chunk_size = 5000

    def findSPPHs(self):
        """Return the SPPH rows with no sourcepackagename, ordered by id."""
        store = IMasterStore(SourcePackagePublishingHistory)
        unpopulated = store.find(
            SourcePackagePublishingHistory,
            SourcePackagePublishingHistory.sourcepackagename == None)
        return unpopulated.order_by(SourcePackagePublishingHistory.id)

    def isDone(self):
        """See `TunableLoop`."""
        return self.done

    def __call__(self, chunk_size):
        """See `TunableLoop`."""
        for spph in self.findSPPHs()[:chunk_size]:
            # The name is copied over from the published release.
            release = spph.sourcepackagerelease
            spph.sourcepackagename = release.sourcepackagename
        transaction.commit()
        # Ask the database again whether any unpopulated rows remain.
        self.done = self.findSPPHs().is_empty()
13646.11.1 by Steve Kowalik
First shot changes at garbo script.
993
994
13646.11.18 by Steve Kowalik
Review fix-ups, collapse to one test.
995
# XXX: StevenK 2011-09-14 bug=849683: This can be removed when done.
13646.11.1 by Steve Kowalik
First shot changes at garbo script.
996
class BinaryPackagePublishingHistoryBPNPopulator(TunableLoop):
    """Fill in the binarypackagename column of BPPH rows that lack it."""

    # Set on the instance after each call; True once nothing is left.
    done = False
    maximum_chunk_size = 5000

    def findBPPHs(self):
        """Return the BPPH rows with no binarypackagename, ordered by id."""
        store = IMasterStore(BinaryPackagePublishingHistory)
        unpopulated = store.find(
            BinaryPackagePublishingHistory,
            BinaryPackagePublishingHistory.binarypackagename == None)
        return unpopulated.order_by(BinaryPackagePublishingHistory.id)

    def isDone(self):
        """See `TunableLoop`."""
        return self.done

    def __call__(self, chunk_size):
        """See `TunableLoop`."""
        for bpph in self.findBPPHs()[:chunk_size]:
            # The name is copied over from the published release.
            release = bpph.binarypackagerelease
            bpph.binarypackagename = release.binarypackagename
        transaction.commit()
        # Ask the database again whether any unpopulated rows remain.
        self.done = self.findBPPHs().is_empty()
13646.11.1 by Steve Kowalik
First shot changes at garbo script.
1020
1021
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
1022
class BaseDatabaseGarbageCollector(LaunchpadCronScript):
    """Abstract base class to run a collection of TunableLoops.

    Subclasses provide `script_name`, `tunable_loops`,
    `experimental_tunable_loops` and `default_abort_script_time`.

    The loops are consumed from a shared list by a pool of worker
    threads.  Each loop is run while holding a `GlobalLock` whose path
    is derived from the loop's class name, because multiple garbo
    processes might be running simultaneously.
    """
    script_name = None  # Script name for locking and database user. Override.
    tunable_loops = None  # Collection of TunableLoops. Override.
    continue_on_failure = False  # If True, an exception in a tunable loop
                                 # does not cause the script to abort.

    # Default run time of the script in seconds. Override.
    default_abort_script_time = None

    # _maximum_chunk_size is used to override the defined
    # maximum_chunk_size to allow our tests to ensure multiple calls to
    # __call__ are required without creating huge amounts of test data.
    _maximum_chunk_size = None

    def __init__(self, test_args=None):
        """Initialise the script under `script_name`.

        The database user is `script_name` with dashes replaced by
        underscores.

        :param test_args: passed through unchanged to the parent
            constructor.
        """
        super(BaseDatabaseGarbageCollector, self).__init__(
            self.script_name,
            dbuser=self.script_name.replace('-', '_'),
            test_args=test_args)

    def add_my_options(self):
        """Add the garbo command line options to `self.parser`."""
        self.parser.add_option("-x", "--experimental", dest="experimental",
            default=False, action="store_true",
            help="Run experimental jobs. Normally this is just for staging.")
        self.parser.add_option("--abort-script",
            dest="abort_script", default=self.default_abort_script_time,
            action="store", type="float", metavar="SECS",
            help="Abort script after SECS seconds [Default %d]."
            % self.default_abort_script_time)
        self.parser.add_option("--abort-task",
            dest="abort_task", default=None, action="store", type="float",
            metavar="SECS", help="Abort a task if it runs over SECS seconds "
                "[Default (threads * abort_script / tasks)].")
        self.parser.add_option("--threads",
            dest="threads", default=multiprocessing.cpu_count(),
            action="store", type="int", metavar='NUM',
            help="Run NUM tasks in parallel [Default %d]."
            % multiprocessing.cpu_count())

    def main(self):
        """Run the tunable loops in worker threads and report the outcome.

        Logs a warning if the script timeout passed or if tasks were
        left unrun.

        :raises SilentLaunchpadScriptFailure: if at least one task
            failed; the failure count is passed to the exception.
        """
        self.start_time = time.time()

        # Stores the number of failed tasks.
        self.failure_count = 0

        # Copy the list so we can safely consume it.
        tunable_loops = list(self.tunable_loops)
        if self.options.experimental:
            tunable_loops.extend(self.experimental_tunable_loops)

        # All workers share (and consume) the same tunable_loops list.
        threads = set()
        for count in range(self.options.threads):
            thread = threading.Thread(
                target=self.run_tasks_in_thread,
                name='Worker-%d' % (count + 1,),
                args=(tunable_loops,))
            thread.start()
            threads.add(thread)

        # Block until all the worker threads have completed. We block
        # until the script timeout is hit, plus 60 seconds. We wait the
        # extra time because the loops are supposed to shut themselves
        # down when the script timeout is hit, and the extra time is to
        # give them a chance to clean up.
        for thread in threads:
            time_to_go = self.get_remaining_script_time() + 60
            if time_to_go > 0:
                thread.join(time_to_go)
            else:
                break

        # If the script ran out of time, warn.
        if self.get_remaining_script_time() < 0:
            self.logger.warn(
                "Script aborted after %d seconds.", self.script_timeout)

        # Anything still in the list was never picked up by a worker.
        if tunable_loops:
            self.logger.warn("%d tasks did not run.", len(tunable_loops))

        if self.failure_count:
            self.logger.error("%d tasks failed.", self.failure_count)
            raise SilentLaunchpadScriptFailure(self.failure_count)

    def get_remaining_script_time(self):
        """Return the seconds left until the script timeout.

        The result is negative once the timeout has passed.
        """
        return self.start_time + self.script_timeout - time.time()

    @property
    def script_timeout(self):
        """The script timeout in seconds.

        Falls back to one year when the abort_script option is unset
        (or zero).
        """
        a_very_long_time = 31536000  # 1 year
        return self.options.abort_script or a_very_long_time

    def get_loop_logger(self, loop_name):
        """Retrieve a logger for use by a particular task.

        The logger will be configured to add the loop_name as a
        prefix to all log messages, making interleaved output from
        multiple threads somewhat readable.
        """
        loop_logger = logging.getLogger('garbo.' + loop_name)
        # Only attach a PrefixFilter if this logger does not already
        # have one from an earlier call.
        already_prefixed = any(
            isinstance(log_filter, PrefixFilter)
            for log_filter in loop_logger.filters)
        if not already_prefixed:
            loop_logger.addFilter(PrefixFilter(loop_name))
        return loop_logger

    def get_loop_abort_time(self, num_remaining_tasks):
        """Return how many seconds a task may run before it should abort.

        :param num_remaining_tasks: the number of tasks still to be
            run, including the one about to start.
        :return: the task timeout in seconds, never more than the
            remaining script time.
        """
        if self.options.abort_task is not None:
            # Task timeout specified on command line.
            abort_task = self.options.abort_task

        elif num_remaining_tasks <= self.options.threads:
            # We have a thread for every remaining task. Let
            # the task run until the script timeout.
            self.logger.debug2(
                "Task may run until script timeout.")
            abort_task = self.get_remaining_script_time()

        else:
            # Evenly distribute the remaining time to the
            # remaining tasks.
            abort_task = (
                self.options.threads
                * self.get_remaining_script_time() / num_remaining_tasks)

        return min(abort_task, self.get_remaining_script_time())

    def run_tasks_in_thread(self, tunable_loops):
        """Worker thread target to run tasks.

        Tasks are removed from tunable_loops and run one at a time,
        until all tasks that can be run have been run or the script
        has timed out.

        :param tunable_loops: the list of TunableLoop classes still to
            run.  It is shared with the other worker threads and is
            modified in place.
        """
        self.logger.debug(
            "Worker thread %s running.", threading.current_thread().name)
        self.login()

        while True:
            # How long until the script should abort.
            if self.get_remaining_script_time() <= 0:
                # Exit silently. We warn later.
                self.logger.debug(
                    "Worker thread %s detected script timeout.",
                    threading.current_thread().name)
                break

            # Claim the next task. The list is shared with the other
            # workers, so checking its length and then popping is racy:
            # another worker could take the last task in between. Pop
            # first and treat an empty list as "no work left".
            try:
                tunable_loop_class = tunable_loops.pop(0)
            except IndexError:
                break
            # Count the task we just claimed as still remaining.
            num_remaining_tasks = len(tunable_loops) + 1

            loop_name = tunable_loop_class.__name__

            loop_logger = self.get_loop_logger(loop_name)

            # Acquire a lock for the task. Multiple garbo processes
            # might be running simultaneously.
            loop_lock_path = os.path.join(
                LOCK_PATH, 'launchpad-garbo-%s.lock' % loop_name)
            # No logger - too noisy, so report issues ourself.
            loop_lock = GlobalLock(loop_lock_path, logger=None)
            try:
                loop_lock.acquire()
                loop_logger.debug("Aquired lock %s.", loop_lock_path)
            except LockAlreadyAcquired:
                # If the lock cannot be acquired, but we have plenty
                # of time remaining, just put the task back to the
                # end of the queue.
                if self.get_remaining_script_time() > 60:
                    loop_logger.debug3(
                        "Unable to acquire lock %s. Running elsewhere?",
                        loop_lock_path)
                    time.sleep(0.3)  # Avoid spinning.
                    tunable_loops.append(tunable_loop_class)
                # Otherwise, emit a warning and skip the task.
                else:
                    loop_logger.warn(
                        "Unable to acquire lock %s. Running elsewhere?",
                        loop_lock_path)
                continue

            try:
                loop_logger.info("Running %s", loop_name)

                abort_time = self.get_loop_abort_time(num_remaining_tasks)
                loop_logger.debug2(
                    "Task will be terminated in %0.3f seconds",
                    abort_time)

                tunable_loop = tunable_loop_class(
                    abort_time=abort_time, log=loop_logger)

                # Allow the test suite to override the chunk size.
                if self._maximum_chunk_size is not None:
                    tunable_loop.maximum_chunk_size = (
                        self._maximum_chunk_size)

                try:
                    tunable_loop.run()
                    loop_logger.debug(
                        "%s completed sucessfully.", loop_name)
                except Exception:
                    # One failing task must not stop this worker from
                    # running the others; main() reports the total.
                    loop_logger.exception("Unhandled exception")
                    # NOTE(review): this increment is not guarded by a
                    # lock, so simultaneous failures in several workers
                    # could presumably under-count. Confirm whether an
                    # exact count matters to callers.
                    self.failure_count += 1

            finally:
                loop_lock.release()
                loop_logger.debug("Released lock %s.", loop_lock_path)
                transaction.abort()
1234
8697.25.1 by Guilherme Salgado
New task ran as part of garbo-daily to delete unlinked person entries.
1235
8758.3.13 by Stuart Bishop
Add garbo-frequently
1236
class FrequentDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
    """Garbage collector for the 'garbo-frequently' script.

    Meant to be run every 5 minutes, and possibly more often than that
    in the future.

    Low-overhead jobs belong here, so that their work is distributed
    more evenly.
    """
    script_name = 'garbo-frequently'

    # Five minutes less 20 seconds for cleanup, which helps ensure the
    # script is fully terminated before its next scheduled run kicks in.
    default_abort_script_time = 5 * 60 - 20

    experimental_tunable_loops = []

    tunable_loops = [
        BugSummaryJournalRollup,
        OAuthNoncePruner,
        OpenIDConsumerNoncePruner,
        OpenIDConsumerAssociationPruner,
        AntiqueSessionPruner,
        ]
1257
1258
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
1259
class HourlyDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
    """Garbage collector for the 'garbo-hourly' script.

    Meant to be run every hour.  Jobs that should run fairly often but
    carry noticeable overhead belong here.
    """
    script_name = 'garbo-hourly'

    # One hour less 5 minutes for cleanup, which ensures the script is
    # fully terminated before the next scheduled hourly run kicks in.
    default_abort_script_time = 55 * 60

    experimental_tunable_loops = []

    tunable_loops = [
        RevisionCachePruner,
        BugWatchScheduler,
        UnusedSessionPruner,
        DuplicateSessionPruner,
        BugHeatUpdater,
        SourcePackagePublishingHistorySPNPopulator,
        BinaryPackagePublishingHistoryBPNPopulator,
        ]
8870.1.2 by Stuart Bishop
Add abort times to loop tuner and garbo
1279
7675.177.1 by Stuart Bishop
Use DBLoopTuner by default
1280
7675.85.2 by Jonathan Lange
Undo revision generated by step 2 of process.
1281
class DailyDatabaseGarbageCollector(BaseDatabaseGarbageCollector):
    """Garbage collector for the 'garbo-daily' script.

    Meant to be run every day, for jobs that don't need to be run
    frequently.

    A task with low overhead is a candidate for one of the more
    frequently invoked lists instead, to distribute the work more
    evenly.
    """
    script_name = 'garbo-daily'

    # One day less 30 minutes for cleanup, which ensures the script is
    # fully terminated before the next scheduled daily run kicks in.
    default_abort_script_time = 23.5 * 60 * 60

    # Only run when the script is invoked with --experimental.
    experimental_tunable_loops = [
        PersonPruner,
        ]

    tunable_loops = [
        AnswerContactPruner,
        BranchJobPruner,
        BugNotificationPruner,
        BugWatchActivityPruner,
        CodeImportEventPruner,
        CodeImportResultPruner,
        HWSubmissionEmailLinker,
        LoginTokenPruner,
        ObsoleteBugAttachmentPruner,
        OldTimeLimitedTokenDeleter,
        RevisionAuthorEmailLinker,
        SuggestiveTemplatesCacheUpdater,
        POTranslationPruner,
        UnusedPOTMsgSetPruner,
        ]