~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python
8687.15.22 by Karl Fogel
Add the copyright header block to the remaining .py files.
2
#
3
# Copyright 2009 Canonical Ltd.  This software is licensed under the
4
# GNU Affero General Public License version 3 (see the file LICENSE).
7548.6.3 by Julian Edwards
Split cronscript out to a separate script file so it can be tested more easily.
5
6
# pylint: disable-msg=C0103,W0403
7
8
from zope.component import getUtility
9
7548.6.10 by Julian Edwards
Add tests and fix bustage found
10
from canonical.database.sqlbase import sqlvalues
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
11
from canonical.launchpad.webapp.interfaces import (
12
    DEFAULT_FLAVOR,
13
    IStoreSelector,
14
    MAIN_STORE,
15
    )
8356.1.1 by Leonard Richardson
Partial move.
16
from lp.services.scripts.base import LaunchpadCronScript
11411.6.2 by Julian Edwards
Change code imports for ArchivePurpose and ArchiveStatus
17
from lp.soyuz.enums import ArchivePurpose
7548.6.3 by Julian Edwards
Split cronscript out to a separate script file so it can be tested more easily.
18
7548.6.9 by Julian Edwards
Re-write script to commit after every PPA is processed and optimise the SQL
19
# Names of archive owners whose PPAs we never want to expire.
BLACKLISTED_PPAS = [
    "adobe-isv",
    "chelsea-team",
    "dennis-team",
    "elvis-team",
    "fluendo-isv",
    "natick-team",
    "netbook-remix-team",
    "netbook-team",
    "oem-solutions-group",
    "payson",
    "transyl",
    "ubuntu-mobile",
    "wheelbarrow",
    "bzr",
    "bzr-beta-ppa",
    "bzr-nightly-ppa",
    ]
38
7548.6.3 by Julian Edwards
Split cronscript out to a separate script file so it can be tested more easily.
39
10466.3.1 by Muharem Hrnjadovic
Renamed ppa-binary-expiry to archive-file-expiry since the script is now to expire sources/binaries in partner archives as well.
40
class ArchiveExpirer(LaunchpadCronScript):
    """Cron script that expires old files from PPA and partner archives.

    Any library file attached to a source or binary publication that was
    removed from a PPA or partner archive more than `--expire-after` days
    ago is marked for immediate expiry, unless the same file is still
    referenced by a publication that must be preserved (see the
    determine*Expirables methods for the exact exclusions).
    """

    # Owner names whose PPAs are exempt from expiry.
    blacklist = BLACKLISTED_PPAS

    def add_my_options(self):
        """Add script command line options."""
        default_days = 15
        self.parser.add_option(
            "-n", "--dry-run", action="store_true",
            dest="dryrun", metavar="DRY_RUN", default=False,
            help="If set, no transactions are committed")
        self.parser.add_option(
            "-e", "--expire-after", action="store", type="int",
            dest="num_days", metavar="DAYS", default=default_days,
            help=("The number of days after which to expire files. "
                  "Defaults to %d." % default_days))

    def determineSourceExpirables(self, num_days):
        """Return expirable libraryfilealias IDs for source files.

        :param num_days: Number of days a publication must have been
            removed for before its files may be expired.
        :return: Whatever `get_all()` yields for the query; main() treats
            it as a list of rows, each holding one libraryfilealias ID.
        """
        stay_of_execution = '%d days' % num_days
        archive_types = (ArchivePurpose.PPA, ArchivePurpose.PARTNER)

        # The first SELECT picks unexpired files of source publications
        # removed from a PPA/partner archive before the cut-off.  The
        # EXCEPT subquery then drops every file that is also referenced by
        # a publication we must not touch: one in a blacklisted owner's
        # PPA, in a private archive, in any other kind of archive, or one
        # that is still published or was removed too recently.
        results = self.store.execute("""
            SELECT lfa.id
            FROM
                LibraryFileAlias AS lfa,
                Archive,
                SourcePackageReleaseFile AS sprf,
                SourcePackageRelease AS spr,
                SourcePackagePublishingHistory AS spph
            WHERE
                lfa.id = sprf.libraryfile
                AND spr.id = sprf.sourcepackagerelease
                AND spph.sourcepackagerelease = spr.id
                AND spph.dateremoved < (
                    CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
                    interval %(stay_of_execution)s)
                AND spph.archive = archive.id
                AND archive.purpose IN %(archive_types)s
                AND lfa.expires IS NULL
            EXCEPT
            SELECT sprf.libraryfile
            FROM
                SourcePackageRelease AS spr,
                SourcePackageReleaseFile AS sprf,
                SourcePackagePublishingHistory AS spph,
                Archive AS a,
                Person AS p
            WHERE
                spr.id = sprf.sourcepackagerelease
                AND spph.sourcepackagerelease = spr.id
                AND spph.archive = a.id
                AND p.id = a.owner
                AND (
                    (p.name IN %(blacklist)s AND a.purpose = %(ppa)s)
                    OR a.private IS TRUE
                    OR a.purpose NOT IN %(archive_types)s
                    OR dateremoved > (
                        CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
                        interval %(stay_of_execution)s)
                    OR dateremoved IS NULL)
            """ % sqlvalues(
                stay_of_execution=stay_of_execution,
                archive_types=archive_types,
                blacklist=self.blacklist,
                ppa=ArchivePurpose.PPA))

        return results.get_all()

    def determineBinaryExpirables(self, num_days):
        """Return expirable libraryfilealias IDs for binary files.

        :param num_days: Number of days a publication must have been
            removed for before its files may be expired.
        :return: Whatever `get_all()` yields for the query; main() treats
            it as a sequence of rows, each holding one libraryfilealias ID.
        """
        stay_of_execution = '%d days' % num_days
        archive_types = (ArchivePurpose.PPA, ArchivePurpose.PARTNER)

        # Same shape as the source query: select unexpired files of binary
        # publications removed before the cut-off, then use EXCEPT to drop
        # any file still referenced by a publication in a blacklisted
        # owner's PPA, a private archive, another kind of archive, or by a
        # publication that is live or was removed too recently.
        results = self.store.execute("""
            SELECT lfa.id
            FROM
                LibraryFileAlias AS lfa,
                Archive,
                BinaryPackageFile AS bpf,
                BinaryPackageRelease AS bpr,
                BinaryPackagePublishingHistory AS bpph
            WHERE
                lfa.id = bpf.libraryfile
                AND bpr.id = bpf.binarypackagerelease
                AND bpph.binarypackagerelease = bpr.id
                AND bpph.dateremoved < (
                    CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
                    interval %(stay_of_execution)s)
                AND bpph.archive = archive.id
                AND archive.purpose IN %(archive_types)s
                AND lfa.expires IS NULL
            EXCEPT
            SELECT bpf.libraryfile
            FROM
                BinaryPackageRelease AS bpr,
                BinaryPackageFile AS bpf,
                BinaryPackagePublishingHistory AS bpph,
                Archive AS a,
                Person AS p
            WHERE
                bpr.id = bpf.binarypackagerelease
                AND bpph.binarypackagerelease = bpr.id
                AND bpph.archive = a.id
                AND p.id = a.owner
                AND (
                    (p.name IN %(blacklist)s AND a.purpose = %(ppa)s)
                    OR a.private IS TRUE
                    OR a.purpose NOT IN %(archive_types)s
                    OR dateremoved > (
                        CURRENT_TIMESTAMP AT TIME ZONE 'UTC' -
                        interval %(stay_of_execution)s)
                    OR dateremoved IS NULL)
            """ % sqlvalues(
                stay_of_execution=stay_of_execution,
                archive_types=archive_types,
                blacklist=self.blacklist,
                ppa=ArchivePurpose.PPA))

        return results.get_all()

    def _endTransaction(self):
        """Abort the current transaction in dry-run mode, else commit it."""
        if self.options.dryrun:
            self.txn.abort()
        else:
            self.txn.commit()

    def main(self):
        """Expire every expirable source and binary file.

        The `expires` column of each selected libraryfilealias row is set
        to the current UTC time.  Work is committed (or aborted, with
        --dry-run) every 500 rows and once more at the end.
        """
        self.logger.info('Starting the PPA binary expiration')
        num_days = self.options.num_days
        self.logger.info("Expiring files up to %d days ago" % num_days)

        self.store = getUtility(IStoreSelector).get(
            MAIN_STORE, DEFAULT_FLAVOR)

        lfa_ids = self.determineSourceExpirables(num_days)
        lfa_ids.extend(self.determineBinaryExpirables(num_days))
        batch_count = 0
        batch_limit = 500
        # NOTE(review): each item is used directly as the %-format
        # argument, exactly as before; presumably a one-column result row.
        for lfa_id in lfa_ids:
            self.logger.info("Expiring libraryfilealias %s" % lfa_id)
            self.store.execute("""
                UPDATE libraryfilealias
                SET expires = CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
                WHERE id = %s
                """ % lfa_id)
            batch_count += 1
            if batch_count % batch_limit == 0:
                # Finish the transaction periodically so that a long run
                # does not accumulate all updates in a single transaction.
                if self.options.dryrun:
                    self.logger.info(
                        "%s done, not committing (dryrun mode)" % batch_count)
                else:
                    self.logger.info(
                        "%s done, committing transaction" % batch_count)
                self._endTransaction()

        # Deal with the final, partial batch.
        self._endTransaction()

        self.logger.info('Finished PPA binary expiration')
204