~launchpad-pqm/launchpad/devel

8687.15.18 by Karl Fogel
Add the copyright header block to files under lib/canonical/.
1
# Copyright 2009 Canonical Ltd.  This software is licensed under the
2
# GNU Affero General Public License version 3 (see the file LICENSE).
4271.2.6 by Jeroen Vermeulen
Added header similar to that of other modules
3
4
__metaclass__ = type
5
13011.4.2 by Stuart Bishop
Delint
6
__all__ = ['MultiTableCopy']
4271.2.6 by Jeroen Vermeulen
Added header similar to that of other modules
7
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
8
import logging
13011.4.1 by Stuart Bishop
Refused to allow multitablecopy to create confusing tablenames
9
import re
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
10
import time
11
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
12
from zope.interface import implements
13
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
14
from canonical.database import postgresql
4726.1.2 by Jeroen Vermeulen
Changes based on review.
15
from canonical.database.sqlbase import (cursor, quote, quoteIdentifier)
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
16
from canonical.launchpad.interfaces.looptuner import ITunableLoop
7675.193.2 by Henning Eggers
Use DBLoopTuner for pouring in MultiTableCopy.
17
from canonical.launchpad.utilities.looptuner import DBLoopTuner
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
18
19
20
class PouringLoop:
4271.3.2 by Jeroen Vermeulen
Renamed TunedLoop to TunableLoop based on pre-impl chat with BjornT.
21
    """Loop body to pour data from holding tables back into source tables.
22
7675.193.2 by Henning Eggers
Use DBLoopTuner for pouring in MultiTableCopy.
23
    Used by MultiTableCopy internally to tell DBLoopTuner what to do.
4271.3.2 by Jeroen Vermeulen
Renamed TunedLoop to TunableLoop based on pre-impl chat with BjornT.
24
    """
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
25
    implements(ITunableLoop)
26
4726.1.2 by Jeroen Vermeulen
Changes based on review.
27
    def __init__(self, from_table, to_table, transaction_manager, logger,
28
        batch_pouring_callback=None):
29
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
30
        self.from_table = str(from_table)
31
        self.to_table = str(to_table)
32
        self.transaction_manager = transaction_manager
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
33
        self.logger = logger
4726.1.2 by Jeroen Vermeulen
Changes based on review.
34
        self.batch_pouring_callback = batch_pouring_callback
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
35
        self.cur = cursor()
36
37
        self.cur.execute("SELECT min(id), max(id) FROM %s" % from_table)
4574.2.1 by Jeroen Vermeulen
Small change from review that was missed earlier.
38
        self.lowest_id, self.highest_id = self.cur.fetchone()
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
39
40
        if self.lowest_id is None:
41
            # Table is empty.
42
            self.lowest_id = 1
43
            self.highest_id = 0
44
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
45
        self.logger.debug("Up to %d rows in holding table"
46
                         % (self.highest_id + 1 - self.lowest_id))
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
47
4271.3.3 by Jeroen Vermeulen
Removed unused post(), renamed done() to meet naming requirements & pre-impl suggestions
48
    def isDone(self):
4726.1.2 by Jeroen Vermeulen
Changes based on review.
49
        """See `ITunableLoop`."""
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
50
        return self.lowest_id is None or self.lowest_id > self.highest_id
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
51
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
52
    def __call__(self, batch_size):
4726.1.2 by Jeroen Vermeulen
Changes based on review.
53
        """See `ITunableLoop`.
54
55
        Loop body: pour rows with ids up to "next" over to to_table."""
4271.3.8 by Jeroen Vermeulen
Make chunk_size a float (but round it in MultiTableCopy!), lots more tests
56
        batch_size = int(batch_size)
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
57
58
        # Figure out what id lies exactly batch_size rows ahead.
59
        self.cur.execute("""
60
            SELECT id
61
            FROM %s
4568.1.8 by Jeroen Vermeulen
Compile fixes.
62
            WHERE id >= %s
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
63
            ORDER BY id
4568.1.8 by Jeroen Vermeulen
Compile fixes.
64
            OFFSET %s
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
65
            LIMIT 1
4568.1.8 by Jeroen Vermeulen
Compile fixes.
66
            """ % (self.from_table, quote(self.lowest_id), quote(batch_size)))
67
        end_id = self.cur.fetchone()
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
68
69
        if end_id is not None:
70
            next = end_id[0]
71
        else:
4568.1.8 by Jeroen Vermeulen
Compile fixes.
72
            next = self.highest_id
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
73
74
        next += 1
75
76
        self.prepareBatch(
77
            self.from_table, self.to_table, batch_size, self.lowest_id, next)
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
78
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
79
        self.logger.debug("pouring %s: %d rows (%d-%d)" % (
80
            self.from_table, batch_size, self.lowest_id, next))
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
81
4271.3.21 by Jeroen Vermeulen
Few more improvements suggested by reviewer.
82
        self.cur.execute("INSERT INTO %s (SELECT * FROM %s WHERE id < %d)"
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
83
                         % (self.to_table, self.from_table, next))
84
4271.3.21 by Jeroen Vermeulen
Few more improvements suggested by reviewer.
85
        self.cur.execute("DELETE FROM %s WHERE id < %d"
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
86
                         % (self.from_table, next))
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
87
88
        self.lowest_id = next
89
        self._commit()
90
91
    def _commit(self):
4726.1.2 by Jeroen Vermeulen
Changes based on review.
92
        """Commit ongoing transaction, start a new one."""
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
93
        self.transaction_manager.commit()
94
        self.transaction_manager.begin()
95
        self.cur = cursor()
4918.4.22 by Jeroen Vermeulen
Changes based on reviews by bac and jml.
96
        # Disable slow sequential scans.  The database server is reluctant to
97
        # use indexes on tables that undergo large changes, such as the
98
        # deletion of large numbers of rows in this case.  Usually it's
99
        # right but in this case it seems to slow things down dramatically and
100
        # unnecessarily.  We disable sequential scans for every commit since
101
        # initZopeless by default resets our database connection with every
102
        # new transaction.
103
        # MultiTableCopy disables sequential scans for the first batch; this
104
        # just renews our setting after the connection is reset.
4918.4.8 by Jeroen Vermeulen
Re-disable sequential scans after every commit, because the database connection will be reset at that point.
105
        postgresql.allow_sequential_scans(self.cur, False)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
106
5396.2.5 by Jeroen Vermeulen
Cosmetic changes based on review.
107
    def prepareBatch(
108
        self, from_table, to_table, batch_size, begin_id, end_id):
4726.1.2 by Jeroen Vermeulen
Changes based on review.
109
        """If batch_pouring_callback is defined, call it."""
110
        if self.batch_pouring_callback is not None:
111
            self.batch_pouring_callback(
112
                from_table, to_table, batch_size, begin_id, end_id)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
113
13011.4.2 by Stuart Bishop
Delint
114
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
115
class MultiTableCopy:
116
    """Copy interlinked data spanning multiple tables in a coherent fashion.
117
4271.2.4 by Jeroen Vermeulen
Updated documentation; removed last references to translation tables
118
    This allows data from a combination of tables, possibly with foreign-key
119
    references between them, to be copied to a set of corresponding "holding
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
120
    tables"; processed and modified there; and then be inserted back to the
4271.2.4 by Jeroen Vermeulen
Updated documentation; removed last references to translation tables
121
    original tables.  The holding tables are created on demand and dropped
122
    upon completion.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
123
4271.2.10 by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing
124
    You can tell the algorithm to redirect foreign keys.  Say you're copying a
125
    row x1 in a table X, and x1 has a foreign key referring to a row y1 in a
126
    table Y that you're also copying.  You will get copied rows x2 and y2
127
    respectively.  But if you declare the foreign-key relationship between X
128
    and Y to the algorithm, then x2's instance of that foreign key will refer
129
    not to y1 but to the new y2.  Any rows in X whose associated rows of Y are
130
    not copied, are also not copied.  This can be useful when copying data in
5121.2.7 by Stuart Bishop
More required code changes
131
    entire sub-trees of the schema graph, e.g. "one distroseries and all the
4271.2.10 by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing
132
    translations associated with it."
133
134
    All this happens in a two-stage process:
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
135
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
136
    1. Extraction stage.  Use the extract() method to copy selected data to a
137
    holding table, one table at a time.  Ordering matters: always do this in
138
    such an order that the table you are extracting has no foreign-key
139
    references to another table that you are yet to extract.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
140
141
    This stage is relatively fast and holds no locks on the database.  Do any
142
    additional processing on the copied rows in the holding tables, during or
143
    after the extraction stage, so you do not hold any locks on the source
144
    tables yourself.  It's up to you to make sure that all of the rows in the
145
    holding tables can be inserted into their source tables: if you leave
146
    primary keys and such unchanged, unique constraints will be violated in
147
    the next stage.
148
149
    2. Pouring stage.  All data from the holding tables is inserted back into
150
    the source tables.  This entire stage, which normally takes the bulk of
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
151
    the copying time, is performed by calling the pour method.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
152
153
    This stage will lock the rows that are being inserted in the source
154
    tables, if the database is so inclined (e.g. when using postgres with
155
    SERIALIZABLE isolation level).  For that reason, the pouring is done in
156
    smaller, controlled batches.  If you give the object a database
157
    transaction to work with, that transaction will be committed and restarted
158
    between batches.
159
160
    A MultiTableCopy is restartable.  If the process should fail for any
161
    reason, the holding tables will be left in one of two states: if stage 1
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
162
    has not completed, needsRecovery will return False.  In that case, drop
163
    the holding tables using dropHoldingTables and either start again (or give
164
    up).  But if a previous run did complete the extraction stage, the holding
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
165
    tables will remain and contain valid data.  In that case, just pour again
166
    to continue the work (and hopefully complete it this time).
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
167
168
    Holding tables will have names like "temp_POMsgSet_holding_ubuntu_feisty",
169
    in this case for one holding data extracted from source table POMsgSet by
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
170
    a MultiTableCopy called "ubuntu_feisty".  The holding tables are dropped
171
    when we're done, but they are not temp tables.  They have to be persistent
172
    so we get a chance to resume interrupted copies and analyse failures.
173
174
        # We use a regular, persistent table rather than a temp table for
175
        # this so that we get a chance to resume interrupted copies, and
176
        # analyse failures.  If we used temp tables, they'd be gone by the
177
        # time we knew something went wrong.
4271.2.9 by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs
178
179
    The tables to be copied must meet a number of conventions:
180
181
     * First column must be an integer primary key called "id."
182
183
     * id values must be assigned by a sequence, with a name that can be used
184
       in SQL without quoting.
185
186
     * Every foreign-key column that refers to a table that is also being
4271.2.14 by Jeroen Vermeulen
Final documentation touches before requesting review
187
       copied, has the same name as the table it refers to.  This can be
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
188
       changed by subclassing and overriding the _pointsToTable method.
4271.2.9 by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs
189
190
     * Foreign-key column names and the tables they refer to can be used in
191
       SQL without quoting.
192
193
     * For any foreign key column "x" referring to another table that is also
194
       being copied, there must not be a column called "new_x"
4271.2.14 by Jeroen Vermeulen
Final documentation touches before requesting review
195
196
    If you use this class, it is up to you to ensure that the data that you
197
    copy does not break compliance between the time it is extracted from its
198
    source tables and the moment it has been poured back there.  This means
199
    that rows that the data refers to by foreign keys must not be deleted
200
    while the multi-table copy is running, for instance.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
201
    """
202
4486.1.1 by Jeroen Vermeulen
Make MultiTableCopy a bit more conservative with batch sizes
203
    def __init__(self, name, tables, seconds_per_batch=2.0,
4568.1.14 by Jeroen Vermeulen
Merge from RF. Resolved conflicts on logger passing in multitablecopy & distroseries.
204
            minimum_batch_size=500, restartable=True, logger=None):
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
205
        """Define a MultiTableCopy, including an in-order list of tables.
206
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
207
        :param name: a unique identifier for this MultiTableCopy operation,
208
            e.g. "ubuntu_feisty".  The name will be included in the names of
209
            holding tables.
210
        :param tables: a list of tables that will be extracted and poured,
211
            in the order in which they will be extracted (and later, poured).
212
            This is essential when analyzing recoverable state.  You may
213
            attempt multiple extractions from the same table, but all tables
214
            listed must be extracted.  If you do not wish to copy any rows
215
            from a source table, extract with "false" as its where clause.
216
        :param seconds_per_batch: a time goal (in seconds) to define how long,
217
            ideally, the algorithm should be allowed to hold locks on the
218
            source tables.  It will try to do the work in batches that take
219
            about this long.
220
        :param minimum_batch_size: minimum number of rows to pour in one
221
            batch.  You may want to set this to stop the algorithm from
222
            resorting to endless single-row batches in situations where
223
            response times are too slow but batch size turns out not to matter
224
            much.
225
        :param restartable: whether you want the remaining data to be
226
            available for recovery if the connection (or the process) fails
227
            while in the pouring stage.  If False, will extract to temp
4918.4.7 by Jeroen Vermeulen
Added testing for pre-pouring callbacks.
228
            tables.  CAUTION: our connections currently get reset every time
229
            we commit a transaction, obliterating any temp tables possibly in
230
            the middle of the pouring process!
4568.1.14 by Jeroen Vermeulen
Merge from RF. Resolved conflicts on logger passing in multitablecopy & distroseries.
231
        :param logger: a logger to write informational output to.  If none is
232
            given, the default is used.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
233
        """
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
234
        self.name = str(name)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
235
        self.tables = tables
4271.2.17 by Jeroen Vermeulen
Changes based on review by Tim Penhey
236
        self.lower_tables = [table.lower() for table in self.tables]
4271.3.15 by Jeroen Vermeulen
Changes based on review by bac.
237
        self.seconds_per_batch = seconds_per_batch
4271.3.12 by Jeroen Vermeulen
Configurable batching on MultiTableCopy; test with tiny batches
238
        self.minimum_batch_size = minimum_batch_size
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
239
        self.last_extracted_table = None
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
240
        self.restartable = restartable
4726.1.2 by Jeroen Vermeulen
Changes based on review.
241
        self.batch_pouring_callbacks = {}
4918.4.5 by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations.
242
        self.pre_pouring_callbacks = {}
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
243
        if logger is not None:
244
            self.logger = logger
245
        else:
246
            self.logger = logging
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
247
248
    def dropHoldingTables(self):
4271.2.4 by Jeroen Vermeulen
Updated documentation; removed last references to translation tables
249
        """Drop any holding tables that may exist for this MultiTableCopy."""
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
250
        holding_tables = [self.getHoldingTableName(table)
251
                          for table in self.tables]
252
        postgresql.drop_tables(cursor(), holding_tables)
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
253
4271.2.10 by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing
254
    def getRawHoldingTableName(self, tablename, suffix=''):
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
255
        """Name for a holding table, but without quotes.  Use with care."""
256
        if suffix:
257
            suffix = '_%s' % suffix
13011.4.1 by Stuart Bishop
Refused to allow multitablecopy to create confusing tablenames
258
259
        assert re.search(r'[^a-z_]', tablename + suffix) is None, (
260
            'Unsupported characters in table name per Bug #179821')
261
262
        raw_name = "temp_%s_holding_%s%s" % (
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
263
            str(tablename), self.name, str(suffix))
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
264
13011.4.1 by Stuart Bishop
Refused to allow multitablecopy to create confusing tablenames
265
        return raw_name
266
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
267
    def getHoldingTableName(self, tablename, suffix=''):
268
        """Name for a holding table to hold data being copied in tablename.
269
270
        Return value is properly quoted for use as an SQL identifier.
271
        """
4271.2.18 by Jeroen Vermeulen
More feedback from various reviewers
272
        raw_name = self.getRawHoldingTableName(tablename, suffix)
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
273
        return quoteIdentifier(raw_name)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
274
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
275
    def _pointsToTable(self, source_table, foreign_key):
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
276
        """Name of table that source_table.foreign_key refers to.
277
278
        By default, all foreign keys that play a role in the MultiTableCopy
279
        are expected to have the same names as the tables they refer to.  If
280
        that is not the case for your copy, subclass this class and override
281
        this method with a version that has specific knowledge of what points
282
        where.
283
        """
284
        return foreign_key
285
4568.1.9 by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table.
286
    def extract(self, source_table, joins=None, where_clause=None,
4918.4.5 by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations.
287
        id_sequence=None, inert_where=None, pre_pouring_callback=None,
288
        batch_pouring_callback=None, external_joins=None):
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
289
        """Extract (selected) rows from source_table into a holding table.
290
4514.2.1 by Jeroen Vermeulen
Improved documentation.
291
        The holding table gets an additional new_id column with identifiers
292
        generated by `id_sequence`.  Apart from this extra column, (and
293
        indexes and constraints), the holding table is schematically identical
294
        to `source_table`.  A unique index is created for the original id
295
        column.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
296
297
        There is a special facility for redirecting foreign keys to other
4514.2.1 by Jeroen Vermeulen
Improved documentation.
298
        tables in the same copy operation.  The `joins` argument can pass a
299
        list of foreign keys.  The foreign keys given in joins must be columns
300
        of `source_table`, and refer to tables that are also being copied.
301
        The selection used in populating the holding table for `source_table`
302
        will be joined on each of the foreign keys listed in `joins`.  The
303
        foreign keys in the holding table will point to the new_ids of the
304
        copied rows, rather than the original ids.  Rows in `source_table`
305
        will only be copied to their holding table if all rows they are joined
306
        with through the `joins` parameter are also copied.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
307
308
        When joining, the added tables' columns are not included in the
4514.2.1 by Jeroen Vermeulen
Improved documentation.
309
        holding table, but `where_clause` may still select on them.
4271.2.9 by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs
310
4514.2.1 by Jeroen Vermeulen
Improved documentation.
311
        :param source_table: table to extract data from (and where the
312
            extracted data will ultimately be poured back to).
313
        :param joins: list of foreign keys from `source_table` to other source
314
            tables being copied in the same operation.  By default, each of
315
            these foreign keys is assumed to refer to a table of the same
316
            name.  When breaking this convention, override the
317
            `_pointsToTable` method to provide the right target table for each
318
            foreign key that the operation should know about.
319
        :param where_clause: Boolean SQL expression characterizing rows to be
5396.2.5 by Jeroen Vermeulen
Cosmetic changes based on review.
320
            extracted.  The WHERE clause may refer to rows from table being
321
            extracted as "source."
4514.2.1 by Jeroen Vermeulen
Improved documentation.
322
        :param id_sequence: SQL sequence that should assign new identifiers
323
            for the extracted rows.  Defaults to `source_table` with "_seq_id"
324
            appended, which by SQLObject/Launchpad convention is the sequence
325
            that provides `source_table`'s primary key values.  Used verbatim,
326
            without quoting.
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
327
        :param inert_where: Boolean SQL expression characterizing rows that
328
            are extracted, but should not poured back into `source_table`
329
            during the pouring stage.  For these rows, new_id will be null.
330
            This clause is executed in a separate query, therefore will have
331
            no access to any tables other than the newly created holding
332
            table.  The clause can reference the holding table under the name
4568.1.17 by Jeroen Vermeulen
Rearranged, cleaned up & fixed how foreign keys in holding tables are patched up.
333
            "holding."  Any foreign keys from `joins` will still contain the
4726.1.3 by Jeroen Vermeulen
Fix typo found in review.
334
            values they had in `source_table`, but for each "x" of these
4568.1.18 by Jeroen Vermeulen
Got tests passing again!
335
            foreign keys, the holding table will have a column "new_x" that
336
            holds the redirected foreign key.
4918.4.5 by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations.
337
        :param pre_pouring_callback: a callback that is called just before
338
            pouring this table.  At that time the holding table will no longer
339
            have its new_id column, its values having been copied to the
340
            regular id column.  This means that the copied rows' original ids
341
            are no longer known.  The callback takes as arguments the holding
342
            table's name and the source table's name.  The callback may be
343
            invoked more than once if pouring is interrupted and later
344
            resumed.
4726.1.2 by Jeroen Vermeulen
Changes based on review.
345
        :param batch_pouring_callback: a callback that is called before each
346
            batch of rows is poured, within the same transaction that pours
347
            those rows.  It takes as arguments the holding table's name; the
348
            source table's name; current batch size; the id of the first row
349
            being poured; and the id where the batch stops.  The "end id" is
350
            exclusive, so a row with that id is not copied (and in fact may
351
            not even exist).  The time spent by `batch_ouring_callback` is
352
            counted as part of the batch's processing time.
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
353
        :param external_joins: a list of tables to join into the extraction
354
            query, so that the extraction condition can select on them.  Each
355
            entry must be a string either simply naming a table ("Person"), or
356
            naming a table and providing a name for it in the query (e.g.,
357
            "Person p").  Do the latter if the same table also occurs
358
            elsewhere in the same query.  Your `where_clause` can refer to the
359
            rows included in this way by the names you give or by their table
360
            name, SQL rules permitting.  If your join yields multiple rows
361
            that have the same `source_table` id, only one arbitrary pick of
362
            those will be extracted.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
363
        """
364
        if id_sequence is None:
365
            id_sequence = "%s_id_seq" % source_table.lower()
366
367
        if joins is None:
368
            joins = []
369
4918.4.8 by Jeroen Vermeulen
Re-disable sequential scans after every commit, because the database connection will be reset at that point.
370
        self.batch_pouring_callbacks[source_table] = batch_pouring_callback
371
        self.pre_pouring_callbacks[source_table] = pre_pouring_callback
4918.4.5 by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations.
372
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
373
        if external_joins is None:
374
            external_joins = []
375
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
376
        source_table = str(source_table)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
377
        self._checkExtractionOrder(source_table)
378
379
        holding_table = self.getHoldingTableName(source_table)
380
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
381
        self.logger.info('Extracting from %s into %s...' % (
13011.4.2 by Stuart Bishop
Delint
382
            source_table, holding_table))
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
383
384
        starttime = time.time()
385
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
386
        cur = self._selectToHolding(source_table, joins, external_joins,
387
            where_clause, holding_table, id_sequence, inert_where)
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
388
389
        if len(joins) > 0:
390
            self._retargetForeignKeys(holding_table, joins, cur)
391
392
        # Now that our new holding table is in a stable state, index its id
393
        self._indexIdColumn(holding_table, source_table, cur)
394
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
395
        self.logger.debug(
13011.4.2 by Stuart Bishop
Delint
396
            '...Extracted in %.3f seconds' % (time.time() - starttime))
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
397
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
398
    def _selectToHolding(self, source_table, joins, external_joins,
399
            where_clause, holding_table, id_sequence, inert_where):
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
400
        """Create holding table based on data from source table.
401
402
        We don't need to know what's in the source table exactly; we just
403
        create a new table straight from a "select *."  Except we also add a
404
        few columns for the purpose of retargeting foreign keys, as well as a
405
        new_id column.
406
        """
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
407
        source_table = str(source_table)
5396.2.2 by Jeroen Vermeulen
WHERE clause in extract() can now refer to source-table row as 'source.'
408
        select = ['source.*']
409
        from_list = ["%s source" % source_table]
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
410
        where = []
411
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
412
        for join in joins:
413
            join = str(join)
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
414
            referenced_table = self._pointsToTable(source_table, join)
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
415
            referenced_holding = self.getHoldingTableName(referenced_table)
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
416
            column = join.lower()
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
417
418
            self._checkForeignKeyOrder(column, referenced_table)
419
5396.2.5 by Jeroen Vermeulen
Cosmetic changes based on review.
420
            select.append('%s.new_id AS new_%s' % (
421
                referenced_holding, column))
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
422
            from_list.append(referenced_holding)
423
            where.append('%s = %s.id' % (column, referenced_holding))
424
425
        from_list.extend(external_joins)
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
426
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
427
        if where_clause is not None:
428
            where.append('(%s)' % where_clause)
429
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
430
        where_text = ''
431
        if len(where) > 0:
432
            where_text = 'WHERE %s' % ' AND '.join(where)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
433
434
        # For each row we append at the end any new foreign key values, and
435
        # finally a "new_id" holding its future id field.  This new_id value
436
        # is allocated from the original table's id sequence, so it will be
437
        # unique in the original table.
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
438
        table_creation_parameters = {
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
439
            'columns': ','.join(select),
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
440
            'holding_table': holding_table,
4568.1.8 by Jeroen Vermeulen
Compile fixes.
441
            'id_sequence': "nextval('%s'::regclass)" % id_sequence,
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
442
            'inert_where': inert_where,
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
443
            'source_tables': ','.join(from_list),
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
444
            'where': where_text,
445
            'temp': '',
446
            }
4568.1.8 by Jeroen Vermeulen
Compile fixes.
447
        if not self.restartable:
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
448
            table_creation_parameters['temp'] = 'TEMP'
449
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
450
        cur = cursor()
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
451
452
        # Create holding table directly from select result.
4568.1.8 by Jeroen Vermeulen
Compile fixes.
453
        if inert_where is None:
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
454
            # We'll be pouring all rows from this table.  To avoid a costly
455
            # second write pass (which would rewrite all records in the
456
            # holding table), we assign new_ids right in the same query.
457
            cur.execute('''
458
                CREATE %(temp)s TABLE %(holding_table)s AS
5396.2.2 by Jeroen Vermeulen
WHERE clause in extract() can now refer to source-table row as 'source.'
459
                SELECT DISTINCT ON (source.id)
4568.1.11 by Jeroen Vermeulen
Many more fixes. Now to get the tests to pass...
460
                    %(columns)s, %(id_sequence)s AS new_id
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
461
                FROM %(source_tables)s
4918.4.11 by Jeroen Vermeulen
Removed more temp table usage. Also, commit regularly and analyze holding tables before pouring.
462
                %(where)s
463
                ORDER BY id''' % table_creation_parameters)
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
464
        else:
465
            # Some of the rows may have to have null new_ids.  To avoid
466
            # wasting "address space" on the sequence, we populate the entire
467
            # holding table with null new_ids, then fill in new_id only for
468
            # rows that do not match the "inert_where" condition.
469
            cur.execute('''
470
                CREATE %(temp)s TABLE %(holding_table)s AS
5396.2.2 by Jeroen Vermeulen
WHERE clause in extract() can now refer to source-table row as 'source.'
471
                SELECT DISTINCT ON (source.id)
4568.1.11 by Jeroen Vermeulen
Many more fixes. Now to get the tests to pass...
472
                    %(columns)s, NULL::integer AS new_id
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
473
                FROM %(source_tables)s
4918.4.11 by Jeroen Vermeulen
Removed more temp table usage. Also, commit regularly and analyze holding tables before pouring.
474
                %(where)s
475
                ORDER BY id''' % table_creation_parameters)
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
476
            cur.execute('''
477
                UPDATE %(holding_table)s AS holding
478
                SET new_id = %(id_sequence)s
479
                WHERE NOT (%(inert_where)s)
480
                ''' % table_creation_parameters)
481
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
482
        return cur
483
484
    def _indexIdColumn(self, holding_table, source_table, cur):
485
        """Index id column on holding table.
486
487
        Creates a unique index on "id" column in holding table.  The index
488
        gets the name of the holding table with "_id" appended.
489
        """
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
490
        source_table = str(source_table)
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
491
        self.logger.debug("Indexing %s" % holding_table)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
492
        cur.execute('''
493
            CREATE UNIQUE INDEX %s
494
            ON %s (id)
495
        ''' % (self.getHoldingTableName(source_table, 'id'), holding_table))
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
496
497
    def _retargetForeignKeys(self, holding_table, joins, cur):
498
        """Replace foreign keys in new holding table.
499
500
        Set the values of the foreign keys that are to be retargeted to those
501
        in their respective "new_" variants, then drop those "new_" columns we
502
        added from the holding table.
503
        """
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
504
        columns = [join.lower() for join in joins]
4271.3.17 by Jeroen Vermeulen
Merge from rocketfuel, including Great Renaming of DistroRelease to DistroSeries
505
        fk_updates = ['%s = new_%s' % (column, column) for column in columns]
4271.2.17 by Jeroen Vermeulen
Changes based on review by Tim Penhey
506
        updates = ', '.join(fk_updates)
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
507
        self.logger.debug("Redirecting foreign keys: %s" % updates)
4271.2.17 by Jeroen Vermeulen
Changes based on review by Tim Penhey
508
        cur.execute("UPDATE %s SET %s" % (holding_table, updates))
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
509
        for column in columns:
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
510
            self.logger.debug("Dropping foreign-key column %s" % column)
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
511
            cur.execute("ALTER TABLE %s DROP COLUMN new_%s"
512
                        % (holding_table, column))
4271.2.4 by Jeroen Vermeulen
Updated documentation; removed last references to translation tables
513
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
514
    def needsRecovery(self):
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
515
        """Do we have holding tables with recoverable data from previous run?
4271.2.4 by Jeroen Vermeulen
Updated documentation; removed last references to translation tables
516
517
        Returns Boolean answer.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
518
        """
519
520
        cur = cursor()
521
522
        # If there are any holding tables to be poured into their source
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
523
        # tables, there must at least be one for the last table that pour()
524
        # processes.
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
525
        last_holding_table = self.getRawHoldingTableName(self.tables[-1])
526
        if not postgresql.have_table(cur, last_holding_table):
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
527
            return False
528
529
        # If the first table in our list also still exists, and it still has
530
        # its new_id column, then the pouring process had not begun yet.
531
        # Assume the data was not ready for pouring.
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
532
        first_holding_table = self.getRawHoldingTableName(self.tables[0])
533
        if postgresql.table_has_column(cur, first_holding_table, 'new_id'):
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
534
            self.logger.info(
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
535
                "Previous run aborted too early for recovery; redo all")
536
            return False
537
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
538
        self.logger.info("Recoverable data found")
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
539
        return True
540
4271.3.19 by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context
541
    def pour(self, transaction_manager):
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
542
        """Pour data from holding tables back into source tables.
543
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
544
        Rows in the holding table that have their new_id set to null are
545
        skipped.
546
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
547
        The transaction manager is committed and re-opened after every batch
548
        run.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
549
550
        Batch sizes are dynamically adjusted to meet the stated time goal.
551
        """
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
552
        if self.last_extracted_table is None:
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
553
            if not self.needsRecovery():
4271.3.16 by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__()
554
                raise AssertionError("Can't pour: no tables extracted")
555
        elif self.last_extracted_table != len(self.tables) - 1:
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
556
            raise AssertionError(
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
557
                "Not safe to pour: last table '%s' was not extracted"
558
                % self.tables[-1])
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
559
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
560
        cur = self._commit(transaction_manager)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
561
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
562
        # Don't let postgres revert to slow sequential scans while we pour.
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
563
        # That might otherwise happen to the holding table as its vital "id"
564
        # index degrades with the removal of rows.
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
565
        postgresql.allow_sequential_scans(cur, False)
566
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
567
        # Main loop: for each of the source tables being copied, see if
568
        # there's a matching holding table.  If so, prepare it, pour it back
569
        # into the source table, and drop.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
570
        for table in self.tables:
4271.2.10 by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing
571
            holding_table_unquoted = self.getRawHoldingTableName(table)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
572
573
            if not postgresql.have_table(cur, holding_table_unquoted):
574
                # We know we're in a suitable state for pouring.  If this
575
                # table does not exist, it must be because it's been poured
576
                # out completely and dropped in an earlier instance of this
577
                # loop, before the failure we're apparently recovering from.
578
                continue
579
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
580
            holding_table = self.getHoldingTableName(table)
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
581
            self.logger.info("Pouring %s back into %s..."
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
582
                         % (holding_table, table))
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
583
584
            tablestarttime = time.time()
585
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
586
            has_new_id = postgresql.table_has_column(
587
                cur, holding_table_unquoted, 'new_id')
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
588
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
589
            self._pourTable(
590
                holding_table, table, has_new_id, transaction_manager)
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
591
592
            # Drop holding table.  It may still contain rows with id set to
593
            # null.  Those must not be poured.
4271.3.2 by Jeroen Vermeulen
Renamed TunedLoop to TunableLoop based on pre-impl chat with BjornT.
594
            postgresql.drop_tables(cursor(), holding_table)
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
595
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
596
            self.logger.debug(
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
597
                "Pouring %s took %.3f seconds."
13011.4.2 by Stuart Bishop
Delint
598
                % (holding_table, time.time() - tablestarttime))
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
599
3691.8.105 by Carlos Perello Marin
Applied review comments for postmerge review of r4272 + bug fix for bug #122363
600
            cur = self._commit(transaction_manager)
4271.3.10 by Jeroen Vermeulen
Oops, another hurried typo
601
4918.4.22 by Jeroen Vermeulen
Changes based on reviews by bac and jml.
602
        # In future, let the database perform sequential scans again if it
603
        # decides that's best.
604
        postgresql.allow_sequential_scans(cur, True)
605
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
606
    def _pourTable(
607
        self, holding_table, table, has_new_id, transaction_manager):
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
608
        """Pour contents of a holding table back into its source table.
609
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
610
        This will commit transaction_manager, typically multiple times.
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
611
        """
612
        if has_new_id:
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
613
            # Update ids in holding table from originals to copies.  To
614
            # broaden the caller's opportunity to manipulate rows in the
615
            # holding tables, we skip rows that have new_id set to null.
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
616
            cur = cursor()
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
617
            cur.execute("UPDATE %s SET id=new_id" % holding_table)
618
            # Restore table to original schema
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
619
            cur.execute("ALTER TABLE %s DROP COLUMN new_id" % holding_table)
4568.1.1 by Jeroen Vermeulen
Several new features to support online distroseries translation updates.
620
            self._commit(transaction_manager)
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
621
            self.logger.debug("...rearranged ids...")
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
622
4918.4.5 by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations.
623
        callback = self.pre_pouring_callbacks.get(table)
624
        if callback is not None:
625
            callback(holding_table, table)
626
4271.3.1 by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code
627
        # Now pour holding table's data into its source table.  This is where
628
        # we start writing to tables that other clients will be reading, and
629
        # our transaction will usually be serializable, so row locks are a
630
        # concern.  Break the writes up in batches of at least a thousand
631
        # rows.  The goal is to have these transactions running no longer than
632
        # five seconds or so each; we aim for four just to be sure.
633
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
634
        pourer = PouringLoop(
4726.1.2 by Jeroen Vermeulen
Changes based on review.
635
            holding_table, table, transaction_manager, self.logger,
636
            self.batch_pouring_callbacks.get(table))
7675.193.2 by Henning Eggers
Use DBLoopTuner for pouring in MultiTableCopy.
637
        DBLoopTuner(
4271.3.15 by Jeroen Vermeulen
Changes based on review by bac.
638
            pourer, self.seconds_per_batch, self.minimum_batch_size).run()
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
639
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
640
    def _checkExtractionOrder(self, source_table):
641
        """Verify order in which tables are extracted against tables list.
642
4271.2.9 by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs
643
        Check that the caller follows the stated plan, extracting from tables
644
        in the same order as in self.tables.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
645
        """
646
        try:
647
            table_number = self.tables.index(source_table)
648
        except ValueError:
649
            raise AssertionError(
650
                "Can't extract '%s': not in list of tables" % source_table)
651
652
        if self.last_extracted_table is None:
653
            # Can't skip the first table!
654
            if table_number > 0:
655
                raise AssertionError(
4271.3.14 by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey.
656
                    "Can't extract: skipped first table '%s'"
657
                    % self.tables[0])
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
658
        else:
659
            if table_number < self.last_extracted_table:
660
                raise AssertionError(
661
                    "Table '%s' extracted after its turn" % source_table)
13011.4.2 by Stuart Bishop
Delint
662
            if table_number > self.last_extracted_table + 1:
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
663
                raise AssertionError(
664
                    "Table '%s' extracted before its turn" % source_table)
4271.2.9 by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs
665
            if table_number == self.last_extracted_table:
666
                raise AssertionError(
667
                    "Table '%s' extracted again" % source_table)
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
668
669
        self.last_extracted_table = table_number
670
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
671
    def _checkForeignKeyOrder(self, fk, referenced_table):
672
        """Verify that we're in a position to "retarget" a foreign key.
673
674
        We've been asked to retarget a foreign key while copying.  Check that
675
        the table it refers to has already been copied.
676
        """
4568.1.9 by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table.
677
        lower_name = referenced_table.lower()
678
        lower_tables = self.lower_tables
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
679
        try:
4568.1.9 by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table.
680
            target_number = lower_tables.index(lower_name)
4271.2.13 by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste.
681
        except ValueError:
682
            raise AssertionError(
683
                "Foreign key '%s' refers to table '%s' "
684
                "which is not being copied" % (fk, referenced_table))
685
4568.1.9 by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table.
686
        if target_number > self.last_extracted_table:
687
            raise AssertionError(
688
                "Foreign key '%s' refers to table '%s' "
689
                "which is to be copied later" % (fk, referenced_table))
690
        if target_number == self.last_extracted_table:
691
            raise AssertionError(
692
                "Foreign key '%s' in table '%s' "
693
                "is a self-reference" % (fk, referenced_table))
694
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
695
    def _commit(self, transaction_manager):
696
        """Commit our transaction and create replacement cursor.
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
697
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
698
        Use this as "cur = self._commit(transaction_manager)" to commit a
699
        transaction, restart it, and create a cursor that lives within the new
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
700
        transaction.
701
        """
702
        start = time.time()
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
703
        transaction_manager.commit()
4692.1.1 by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line.
704
        self.logger.debug("Committed in %.3f seconds" % (time.time() - start))
4271.3.18 by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196.
705
        transaction_manager.begin()
4271.2.1 by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196
706
        return cursor()