8687.15.18
by Karl Fogel
Add the copyright header block to files under lib/canonical/. |
1 |
# Copyright 2009 Canonical Ltd. This software is licensed under the
|
2 |
# GNU Affero General Public License version 3 (see the file LICENSE).
|
|
4271.2.6
by Jeroen Vermeulen
Added header similar to that of other modules |
3 |
|
4 |
__metaclass__ = type |
|
5 |
||
13011.4.2
by Stuart Bishop
Delint |
6 |
__all__ = ['MultiTableCopy'] |
4271.2.6
by Jeroen Vermeulen
Added header similar to that of other modules |
7 |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
8 |
import logging |
13011.4.1
by Stuart Bishop
Refused to allow multitablecopy to create confusing tablenames |
9 |
import re |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
10 |
import time |
11 |
||
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
12 |
from zope.interface import implements |
13 |
||
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
14 |
from canonical.database import postgresql |
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
15 |
from canonical.database.sqlbase import (cursor, quote, quoteIdentifier) |
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
16 |
from canonical.launchpad.interfaces.looptuner import ITunableLoop |
7675.193.2
by Henning Eggers
Use DBLoopTuner for pouring in MultiTableCopy. |
17 |
from canonical.launchpad.utilities.looptuner import DBLoopTuner |
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
18 |
|
19 |
||
20 |
class PouringLoop: |
|
4271.3.2
by Jeroen Vermeulen
Renamed TunedLoop to TunableLoop based on pre-impl chat with BjornT. |
21 |
"""Loop body to pour data from holding tables back into source tables.
|
22 |
||
7675.193.2
by Henning Eggers
Use DBLoopTuner for pouring in MultiTableCopy. |
23 |
Used by MultiTableCopy internally to tell DBLoopTuner what to do.
|
4271.3.2
by Jeroen Vermeulen
Renamed TunedLoop to TunableLoop based on pre-impl chat with BjornT. |
24 |
"""
|
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
25 |
implements(ITunableLoop) |
26 |
||
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
27 |
def __init__(self, from_table, to_table, transaction_manager, logger, |
28 |
batch_pouring_callback=None): |
|
29 |
||
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
30 |
self.from_table = str(from_table) |
31 |
self.to_table = str(to_table) |
|
32 |
self.transaction_manager = transaction_manager |
|
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
33 |
self.logger = logger |
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
34 |
self.batch_pouring_callback = batch_pouring_callback |
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
35 |
self.cur = cursor() |
36 |
||
37 |
self.cur.execute("SELECT min(id), max(id) FROM %s" % from_table) |
|
4574.2.1
by Jeroen Vermeulen
Small change from review that was missed earlier. |
38 |
self.lowest_id, self.highest_id = self.cur.fetchone() |
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
39 |
|
40 |
if self.lowest_id is None: |
|
41 |
# Table is empty.
|
|
42 |
self.lowest_id = 1 |
|
43 |
self.highest_id = 0 |
|
44 |
||
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
45 |
self.logger.debug("Up to %d rows in holding table" |
46 |
% (self.highest_id + 1 - self.lowest_id)) |
|
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
47 |
|
4271.3.3
by Jeroen Vermeulen
Removed unused post(), renamed done() to meet naming requirements & pre-impl suggestions |
48 |
def isDone(self): |
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
49 |
"""See `ITunableLoop`."""
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
50 |
return self.lowest_id is None or self.lowest_id > self.highest_id |
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
51 |
|
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
52 |
def __call__(self, batch_size): |
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
53 |
"""See `ITunableLoop`.
|
54 |
||
55 |
Loop body: pour rows with ids up to "next" over to to_table."""
|
|
4271.3.8
by Jeroen Vermeulen
Make chunk_size a float (but round it in MultiTableCopy!), lots more tests |
56 |
batch_size = int(batch_size) |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
57 |
|
58 |
# Figure out what id lies exactly batch_size rows ahead.
|
|
59 |
self.cur.execute(""" |
|
60 |
SELECT id
|
|
61 |
FROM %s |
|
4568.1.8
by Jeroen Vermeulen
Compile fixes. |
62 |
WHERE id >= %s |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
63 |
ORDER BY id
|
4568.1.8
by Jeroen Vermeulen
Compile fixes. |
64 |
OFFSET %s |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
65 |
LIMIT 1
|
4568.1.8
by Jeroen Vermeulen
Compile fixes. |
66 |
""" % (self.from_table, quote(self.lowest_id), quote(batch_size))) |
67 |
end_id = self.cur.fetchone() |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
68 |
|
69 |
if end_id is not None: |
|
70 |
next = end_id[0] |
|
71 |
else: |
|
4568.1.8
by Jeroen Vermeulen
Compile fixes. |
72 |
next = self.highest_id |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
73 |
|
74 |
next += 1 |
|
75 |
||
76 |
self.prepareBatch( |
|
77 |
self.from_table, self.to_table, batch_size, self.lowest_id, next) |
|
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
78 |
|
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
79 |
self.logger.debug("pouring %s: %d rows (%d-%d)" % ( |
80 |
self.from_table, batch_size, self.lowest_id, next)) |
|
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
81 |
|
4271.3.21
by Jeroen Vermeulen
Few more improvements suggested by reviewer. |
82 |
self.cur.execute("INSERT INTO %s (SELECT * FROM %s WHERE id < %d)" |
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
83 |
% (self.to_table, self.from_table, next)) |
84 |
||
4271.3.21
by Jeroen Vermeulen
Few more improvements suggested by reviewer. |
85 |
self.cur.execute("DELETE FROM %s WHERE id < %d" |
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
86 |
% (self.from_table, next)) |
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
87 |
|
88 |
self.lowest_id = next |
|
89 |
self._commit() |
|
90 |
||
91 |
def _commit(self): |
|
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
92 |
"""Commit ongoing transaction, start a new one."""
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
93 |
self.transaction_manager.commit() |
94 |
self.transaction_manager.begin() |
|
95 |
self.cur = cursor() |
|
4918.4.22
by Jeroen Vermeulen
Changes based on reviews by bac and jml. |
96 |
# Disable slow sequential scans. The database server is reluctant to
|
97 |
# use indexes on tables that undergo large changes, such as the
|
|
98 |
# deletion of large numbers of rows in this case. Usually it's
|
|
99 |
# right but in this case it seems to slow things down dramatically and
|
|
100 |
# unnecessarily. We disable sequential scans for every commit since
|
|
101 |
# initZopeless by default resets our database connection with every
|
|
102 |
# new transaction.
|
|
103 |
# MultiTableCopy disables sequential scans for the first batch; this
|
|
104 |
# just renews our setting after the connection is reset.
|
|
4918.4.8
by Jeroen Vermeulen
Re-disable sequential scans after every commit, because the database connection will be reset at that point. |
105 |
postgresql.allow_sequential_scans(self.cur, False) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
106 |
|
5396.2.5
by Jeroen Vermeulen
Cosmetic changes based on review. |
107 |
def prepareBatch( |
108 |
self, from_table, to_table, batch_size, begin_id, end_id): |
|
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
109 |
"""If batch_pouring_callback is defined, call it."""
|
110 |
if self.batch_pouring_callback is not None: |
|
111 |
self.batch_pouring_callback( |
|
112 |
from_table, to_table, batch_size, begin_id, end_id) |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
113 |
|
13011.4.2
by Stuart Bishop
Delint |
114 |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
115 |
class MultiTableCopy: |
116 |
"""Copy interlinked data spanning multiple tables in a coherent fashion.
|
|
117 |
||
4271.2.4
by Jeroen Vermeulen
Updated documentation; removed last references to translation tables |
118 |
This allows data from a combination of tables, possibly with foreign-key
|
119 |
references between them, to be copied to a set of corresponding "holding
|
|
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
120 |
tables"; processed and modified there; and then be inserted back to the
|
4271.2.4
by Jeroen Vermeulen
Updated documentation; removed last references to translation tables |
121 |
original tables. The holding tables are created on demand and dropped
|
122 |
upon completion.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
123 |
|
4271.2.10
by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing |
124 |
You can tell the algorithm to redirect foreign keys. Say you're copying a
|
125 |
row x1 in a table X, and x1 has a foreign key referring to a row y1 in a
|
|
126 |
table Y that you're also copying. You will get copied rows x2 and y2
|
|
127 |
respectively. But if you declare the foreign-key relationship between X
|
|
128 |
and Y to the algorithm, then x2's instance of that foreign key will refer
|
|
129 |
not to y1 but to the new y2. Any rows in X whose associated rows of Y are
|
|
130 |
not copied, are also not copied. This can be useful when copying data in
|
|
5121.2.7
by Stuart Bishop
More required code changes |
131 |
entire sub-trees of the schema graph, e.g. "one distroseries and all the
|
4271.2.10
by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing |
132 |
translations associated with it."
|
133 |
||
134 |
All this happens in a two-stage process:
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
135 |
|
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
136 |
1. Extraction stage. Use the extract() method to copy selected data to a
|
137 |
holding table, one table at a time. Ordering matters: always do this in
|
|
138 |
such an order that the table you are extracting has no foreign-key
|
|
139 |
references to another table that you are yet to extract.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
140 |
|
141 |
This stage is relatively fast and holds no locks on the database. Do any
|
|
142 |
additional processing on the copied rows in the holding tables, during or
|
|
143 |
after the extraction stage, so you do not hold any locks on the source
|
|
144 |
tables yourself. It's up to you to make sure that all of the rows in the
|
|
145 |
holding tables can be inserted into their source tables: if you leave
|
|
146 |
primary keys and such unchanged, unique constraints will be violated in
|
|
147 |
the next stage.
|
|
148 |
||
149 |
2. Pouring stage. All data from the holding tables is inserted back into
|
|
150 |
the source tables. This entire stage, which normally takes the bulk of
|
|
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
151 |
the copying time, is performed by calling the pour method.
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
152 |
|
153 |
This stage will lock the rows that are being inserted in the source
|
|
154 |
tables, if the database is so inclined (e.g. when using postgres with
|
|
155 |
SERIALIZABLE isolation level). For that reason, the pouring is done in
|
|
156 |
smaller, controlled batches. If you give the object a database
|
|
157 |
transaction to work with, that transaction will be committed and restarted
|
|
158 |
between batches.
|
|
159 |
||
160 |
A MultiTableCopy is restartable. If the process should fail for any
|
|
161 |
reason, the holding tables will be left in one of two states: if stage 1
|
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
162 |
has not completed, needsRecovery will return False. In that case, drop
|
163 |
the holding tables using dropHoldingTables and either start again (or give
|
|
164 |
up). But if a previous run did complete the extraction stage, the holding
|
|
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
165 |
tables will remain and contain valid data. In that case, just pour again
|
166 |
to continue the work (and hopefully complete it this time).
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
167 |
|
168 |
Holding tables will have names like "temp_POMsgSet_holding_ubuntu_feisty",
|
|
169 |
in this case for one holding data extracted from source table POMsgSet by
|
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
170 |
a MultiTableCopy called "ubuntu_feisty". The holding tables are dropped
|
171 |
when we're done, but they are not temp tables. They have to be persistent
|
|
172 |
so we get a chance to resume interrupted copies and analyse failures.
|
|
173 |
||
174 |
# We use a regular, persistent table rather than a temp table for
|
|
175 |
# this so that we get a chance to resume interrupted copies, and
|
|
176 |
# analyse failures. If we used temp tables, they'd be gone by the
|
|
177 |
# time we knew something went wrong.
|
|
4271.2.9
by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs |
178 |
|
179 |
The tables to be copied must meet a number of conventions:
|
|
180 |
||
181 |
* First column must be an integer primary key called "id."
|
|
182 |
||
183 |
* id values must be assigned by a sequence, with a name that can be used
|
|
184 |
in SQL without quoting.
|
|
185 |
||
186 |
* Every foreign-key column that refers to a table that is also being
|
|
4271.2.14
by Jeroen Vermeulen
Final documentation touches before requesting review |
187 |
copied, has the same name as the table it refers to. This can be
|
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
188 |
changed by subclassing and overriding the _pointsToTable method.
|
4271.2.9
by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs |
189 |
|
190 |
* Foreign-key column names and the tables they refer to can be used in
|
|
191 |
SQL without quoting.
|
|
192 |
||
193 |
* For any foreign key column "x" referring to another table that is also
|
|
194 |
being copied, there must not be a column called "new_x"
|
|
4271.2.14
by Jeroen Vermeulen
Final documentation touches before requesting review |
195 |
|
196 |
If you use this class, it is up to you to ensure that the data that you
|
|
197 |
copy does not break compliance between the time it is extracted from its
|
|
198 |
source tables and the moment it has been poured back there. This means
|
|
199 |
that rows that the data refers to by foreign keys must not be deleted
|
|
200 |
while the multi-table copy is running, for instance.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
201 |
"""
|
202 |
||
4486.1.1
by Jeroen Vermeulen
Make MultiTableCopy a bit more conservative with batch sizes |
203 |
def __init__(self, name, tables, seconds_per_batch=2.0, |
4568.1.14
by Jeroen Vermeulen
Merge from RF. Resolved conflicts on logger passing in multitablecopy & distroseries. |
204 |
minimum_batch_size=500, restartable=True, logger=None): |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
205 |
"""Define a MultiTableCopy, including an in-order list of tables.
|
206 |
||
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
207 |
:param name: a unique identifier for this MultiTableCopy operation,
|
208 |
e.g. "ubuntu_feisty". The name will be included in the names of
|
|
209 |
holding tables.
|
|
210 |
:param tables: a list of tables that will be extracted and poured,
|
|
211 |
in the order in which they will be extracted (and later, poured).
|
|
212 |
This is essential when analyzing recoverable state. You may
|
|
213 |
attempt multiple extractions from the same table, but all tables
|
|
214 |
listed must be extracted. If you do not wish to copy any rows
|
|
215 |
from a source table, extract with "false" as its where clause.
|
|
216 |
:param seconds_per_batch: a time goal (in seconds) to define how long,
|
|
217 |
ideally, the algorithm should be allowed to hold locks on the
|
|
218 |
source tables. It will try to do the work in batches that take
|
|
219 |
about this long.
|
|
220 |
:param minimum_batch_size: minimum number of rows to pour in one
|
|
221 |
batch. You may want to set this to stop the algorithm from
|
|
222 |
resorting to endless single-row batches in situations where
|
|
223 |
response times are too slow but batch size turns out not to matter
|
|
224 |
much.
|
|
225 |
:param restartable: whether you want the remaining data to be
|
|
226 |
available for recovery if the connection (or the process) fails
|
|
227 |
while in the pouring stage. If False, will extract to temp
|
|
4918.4.7
by Jeroen Vermeulen
Added testing for pre-pouring callbacks. |
228 |
tables. CAUTION: our connections currently get reset every time
|
229 |
we commit a transaction, obliterating any temp tables possibly in
|
|
230 |
the middle of the pouring process!
|
|
4568.1.14
by Jeroen Vermeulen
Merge from RF. Resolved conflicts on logger passing in multitablecopy & distroseries. |
231 |
:param logger: a logger to write informational output to. If none is
|
232 |
given, the default is used.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
233 |
"""
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
234 |
self.name = str(name) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
235 |
self.tables = tables |
4271.2.17
by Jeroen Vermeulen
Changes based on review by Tim Penhey |
236 |
self.lower_tables = [table.lower() for table in self.tables] |
4271.3.15
by Jeroen Vermeulen
Changes based on review by bac. |
237 |
self.seconds_per_batch = seconds_per_batch |
4271.3.12
by Jeroen Vermeulen
Configurable batching on MultiTableCopy; test with tiny batches |
238 |
self.minimum_batch_size = minimum_batch_size |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
239 |
self.last_extracted_table = None |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
240 |
self.restartable = restartable |
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
241 |
self.batch_pouring_callbacks = {} |
4918.4.5
by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations. |
242 |
self.pre_pouring_callbacks = {} |
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
243 |
if logger is not None: |
244 |
self.logger = logger |
|
245 |
else: |
|
246 |
self.logger = logging |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
247 |
|
248 |
def dropHoldingTables(self): |
|
4271.2.4
by Jeroen Vermeulen
Updated documentation; removed last references to translation tables |
249 |
"""Drop any holding tables that may exist for this MultiTableCopy."""
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
250 |
holding_tables = [self.getHoldingTableName(table) |
251 |
for table in self.tables] |
|
252 |
postgresql.drop_tables(cursor(), holding_tables) |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
253 |
|
4271.2.10
by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing |
254 |
def getRawHoldingTableName(self, tablename, suffix=''): |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
255 |
"""Name for a holding table, but without quotes. Use with care."""
|
256 |
if suffix: |
|
257 |
suffix = '_%s' % suffix |
|
13011.4.1
by Stuart Bishop
Refused to allow multitablecopy to create confusing tablenames |
258 |
|
259 |
assert re.search(r'[^a-z_]', tablename + suffix) is None, ( |
|
260 |
'Unsupported characters in table name per Bug #179821') |
|
261 |
||
262 |
raw_name = "temp_%s_holding_%s%s" % ( |
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
263 |
str(tablename), self.name, str(suffix)) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
264 |
|
13011.4.1
by Stuart Bishop
Refused to allow multitablecopy to create confusing tablenames |
265 |
return raw_name |
266 |
||
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
267 |
def getHoldingTableName(self, tablename, suffix=''): |
268 |
"""Name for a holding table to hold data being copied in tablename.
|
|
269 |
||
270 |
Return value is properly quoted for use as an SQL identifier.
|
|
271 |
"""
|
|
4271.2.18
by Jeroen Vermeulen
More feedback from various reviewers |
272 |
raw_name = self.getRawHoldingTableName(tablename, suffix) |
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
273 |
return quoteIdentifier(raw_name) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
274 |
|
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
275 |
def _pointsToTable(self, source_table, foreign_key): |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
276 |
"""Name of table that source_table.foreign_key refers to.
|
277 |
||
278 |
By default, all foreign keys that play a role in the MultiTableCopy
|
|
279 |
are expected to have the same names as the tables they refer to. If
|
|
280 |
that is not the case for your copy, subclass this class and override
|
|
281 |
this method with a version that has specific knowledge of what points
|
|
282 |
where.
|
|
283 |
"""
|
|
284 |
return foreign_key |
|
285 |
||
4568.1.9
by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table. |
286 |
def extract(self, source_table, joins=None, where_clause=None, |
4918.4.5
by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations. |
287 |
id_sequence=None, inert_where=None, pre_pouring_callback=None, |
288 |
batch_pouring_callback=None, external_joins=None): |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
289 |
"""Extract (selected) rows from source_table into a holding table.
|
290 |
||
4514.2.1
by Jeroen Vermeulen
Improved documentation. |
291 |
The holding table gets an additional new_id column with identifiers
|
292 |
generated by `id_sequence`. Apart from this extra column, (and
|
|
293 |
indexes and constraints), the holding table is schematically identical
|
|
294 |
to `source_table`. A unique index is created for the original id
|
|
295 |
column.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
296 |
|
297 |
There is a special facility for redirecting foreign keys to other
|
|
4514.2.1
by Jeroen Vermeulen
Improved documentation. |
298 |
tables in the same copy operation. The `joins` argument can pass a
|
299 |
list of foreign keys. The foreign keys given in joins must be columns
|
|
300 |
of `source_table`, and refer to tables that are also being copied.
|
|
301 |
The selection used in populating the holding table for `source_table`
|
|
302 |
will be joined on each of the foreign keys listed in `joins`. The
|
|
303 |
foreign keys in the holding table will point to the new_ids of the
|
|
304 |
copied rows, rather than the original ids. Rows in `source_table`
|
|
305 |
will only be copied to their holding table if all rows they are joined
|
|
306 |
with through the `joins` parameter are also copied.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
307 |
|
308 |
When joining, the added tables' columns are not included in the
|
|
4514.2.1
by Jeroen Vermeulen
Improved documentation. |
309 |
holding table, but `where_clause` may still select on them.
|
4271.2.9
by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs |
310 |
|
4514.2.1
by Jeroen Vermeulen
Improved documentation. |
311 |
:param source_table: table to extract data from (and where the
|
312 |
extracted data will ultimately be poured back to).
|
|
313 |
:param joins: list of foreign keys from `source_table` to other source
|
|
314 |
tables being copied in the same operation. By default, each of
|
|
315 |
these foreign keys is assumed to refer to a table of the same
|
|
316 |
name. When breaking this convention, override the
|
|
317 |
`_pointsToTable` method to provide the right target table for each
|
|
318 |
foreign key that the operation should know about.
|
|
319 |
:param where_clause: Boolean SQL expression characterizing rows to be
|
|
5396.2.5
by Jeroen Vermeulen
Cosmetic changes based on review. |
320 |
extracted. The WHERE clause may refer to rows from table being
|
321 |
extracted as "source."
|
|
4514.2.1
by Jeroen Vermeulen
Improved documentation. |
322 |
:param id_sequence: SQL sequence that should assign new identifiers
|
323 |
for the extracted rows. Defaults to `source_table` with "_seq_id"
|
|
324 |
appended, which by SQLObject/Launchpad convention is the sequence
|
|
325 |
that provides `source_table`'s primary key values. Used verbatim,
|
|
326 |
without quoting.
|
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
327 |
:param inert_where: Boolean SQL expression characterizing rows that
|
328 |
are extracted, but should not poured back into `source_table`
|
|
329 |
during the pouring stage. For these rows, new_id will be null.
|
|
330 |
This clause is executed in a separate query, therefore will have
|
|
331 |
no access to any tables other than the newly created holding
|
|
332 |
table. The clause can reference the holding table under the name
|
|
4568.1.17
by Jeroen Vermeulen
Rearranged, cleaned up & fixed how foreign keys in holding tables are patched up. |
333 |
"holding." Any foreign keys from `joins` will still contain the
|
4726.1.3
by Jeroen Vermeulen
Fix typo found in review. |
334 |
values they had in `source_table`, but for each "x" of these
|
4568.1.18
by Jeroen Vermeulen
Got tests passing again! |
335 |
foreign keys, the holding table will have a column "new_x" that
|
336 |
holds the redirected foreign key.
|
|
4918.4.5
by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations. |
337 |
:param pre_pouring_callback: a callback that is called just before
|
338 |
pouring this table. At that time the holding table will no longer
|
|
339 |
have its new_id column, its values having been copied to the
|
|
340 |
regular id column. This means that the copied rows' original ids
|
|
341 |
are no longer known. The callback takes as arguments the holding
|
|
342 |
table's name and the source table's name. The callback may be
|
|
343 |
invoked more than once if pouring is interrupted and later
|
|
344 |
resumed.
|
|
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
345 |
:param batch_pouring_callback: a callback that is called before each
|
346 |
batch of rows is poured, within the same transaction that pours
|
|
347 |
those rows. It takes as arguments the holding table's name; the
|
|
348 |
source table's name; current batch size; the id of the first row
|
|
349 |
being poured; and the id where the batch stops. The "end id" is
|
|
350 |
exclusive, so a row with that id is not copied (and in fact may
|
|
351 |
not even exist). The time spent by `batch_ouring_callback` is
|
|
352 |
counted as part of the batch's processing time.
|
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
353 |
:param external_joins: a list of tables to join into the extraction
|
354 |
query, so that the extraction condition can select on them. Each
|
|
355 |
entry must be a string either simply naming a table ("Person"), or
|
|
356 |
naming a table and providing a name for it in the query (e.g.,
|
|
357 |
"Person p"). Do the latter if the same table also occurs
|
|
358 |
elsewhere in the same query. Your `where_clause` can refer to the
|
|
359 |
rows included in this way by the names you give or by their table
|
|
360 |
name, SQL rules permitting. If your join yields multiple rows
|
|
361 |
that have the same `source_table` id, only one arbitrary pick of
|
|
362 |
those will be extracted.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
363 |
"""
|
364 |
if id_sequence is None: |
|
365 |
id_sequence = "%s_id_seq" % source_table.lower() |
|
366 |
||
367 |
if joins is None: |
|
368 |
joins = [] |
|
369 |
||
4918.4.8
by Jeroen Vermeulen
Re-disable sequential scans after every commit, because the database connection will be reset at that point. |
370 |
self.batch_pouring_callbacks[source_table] = batch_pouring_callback |
371 |
self.pre_pouring_callbacks[source_table] = pre_pouring_callback |
|
4918.4.5
by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations. |
372 |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
373 |
if external_joins is None: |
374 |
external_joins = [] |
|
375 |
||
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
376 |
source_table = str(source_table) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
377 |
self._checkExtractionOrder(source_table) |
378 |
||
379 |
holding_table = self.getHoldingTableName(source_table) |
|
380 |
||
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
381 |
self.logger.info('Extracting from %s into %s...' % ( |
13011.4.2
by Stuart Bishop
Delint |
382 |
source_table, holding_table)) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
383 |
|
384 |
starttime = time.time() |
|
385 |
||
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
386 |
cur = self._selectToHolding(source_table, joins, external_joins, |
387 |
where_clause, holding_table, id_sequence, inert_where) |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
388 |
|
389 |
if len(joins) > 0: |
|
390 |
self._retargetForeignKeys(holding_table, joins, cur) |
|
391 |
||
392 |
# Now that our new holding table is in a stable state, index its id
|
|
393 |
self._indexIdColumn(holding_table, source_table, cur) |
|
394 |
||
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
395 |
self.logger.debug( |
13011.4.2
by Stuart Bishop
Delint |
396 |
'...Extracted in %.3f seconds' % (time.time() - starttime)) |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
397 |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
398 |
def _selectToHolding(self, source_table, joins, external_joins, |
399 |
where_clause, holding_table, id_sequence, inert_where): |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
400 |
"""Create holding table based on data from source table.
|
401 |
||
402 |
We don't need to know what's in the source table exactly; we just
|
|
403 |
create a new table straight from a "select *." Except we also add a
|
|
404 |
few columns for the purpose of retargeting foreign keys, as well as a
|
|
405 |
new_id column.
|
|
406 |
"""
|
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
407 |
source_table = str(source_table) |
5396.2.2
by Jeroen Vermeulen
WHERE clause in extract() can now refer to source-table row as 'source.' |
408 |
select = ['source.*'] |
409 |
from_list = ["%s source" % source_table] |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
410 |
where = [] |
411 |
||
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
412 |
for join in joins: |
413 |
join = str(join) |
|
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
414 |
referenced_table = self._pointsToTable(source_table, join) |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
415 |
referenced_holding = self.getHoldingTableName(referenced_table) |
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
416 |
column = join.lower() |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
417 |
|
418 |
self._checkForeignKeyOrder(column, referenced_table) |
|
419 |
||
5396.2.5
by Jeroen Vermeulen
Cosmetic changes based on review. |
420 |
select.append('%s.new_id AS new_%s' % ( |
421 |
referenced_holding, column)) |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
422 |
from_list.append(referenced_holding) |
423 |
where.append('%s = %s.id' % (column, referenced_holding)) |
|
424 |
||
425 |
from_list.extend(external_joins) |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
426 |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
427 |
if where_clause is not None: |
428 |
where.append('(%s)' % where_clause) |
|
429 |
||
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
430 |
where_text = '' |
431 |
if len(where) > 0: |
|
432 |
where_text = 'WHERE %s' % ' AND '.join(where) |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
433 |
|
434 |
# For each row we append at the end any new foreign key values, and
|
|
435 |
# finally a "new_id" holding its future id field. This new_id value
|
|
436 |
# is allocated from the original table's id sequence, so it will be
|
|
437 |
# unique in the original table.
|
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
438 |
table_creation_parameters = { |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
439 |
'columns': ','.join(select), |
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
440 |
'holding_table': holding_table, |
4568.1.8
by Jeroen Vermeulen
Compile fixes. |
441 |
'id_sequence': "nextval('%s'::regclass)" % id_sequence, |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
442 |
'inert_where': inert_where, |
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
443 |
'source_tables': ','.join(from_list), |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
444 |
'where': where_text, |
445 |
'temp': '', |
|
446 |
}
|
|
4568.1.8
by Jeroen Vermeulen
Compile fixes. |
447 |
if not self.restartable: |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
448 |
table_creation_parameters['temp'] = 'TEMP' |
449 |
||
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
450 |
cur = cursor() |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
451 |
|
452 |
# Create holding table directly from select result.
|
|
4568.1.8
by Jeroen Vermeulen
Compile fixes. |
453 |
if inert_where is None: |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
454 |
# We'll be pouring all rows from this table. To avoid a costly
|
455 |
# second write pass (which would rewrite all records in the
|
|
456 |
# holding table), we assign new_ids right in the same query.
|
|
457 |
cur.execute(''' |
|
458 |
CREATE %(temp)s TABLE %(holding_table)s AS |
|
5396.2.2
by Jeroen Vermeulen
WHERE clause in extract() can now refer to source-table row as 'source.' |
459 |
SELECT DISTINCT ON (source.id)
|
4568.1.11
by Jeroen Vermeulen
Many more fixes. Now to get the tests to pass... |
460 |
%(columns)s, %(id_sequence)s AS new_id |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
461 |
FROM %(source_tables)s |
4918.4.11
by Jeroen Vermeulen
Removed more temp table usage. Also, commit regularly and analyze holding tables before pouring. |
462 |
%(where)s |
463 |
ORDER BY id''' % table_creation_parameters) |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
464 |
else: |
465 |
# Some of the rows may have to have null new_ids. To avoid
|
|
466 |
# wasting "address space" on the sequence, we populate the entire
|
|
467 |
# holding table with null new_ids, then fill in new_id only for
|
|
468 |
# rows that do not match the "inert_where" condition.
|
|
469 |
cur.execute(''' |
|
470 |
CREATE %(temp)s TABLE %(holding_table)s AS |
|
5396.2.2
by Jeroen Vermeulen
WHERE clause in extract() can now refer to source-table row as 'source.' |
471 |
SELECT DISTINCT ON (source.id)
|
4568.1.11
by Jeroen Vermeulen
Many more fixes. Now to get the tests to pass... |
472 |
%(columns)s, NULL::integer AS new_id |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
473 |
FROM %(source_tables)s |
4918.4.11
by Jeroen Vermeulen
Removed more temp table usage. Also, commit regularly and analyze holding tables before pouring. |
474 |
%(where)s |
475 |
ORDER BY id''' % table_creation_parameters) |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
476 |
cur.execute(''' |
477 |
UPDATE %(holding_table)s AS holding |
|
478 |
SET new_id = %(id_sequence)s |
|
479 |
WHERE NOT (%(inert_where)s) |
|
480 |
''' % table_creation_parameters) |
|
481 |
||
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
482 |
return cur |
483 |
||
484 |
def _indexIdColumn(self, holding_table, source_table, cur): |
|
485 |
"""Index id column on holding table.
|
|
486 |
||
487 |
Creates a unique index on "id" column in holding table. The index
|
|
488 |
gets the name of the holding table with "_id" appended.
|
|
489 |
"""
|
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
490 |
source_table = str(source_table) |
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
491 |
self.logger.debug("Indexing %s" % holding_table) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
492 |
cur.execute(''' |
493 |
CREATE UNIQUE INDEX %s |
|
494 |
ON %s (id) |
|
495 |
''' % (self.getHoldingTableName(source_table, 'id'), holding_table)) |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
496 |
|
497 |
def _retargetForeignKeys(self, holding_table, joins, cur): |
|
498 |
"""Replace foreign keys in new holding table.
|
|
499 |
||
500 |
Set the values of the foreign keys that are to be retargeted to those
|
|
501 |
in their respective "new_" variants, then drop those "new_" columns we
|
|
502 |
added from the holding table.
|
|
503 |
"""
|
|
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
504 |
columns = [join.lower() for join in joins] |
4271.3.17
by Jeroen Vermeulen
Merge from rocketfuel, including Great Renaming of DistroRelease to DistroSeries |
505 |
fk_updates = ['%s = new_%s' % (column, column) for column in columns] |
4271.2.17
by Jeroen Vermeulen
Changes based on review by Tim Penhey |
506 |
updates = ', '.join(fk_updates) |
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
507 |
self.logger.debug("Redirecting foreign keys: %s" % updates) |
4271.2.17
by Jeroen Vermeulen
Changes based on review by Tim Penhey |
508 |
cur.execute("UPDATE %s SET %s" % (holding_table, updates)) |
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
509 |
for column in columns: |
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
510 |
self.logger.debug("Dropping foreign-key column %s" % column) |
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
511 |
cur.execute("ALTER TABLE %s DROP COLUMN new_%s" |
512 |
% (holding_table, column)) |
|
4271.2.4
by Jeroen Vermeulen
Updated documentation; removed last references to translation tables |
513 |
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
514 |
def needsRecovery(self): |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
515 |
"""Do we have holding tables with recoverable data from previous run?
|
4271.2.4
by Jeroen Vermeulen
Updated documentation; removed last references to translation tables |
516 |
|
517 |
Returns Boolean answer.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
518 |
"""
|
519 |
||
520 |
cur = cursor() |
|
521 |
||
522 |
# If there are any holding tables to be poured into their source
|
|
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
523 |
# tables, there must at least be one for the last table that pour()
|
524 |
# processes.
|
|
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
525 |
last_holding_table = self.getRawHoldingTableName(self.tables[-1]) |
526 |
if not postgresql.have_table(cur, last_holding_table): |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
527 |
return False |
528 |
||
529 |
# If the first table in our list also still exists, and it still has
|
|
530 |
# its new_id column, then the pouring process had not begun yet.
|
|
531 |
# Assume the data was not ready for pouring.
|
|
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
532 |
first_holding_table = self.getRawHoldingTableName(self.tables[0]) |
533 |
if postgresql.table_has_column(cur, first_holding_table, 'new_id'): |
|
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
534 |
self.logger.info( |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
535 |
"Previous run aborted too early for recovery; redo all") |
536 |
return False |
|
537 |
||
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
538 |
self.logger.info("Recoverable data found") |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
539 |
return True |
540 |
||
4271.3.19
by Jeroen Vermeulen
Gave MultiTableCopy methods more concise names, now that their class indicates context |
541 |
def pour(self, transaction_manager): |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
542 |
"""Pour data from holding tables back into source tables.
|
543 |
||
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
544 |
Rows in the holding table that have their new_id set to null are
|
545 |
skipped.
|
|
546 |
||
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
547 |
The transaction manager is committed and re-opened after every batch
|
548 |
run.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
549 |
|
550 |
Batch sizes are dynamically adjusted to meet the stated time goal.
|
|
551 |
"""
|
|
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
552 |
if self.last_extracted_table is None: |
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
553 |
if not self.needsRecovery(): |
4271.3.16
by Jeroen Vermeulen
More feedback from the review team; made TunableLoop an interface; replaced performChunk() with __call__() |
554 |
raise AssertionError("Can't pour: no tables extracted") |
555 |
elif self.last_extracted_table != len(self.tables) - 1: |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
556 |
raise AssertionError( |
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
557 |
"Not safe to pour: last table '%s' was not extracted" |
558 |
% self.tables[-1]) |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
559 |
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
560 |
cur = self._commit(transaction_manager) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
561 |
|
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
562 |
# Don't let postgres revert to slow sequential scans while we pour.
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
563 |
# That might otherwise happen to the holding table as its vital "id"
|
564 |
# index degrades with the removal of rows.
|
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
565 |
postgresql.allow_sequential_scans(cur, False) |
566 |
||
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
567 |
# Main loop: for each of the source tables being copied, see if
|
568 |
# there's a matching holding table. If so, prepare it, pour it back
|
|
569 |
# into the source table, and drop.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
570 |
for table in self.tables: |
4271.2.10
by Jeroen Vermeulen
Made ztm a parameter to MultiTableCopy.pourHoldingTables; lots more testing |
571 |
holding_table_unquoted = self.getRawHoldingTableName(table) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
572 |
|
573 |
if not postgresql.have_table(cur, holding_table_unquoted): |
|
574 |
# We know we're in a suitable state for pouring. If this
|
|
575 |
# table does not exist, it must be because it's been poured
|
|
576 |
# out completely and dropped in an earlier instance of this
|
|
577 |
# loop, before the failure we're apparently recovering from.
|
|
578 |
continue
|
|
579 |
||
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
580 |
holding_table = self.getHoldingTableName(table) |
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
581 |
self.logger.info("Pouring %s back into %s..." |
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
582 |
% (holding_table, table)) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
583 |
|
584 |
tablestarttime = time.time() |
|
585 |
||
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
586 |
has_new_id = postgresql.table_has_column( |
587 |
cur, holding_table_unquoted, 'new_id') |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
588 |
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
589 |
self._pourTable( |
590 |
holding_table, table, has_new_id, transaction_manager) |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
591 |
|
592 |
# Drop holding table. It may still contain rows with id set to
|
|
593 |
# null. Those must not be poured.
|
|
4271.3.2
by Jeroen Vermeulen
Renamed TunedLoop to TunableLoop based on pre-impl chat with BjornT. |
594 |
postgresql.drop_tables(cursor(), holding_table) |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
595 |
|
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
596 |
self.logger.debug( |
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
597 |
"Pouring %s took %.3f seconds." |
13011.4.2
by Stuart Bishop
Delint |
598 |
% (holding_table, time.time() - tablestarttime)) |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
599 |
|
3691.8.105
by Carlos Perello Marin
Applied review comments for postmerge review of r4272 + bug fix for bug #122363 |
600 |
cur = self._commit(transaction_manager) |
4271.3.10
by Jeroen Vermeulen
Oops, another hurried typo |
601 |
|
4918.4.22
by Jeroen Vermeulen
Changes based on reviews by bac and jml. |
602 |
# In future, let the database perform sequential scans again if it
|
603 |
# decides that's best.
|
|
604 |
postgresql.allow_sequential_scans(cur, True) |
|
605 |
||
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
606 |
def _pourTable( |
607 |
self, holding_table, table, has_new_id, transaction_manager): |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
608 |
"""Pour contents of a holding table back into its source table.
|
609 |
||
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
610 |
This will commit transaction_manager, typically multiple times.
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
611 |
"""
|
612 |
if has_new_id: |
|
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
613 |
# Update ids in holding table from originals to copies. To
|
614 |
# broaden the caller's opportunity to manipulate rows in the
|
|
615 |
# holding tables, we skip rows that have new_id set to null.
|
|
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
616 |
cur = cursor() |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
617 |
cur.execute("UPDATE %s SET id=new_id" % holding_table) |
618 |
# Restore table to original schema
|
|
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
619 |
cur.execute("ALTER TABLE %s DROP COLUMN new_id" % holding_table) |
4568.1.1
by Jeroen Vermeulen
Several new features to support online distroseries translation updates. |
620 |
self._commit(transaction_manager) |
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
621 |
self.logger.debug("...rearranged ids...") |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
622 |
|
4918.4.5
by Jeroen Vermeulen
New pre-pouring callback in MultiTableCopy; used to delete dangling POMsgSets and POSubmissions while copying distroseries translations. |
623 |
callback = self.pre_pouring_callbacks.get(table) |
624 |
if callback is not None: |
|
625 |
callback(holding_table, table) |
|
626 |
||
4271.3.1
by Jeroen Vermeulen
First stab at extracting goal-oriented loop-tuning code |
627 |
# Now pour holding table's data into its source table. This is where
|
628 |
# we start writing to tables that other clients will be reading, and
|
|
629 |
# our transaction will usually be serializable, so row locks are a
|
|
630 |
# concern. Break the writes up in batches of at least a thousand
|
|
631 |
# rows. The goal is to have these transactions running no longer than
|
|
632 |
# five seconds or so each; we aim for four just to be sure.
|
|
633 |
||
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
634 |
pourer = PouringLoop( |
4726.1.2
by Jeroen Vermeulen
Changes based on review. |
635 |
holding_table, table, transaction_manager, self.logger, |
636 |
self.batch_pouring_callbacks.get(table)) |
|
7675.193.2
by Henning Eggers
Use DBLoopTuner for pouring in MultiTableCopy. |
637 |
DBLoopTuner( |
4271.3.15
by Jeroen Vermeulen
Changes based on review by bac. |
638 |
pourer, self.seconds_per_batch, self.minimum_batch_size).run() |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
639 |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
640 |
def _checkExtractionOrder(self, source_table): |
641 |
"""Verify order in which tables are extracted against tables list.
|
|
642 |
||
4271.2.9
by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs |
643 |
Check that the caller follows the stated plan, extracting from tables
|
644 |
in the same order as in self.tables.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
645 |
"""
|
646 |
try: |
|
647 |
table_number = self.tables.index(source_table) |
|
648 |
except ValueError: |
|
649 |
raise AssertionError( |
|
650 |
"Can't extract '%s': not in list of tables" % source_table) |
|
651 |
||
652 |
if self.last_extracted_table is None: |
|
653 |
# Can't skip the first table!
|
|
654 |
if table_number > 0: |
|
655 |
raise AssertionError( |
|
4271.3.14
by Jeroen Vermeulen
Changes based on review of bug-116196 by Tim Penhey. |
656 |
"Can't extract: skipped first table '%s'" |
657 |
% self.tables[0]) |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
658 |
else: |
659 |
if table_number < self.last_extracted_table: |
|
660 |
raise AssertionError( |
|
661 |
"Table '%s' extracted after its turn" % source_table) |
|
13011.4.2
by Stuart Bishop
Delint |
662 |
if table_number > self.last_extracted_table + 1: |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
663 |
raise AssertionError( |
664 |
"Table '%s' extracted before its turn" % source_table) |
|
4271.2.9
by Jeroen Vermeulen
First unit tests for MultiTableCopy; disallow repeated extractions; fix some bugs |
665 |
if table_number == self.last_extracted_table: |
666 |
raise AssertionError( |
|
667 |
"Table '%s' extracted again" % source_table) |
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
668 |
|
669 |
self.last_extracted_table = table_number |
|
670 |
||
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
671 |
def _checkForeignKeyOrder(self, fk, referenced_table): |
672 |
"""Verify that we're in a position to "retarget" a foreign key.
|
|
673 |
||
674 |
We've been asked to retarget a foreign key while copying. Check that
|
|
675 |
the table it refers to has already been copied.
|
|
676 |
"""
|
|
4568.1.9
by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table. |
677 |
lower_name = referenced_table.lower() |
678 |
lower_tables = self.lower_tables |
|
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
679 |
try: |
4568.1.9
by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table. |
680 |
target_number = lower_tables.index(lower_name) |
4271.2.13
by Jeroen Vermeulen
Made pouring loop go forwards again. Factored out some methods after pre-impl call with flacoste. |
681 |
except ValueError: |
682 |
raise AssertionError( |
|
683 |
"Foreign key '%s' refers to table '%s' " |
|
684 |
"which is not being copied" % (fk, referenced_table)) |
|
685 |
||
4568.1.9
by Jeroen Vermeulen
Fixed up "unknown" new_id column type in holding tables when using temp table. |
686 |
if target_number > self.last_extracted_table: |
687 |
raise AssertionError( |
|
688 |
"Foreign key '%s' refers to table '%s' " |
|
689 |
"which is to be copied later" % (fk, referenced_table)) |
|
690 |
if target_number == self.last_extracted_table: |
|
691 |
raise AssertionError( |
|
692 |
"Foreign key '%s' in table '%s' " |
|
693 |
"is a self-reference" % (fk, referenced_table)) |
|
694 |
||
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
695 |
def _commit(self, transaction_manager): |
696 |
"""Commit our transaction and create replacement cursor.
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
697 |
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
698 |
Use this as "cur = self._commit(transaction_manager)" to commit a
|
699 |
transaction, restart it, and create a cursor that lives within the new
|
|
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
700 |
transaction.
|
701 |
"""
|
|
702 |
start = time.time() |
|
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
703 |
transaction_manager.commit() |
4692.1.1
by Jeroen Vermeulen
Pass on logger from script, with the settings it takes from the command line. |
704 |
self.logger.debug("Committed in %.3f seconds" % (time.time() - start)) |
4271.3.18
by Jeroen Vermeulen
Changes based on review for parent branch, bug-116196. |
705 |
transaction_manager.begin() |
4271.2.1
by Jeroen Vermeulen
Extracting table-pouring code out of distrorelease, as per bug 116196: https://bugs.launchpad.net/rosetta/+bug/116196 |
706 |
return cursor() |