~launchpad-pqm/launchpad/devel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
#!/usr/bin/python -S
#
# Copyright 2009 Canonical Ltd.  This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).

"""
Apply all outstanding schema patches to an existing launchpad database
"""

__metaclass__ = type

# pylint: disable-msg=W0403
import _pythonpath # Sort PYTHONPATH

from cStringIO import StringIO
import glob
import os.path
from optparse import OptionParser
import re
import sys
from tempfile import NamedTemporaryFile
from textwrap import dedent

from canonical.launchpad.scripts import db_options, logger_options, logger
from canonical.database.sqlbase import connect, ISOLATION_LEVEL_AUTOCOMMIT
from canonical.database.postgresql import fqn
import replication.helpers


SCHEMA_DIR = os.path.dirname(__file__)


def main():
    con = connect(options.dbuser)
    if replication.helpers.slony_installed(con):
        con.close()
        if options.commit is False:
            parser.error("--dry-run does not make sense with replicated db")
        log.info("Applying patches to Slony-I environment.")
        apply_patches_replicated()
    else:
        log.info("Applying patches to unreplicated environment.")
        apply_patches_normal(con)

    return 0


def apply_patches_normal(con):
    """Update a non replicated database."""
    # trusted.sql contains all our stored procedures, which may
    # be required for patches to apply correctly so must be run first.
    apply_other(con, 'trusted.sql')

    # Apply the patches
    patches = get_patchlist(con)
    for (major, minor, patch), patch_file in patches:
        apply_patch(con, major, minor, patch, patch_file)

    # Update comments.
    apply_comments(con)

    # Commit changes
    if options.commit:
        log.debug("Committing changes")
        con.commit()


def apply_patches_replicated():
    """Update a Slony-I cluster."""

    # Get an autocommit connection. We use autocommit so we don't have to
    # worry about blocking locks needed by Slony-I.
    con = connect(options.dbuser, isolation=ISOLATION_LEVEL_AUTOCOMMIT)

    # We use three slonik scripts to apply our DB patches.
    # The first script applies the DB patches to all nodes.

    # First make sure the cluster is synced.
    log.info("Waiting for cluster to sync.")
    replication.helpers.sync(timeout=600)

    outf = StringIO()

    # Start a transaction block.
    print >> outf, "try {"

    def run_sql(script):
        if os.path.isabs(script):
            full_path = script
        else:
            full_path = os.path.abspath(os.path.join(SCHEMA_DIR, script))
        assert os.path.exists(full_path), "%s doesn't exist." % full_path
        print >> outf, dedent("""\
            execute script (
                set id = @lpmain_set, event node = @master_node,
                filename='%s'
                );
            """ % full_path)

    # Apply trusted.sql
    run_sql('trusted.sql')

    # We are going to generate some temporary files using
    # NamedTempoararyFile. Store them here so we can control when
    # they get closed and cleaned up.
    temporary_files = []

    # Apply DB patches.
    patches = get_patchlist(con)
    for (major, minor, patch), patch_file in patches:
        run_sql(patch_file)
        # Cause a failure if the patch neglected to update
        # LaunchpadDatabaseRevision.
        assert_script = NamedTemporaryFile(prefix='assert', suffix='.sql')
        print >> assert_script, dedent("""
            SELECT assert_patch_applied(%d, %d, %d);
            """ % (major, minor, patch))
        assert_script.flush()
        run_sql(assert_script.name)
        temporary_files.append(assert_script)

    # Apply comments.sql. Default slonik refuses to run it as one
    # 'execute script' because it contains too many statements, so chunk
    # it (we don't want to rebuild slony with a higher limit).
    comments_path = os.path.join(os.path.dirname(__file__), 'comments.sql')
    comments = re.findall(
            "(?ms).*?'\s*;\s*$", open(comments_path, 'r').read())
    while comments:
        comment_file = NamedTemporaryFile(prefix="comments", suffix=".sql")
        print >> comment_file, '\n'.join(comments[:1000])
        del comments[:1000]
        comment_file.flush()
        run_sql(comment_file.name)
        # Store a reference so it doesn't get garbage collected before our
        # slonik script is run.
        temporary_files.append(comment_file)

    # Close transaction block and abort on error.
    print >> outf, dedent("""\
        }
        on error {
            echo 'Failed! Slonik script aborting. Patches rolled back.';
            exit 1;
            }
        """)

    # Execute the script with slonik.
    if not replication.helpers.execute_slonik(outf.getvalue()):
        log.fatal("Aborting.")

    # Cleanup our temporary files - they applied successfully.
    for temporary_file in temporary_files:
        temporary_file.close()
    del temporary_files

    # Wait for replication to sync.
    replication.helpers.sync(timeout=0)

    # The db patches have now been applied to all nodes, and we are now
    # committed to completing the upgrade (!). If any of the later stages
    # fail, it will likely involve manual cleanup.

    # We now scan for new tables and add them to the lpmain
    # replication set using a second script. Note that upgrade.py only
    # deals with the lpmain replication set.

    # Detect new tables and sequences.
    # Everything else that isn't replicated should go in the lpmain
    # replication set.
    cur = con.cursor()
    unrepl_tabs, unrepl_seqs = replication.helpers.discover_unreplicated(cur)

    # But warn if we are going to replicate something not in the calculated
    # set, as *_SEED in replication.helpers needs to be updated. We don't want
    # abort unless absolutely necessary to avoid manual cleanup.
    lpmain_tabs, lpmain_seqs = replication.helpers.calculate_replication_set(
            cur, replication.helpers.LPMAIN_SEED)

    assumed_tabs = unrepl_tabs.difference(lpmain_tabs)
    assumed_seqs = unrepl_seqs.difference(lpmain_seqs)
    for obj in (assumed_tabs.union(assumed_seqs)):
        log.warn(
            "%s not in calculated lpmain replication set. "
            "Update *_SEED in replication/helpers.py" % obj)

    if unrepl_tabs or unrepl_seqs:
        # TODO: Or if the holding set already exists - catch an aborted run.
        log.info(
            "New stuff needs replicating: %s"
            % ', '.join(sorted(unrepl_tabs.union(unrepl_seqs))))
        # Create a new replication set to hold new tables and sequences
        # TODO: Only create set if it doesn't already exist.
        outf = StringIO()
        print >> outf, dedent("""\
            try {
                create set (
                    id = @holding_set, origin = @master_node,
                    comment = 'Temporary set to merge'
                    );
            """)

        # Add the new tables and sequences to the holding set.
        cur.execute("""
            SELECT max(tab_id) FROM %s
            """ % fqn(replication.helpers.CLUSTER_NAMESPACE, 'sl_table'))
        next_id = cur.fetchone()[0] + 1
        for tab in unrepl_tabs:
            print >> outf, dedent("""\
                echo 'Adding %s to holding set for lpmain merge.';
                set add table (
                    set id = @holding_set, origin = @master_node, id=%d,
                    fully qualified name = '%s',
                    comment = '%s'
                    );
                """ % (tab, next_id, tab, tab))
            next_id += 1
        cur.execute("""
            SELECT max(seq_id) FROM %s
            """ % fqn(replication.helpers.CLUSTER_NAMESPACE, 'sl_sequence'))
        next_id = cur.fetchone()[0] + 1
        for seq in  unrepl_seqs:
            print >> outf, dedent("""\
                echo 'Adding %s to holding set for lpmain merge.';
                set add sequence (
                    set id = @holding_set, origin = @master_node, id=%d,
                    fully qualified name = '%s',
                    comment = '%s'
                    );
                """ % (seq, next_id, seq, seq))
            next_id += 1

        print >> outf, dedent("""\
            } on error {
                echo 'Failed to create holding set! Aborting.';
                exit 1;
                }
            """)

        # Subscribe the holding set to all replicas.
        # TODO: Only subscribe the set if not already subscribed.
        # Close the transaction and sync. Important, or MERGE SET will fail!
        # Merge the sets.
        # Sync.
        # Drop the holding set.
        for slave_node in replication.helpers.get_slave_nodes(con):
            print >> outf, dedent("""\
                echo 'Subscribing holding set to @node%d_node.';
                subscribe set (
                    id=@holding_set,
                    provider=@master_node, receiver=@node%d_node, forward=yes);
                echo 'Waiting for sync';
                sync (id=@master_node);
                wait for event (
                    origin=@master_node, confirmed=ALL,
                    wait on=@master_node, timeout=0
                    );
                """ % (slave_node.node_id, slave_node.node_id))

        print >> outf, dedent("""\
            echo 'Merging holding set to lpmain';
            merge set (
                id=@lpmain_set, add id=@holding_set, origin=@master_node
                );
            """)

        # Execute the script and sync.
        if not replication.helpers.execute_slonik(outf.getvalue()):
            log.fatal("Aborting.")
        replication.helpers.sync(timeout=0)

    # We also scan for tables and sequences we want to drop and do so using
    # a final slonik script. Instead of dropping tables in the DB patch,
    # we rename them into the ToDrop namespace.
    cur.execute("""
        SELECT nspname, relname, tab_id
        FROM pg_class
        JOIN pg_namespace ON relnamespace = pg_namespace.oid
        LEFT OUTER JOIN %s ON pg_class.oid = tab_reloid
        WHERE nspname='todrop' AND relkind='r'
        """ % fqn(replication.helpers.CLUSTER_NAMESPACE, 'sl_table'))
    tabs_to_drop = set(
        (fqn(nspname, relname), tab_id)
        for nspname, relname, tab_id in cur.fetchall())

    # Generate a slonik script to remove tables from the replication set,
    # and a DROP TABLE/DROP SEQUENCE sql script to run after.
    if tabs_to_drop:
        log.info("Dropping tables: %s" % ', '.join(
            name for name, id in tabs_to_drop))
        sk = StringIO()
        sql = NamedTemporaryFile(prefix="drop", suffix=".sql")
        print >> sk, "try {"
        for tab_name, tab_id in tabs_to_drop:
            if tab_id is not None:
                print >> sk, dedent("""\
                    echo 'Removing %s from replication';
                    set drop table (origin=@master_node, id=%d);
                    """ % (tab_name, tab_id))
            print >> sql, "DROP TABLE %s;" % tab_name
        sql.flush()
        print >> sk, dedent("""\
            execute script (
                set id=@lpmain_set, event node=@master_node,
                filename='%s'
                );
            }
            on error {
                echo 'Failed to drop tables. Aborting.';
                exit 1;
                }
            """ % sql.name)
        if not replication.helpers.execute_slonik(sk.getvalue()):
            log.fatal("Aborting.")
        sql.close()

    # Now drop sequences. We don't do this at the same time as the tables,
    # as most sequences will be dropped implicitly with the table drop.
    cur.execute("""
        SELECT nspname, relname, seq_id
        FROM pg_class
        JOIN pg_namespace ON relnamespace = pg_namespace.oid
        LEFT OUTER JOIN %s ON pg_class.oid = seq_reloid
        WHERE nspname='todrop' AND relkind='S'
        """ % fqn(replication.helpers.CLUSTER_NAMESPACE, 'sl_sequence'))
    seqs_to_drop = set(
        (fqn(nspname, relname), tab_id)
        for nspname, relname, tab_id in cur.fetchall())

    if seqs_to_drop:
        log.info("Dropping sequences: %s" % ', '.join(
            name for name, id in seqs_to_drop))
        # Generate a slonik script to remove sequences from the
        # replication set, DROP SEQUENCE sql script to run after.
        sk = StringIO()
        sql = NamedTemporaryFile(prefix="drop", suffix=".sql")
        print >> sk, "try {"
        for seq_name, seq_id in seqs_to_drop:
            if seq_id is not None:
                print >> sk, dedent("""\
                    echo 'Removing %s from replication';
                    set drop sequence (origin=@master_node, id=%d);
                    """ % (seq_name, seq_id))
            print >> sql, "DROP SEQUENCE %s;" % seq_name
        sql.flush()
        print >> sk, dedent("""\
            execute script (
                set id=@lpmain_set, event node=@master_node,
                filename='%s'
                );
            }
            on error {
                echo 'Failed to drop sequences. Aborting.';
                exit 1;
                }
            """ % sql.name)
        if not replication.helpers.execute_slonik(sk.getvalue()):
            log.fatal("Aborting.")
    replication.helpers.sync(timeout=0)


def get_patchlist(con):
    """Return a patches that need to be applied to the connected database
    in [((major, minor, patch), patch_file)] format.
    """
    dbpatches = applied_patches(con)

    # Generate a list of all patches we might want to apply
    patches = []
    all_patch_files = glob.glob(
        os.path.join(SCHEMA_DIR, 'patch-????-??-?.sql'))
    all_patch_files.sort()
    for patch_file in all_patch_files:
        m = re.search('patch-(\d+)-(\d+)-(\d).sql$', patch_file)
        if m is None:
            log.fatal('Invalid patch filename %s' % repr(patch_file))
            sys.exit(1)

        major, minor, patch = [int(i) for i in m.groups()]
        if (major, minor, patch) in dbpatches:
            continue # This patch has already been applied
        log.debug("Found patch %d.%d.%d -- %s" % (
            major, minor, patch, patch_file
            ))
        patches.append(((major, minor, patch), patch_file))
    return patches


def applied_patches(con):
    """Return a list of all patches that have been applied to the database.
    """
    cur = con.cursor()
    cur.execute("SELECT major, minor, patch FROM LaunchpadDatabaseRevision")
    return [tuple(row) for row in cur.fetchall()]


def apply_patch(con, major, minor, patch, patch_file):
    log.info("Applying %s" % patch_file)
    cur = con.cursor()
    full_sql = open(patch_file).read()

    # Strip comments
    full_sql = re.sub('(?xms) \/\* .*? \*\/', '', full_sql)
    full_sql = re.sub('(?xm) ^\s*-- .*? $', '', full_sql)

    # Regular expression to extract a single statement.
    # A statement may contain semicolons if it is a stored procedure
    # definition, which requires a disgusting regexp or a parser for
    # PostgreSQL specific SQL.
    statement_re = re.compile(
            r"( (?: [^;$] | \$ (?! \$) | \$\$.*? \$\$)+ )",
            re.DOTALL | re.MULTILINE | re.VERBOSE
            )
    for sql in statement_re.split(full_sql):
        sql = sql.strip()
        if sql and sql != ';':
            cur.execute(sql) # Will die on a bad patch.

    # Ensure the patch updated LaunchpadDatabaseRevision. We could do this
    # automatically and avoid the boilerplate, but then we would lose the
    # ability to easily apply the patches manually.
    if (major, minor, patch) not in applied_patches(con):
        log.fatal("%s failed to update LaunchpadDatabaseRevision correctly"
                % patch_file)
        sys.exit(2)

    # Commit changes if we allow partial updates.
    if options.commit and options.partial:
        log.debug("Committing changes")
        con.commit()


def apply_other(con, script):
    log.info("Applying %s" % script)
    cur = con.cursor()
    path = os.path.join(os.path.dirname(__file__), script)
    sql = open(path).read()
    cur.execute(sql)

    if options.commit and options.partial:
        log.debug("Committing changes")
        con.commit()


def apply_comments(con):
    apply_other(con, 'comments.sql')


if __name__ == '__main__':
    parser = OptionParser()
    db_options(parser)
    logger_options(parser)
    parser.add_option(
            "-n", "--dry-run", dest="commit", default=True,
            action="store_false", help="Don't actually commit changes"
            )
    parser.add_option(
            "-p", "--partial", dest="partial", default=False,
            action="store_true",
            help="Commit after applying each patch",
            )
    (options, args) = parser.parse_args()

    if args:
        parser.error("Too many arguments")

    log = logger(options)
    main()