1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
|
#!/usr/bin/python2.6 -S
# Copyright 2011 Canonical Ltd. This software is licensed under the
# GNU Affero General Public License version 3 (see the file LICENSE).
"""Full update process."""
import _pythonpath
from datetime import datetime
from optparse import OptionParser
import subprocess
import sys
from lp.services.scripts import (
db_options,
logger,
logger_options,
)
from preflight import (
KillConnectionsPreflight,
NoConnectionCheckPreflight,
)
import security # security.py script
import upgrade # upgrade.py script
# Command prefix for driving the pgbouncer init script through sudo.
# run_pgbouncer() appends the action ('start', 'stop' or 'status') to it.
PGBOUNCER_INITD = ['sudo', '/etc/init.d/pgbouncer']
def run_pgbouncer(log, cmd):
    """Run the pgbouncer init script with the given action.

    :param log: Logger used to report a non-zero exit status.
    :param cmd: One of 'start', 'stop' or 'status'.
    :return: The exit status of the init script invocation.
    """
    allowed_commands = ('start', 'stop', 'status')
    assert cmd in allowed_commands, (
        '\n'
        'Unrecognized command; remember any new commands need to be\n'
        'granted sudo on staging and prod.\n')

    argv = PGBOUNCER_INITD + [cmd]
    exit_status = subprocess.call(argv)
    # Flush our own stdout once the init script has finished.
    sys.stdout.flush()

    if exit_status == 0:
        return exit_status
    log.error("pgbouncer '%s' failed [%s]", cmd, exit_status)
    return exit_status
def run_upgrade(options, log):
    """Invoke upgrade.py in-process.

    It would be easier to just invoke the script, but this way we save
    several seconds of overhead as the component architecture loads up.

    :param options: Parsed command line options. `commit` is forced to
        True and `partial` to False before they are handed to upgrade.py.
    :param log: Logger installed as upgrade.py's global log and used to
        report failures.
    :return: The result of upgrade.main() on normal completion, otherwise
        a non-zero integer: 1 for an unhandled exception, and the exit
        code (or 1 if it is not a non-zero integer) when upgrade.py
        tried to exit the process.
    """
    # Fake expected command line arguments and global log
    options.commit = True
    options.partial = False
    upgrade.options = options
    upgrade.log = log

    # Invoke the database schema upgrade process.
    try:
        return upgrade.main()
    except Exception:
        log.exception('Unhandled exception')
        return 1
    except SystemExit as x:
        log.fatal("upgrade.py failed [%s]", x)
        # An attempted exit is always reported as a failure. Returning
        # None here would end up in sys.exit(None), i.e. exit status 0,
        # and make a failed upgrade look successful to the caller.
        exit_code = x.code
        if isinstance(exit_code, int) and exit_code != 0:
            return exit_code
        return 1
def run_security(options, log):
    """Invoke security.py in-process.

    It would be easier to just invoke the script, but this way we save
    several seconds of overhead as the component architecture loads up.

    :param options: Parsed command line options. `dryrun`, `revoke`,
        `owner` and `cluster` are overwritten before they are handed to
        security.py.
    :param log: Logger installed as security.py's global log and used to
        report failures.
    :return: The result of security.main(options) on normal completion,
        otherwise a non-zero integer: 1 for an unhandled exception, and
        the exit code (or 1 if it is not a non-zero integer) when
        security.py tried to exit the process.
    """
    # Fake expected command line arguments and global log
    options.dryrun = False
    options.revoke = True
    options.owner = 'postgres'
    options.cluster = True
    security.options = options
    security.log = log

    # Invoke the database security reset process.
    try:
        return security.main(options)
    except Exception:
        log.exception('Unhandled exception')
        return 1
    except SystemExit as x:
        log.fatal("security.py failed [%s]", x)
        # An attempted exit is always reported as a failure. Returning
        # None here would end up in sys.exit(None), i.e. exit status 0,
        # and make a failed security reset look successful to the caller.
        exit_code = x.code
        if isinstance(exit_code, int) and exit_code != 0:
            return exit_code
        return 1
def main():
    """Run the full database update and return a process exit status.

    The sequence is: preflight checks, pgbouncer shutdown, a check that
    kills remaining connections, upgrade.py, security.py, pgbouncer
    restart and a final preflight check. If anything fails while
    pgbouncer is stopped, a restart is still attempted before returning.

    :return: 0 on success. Otherwise the non-zero status of the failing
        step: the pgbouncer init script's exit status, 99, 100 or 101
        for the three preflight checks, or the status reported by
        upgrade.py / security.py (1 if the step reported none).
    """
    parser = OptionParser()

    # Add all the command line arguments.
    db_options(parser)
    logger_options(parser)
    (options, args) = parser.parse_args()
    if args:
        parser.error("Too many arguments")

    log = logger(options)

    #
    # Preflight checks. Confirm as best we can that the upgrade will
    # work unattended.
    #

    # Confirm we can invoke PGBOUNCER_INITD
    log.debug("Confirming sudo access to pgbouncer startup script")
    pgbouncer_rc = run_pgbouncer(log, 'status')
    if pgbouncer_rc != 0:
        return pgbouncer_rc

    # We initially ignore open connections, as they will shortly be
    # killed.
    if not NoConnectionCheckPreflight(log).check_all():
        return 99

    #
    # Start the actual upgrade. Failures beyond this point need to
    # generate informative messages to help with recovery.
    #

    # status flags
    pgbouncer_down = False
    upgrade_run = False
    security_run = False

    outage_start = datetime.now()

    try:
        # Shutdown pgbouncer
        log.info("Outage starts. Shutting down pgbouncer.")
        pgbouncer_rc = run_pgbouncer(log, 'stop')
        if pgbouncer_rc != 0:
            log.fatal("pgbouncer not shut down [%s]", pgbouncer_rc)
            return pgbouncer_rc
        pgbouncer_down = True

        if not KillConnectionsPreflight(log).check_all():
            return 100

        log.info("Preflight check succeeded. Starting upgrade.")
        upgrade_rc = run_upgrade(options, log)
        if upgrade_rc != 0:
            # A step that reports no status (None) has still failed; do
            # not let it reach sys.exit() as None, which means status 0.
            return upgrade_rc or 1
        upgrade_run = True
        log.info("Database patches applied. Stored procedures updated.")

        security_rc = run_security(options, log)
        if security_rc != 0:
            # Same as above: never turn a failure into exit status 0.
            return security_rc or 1
        security_run = True

        log.info("All database upgrade steps completed")

        log.info("Restarting pgbouncer")
        pgbouncer_rc = run_pgbouncer(log, 'start')
        if pgbouncer_rc != 0:
            log.fatal("pgbouncer not restarted [%s]", pgbouncer_rc)
            return pgbouncer_rc
        pgbouncer_down = False
        log.info("Outage complete. %s", datetime.now() - outage_start)

        # We will start seeing connections as soon as pgbouncer is
        # reenabled, so ignore them here.
        if not NoConnectionCheckPreflight(log).check_all():
            return 101

        log.info("All good. All done.")
        return 0

    finally:
        if pgbouncer_down:
            # Even if upgrade.py or security.py failed, we should be in
            # a good enough state to continue operation so restart
            # pgbouncer and allow connections.
            #  - upgrade.py may have failed to update the master, and
            #    changes should have rolled back.
            #  - upgrade.py may have failed to update a slave, breaking
            #    replication. The master is still operational, but
            #    slaves may be lagging and have the old schema.
            #  - security.py may have died, rolling back its changes on
            #    one or more nodes.
            # In all cases except the first, we have recovery to do but
            # systems are probably ok, or at least providing some
            # services.
            pgbouncer_rc = run_pgbouncer(log, 'start')
            if pgbouncer_rc == 0:
                log.info("Despite failures, pgbouncer restarted.")
                log.info("Outage complete. %s", datetime.now() - outage_start)
            else:
                log.fatal("pgbouncer is down and refuses to restart")
        if not upgrade_run:
            log.warning("upgrade.py still needs to be run")
        if not security_run:
            log.warning("security.py still needs to be run")
if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main()
    sys.exit(exit_status)
|