~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python -S
8687.15.10 by Karl Fogel
Add the copyright header block to top-level files.
2
#
11316.12.1 by Martin Pool
tolerate EINTR in test_on_merge.py
3
# Copyright 2009, 2010 Canonical Ltd.  This software is licensed under the
8687.15.10 by Karl Fogel
Add the copyright header block to top-level files.
4
# GNU Affero General Public License version 3 (see the file LICENSE).
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
5
6
"""Tests that get run automatically on a merge."""
9722.1.1 by Gary Poster
[testfix][r=salgado][ui=none] Try to fix the buildbot problem: buildbot is starting test_on_merge in a way that no-one else is.
7
import _pythonpath
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
8
10303.1.16 by Gary Poster
remove disgusting bit: we will change buildbot
9
import sys, time
10
import os, errno
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
11
import tabnanny
12
from StringIO import StringIO
5821.2.85 by James Henstridge
Add "make check_launchpad_storm_on_merge" target that runs the tests
13
import psycopg2
3308.1.2 by Stuart Bishop
Make test_on_merge.py do incremental output
14
from subprocess import Popen, PIPE, STDOUT
10866.2.1 by Maris Fogels
Rewrote the signal handling to move the sentinal into its own process group, where it won't be killed along with its children. Move the process group control code from the testrunner into the sentinal.
15
from signal import SIGKILL, SIGTERM, SIGINT, SIGHUP
11316.12.1 by Martin Pool
tolerate EINTR in test_on_merge.py
16
import select
2083 by Canonical.com Patch Queue Manager
[r=jamesh] testrunner improvements (?)
17
10866.2.1 by Maris Fogels
Rewrote the signal handling to move the sentinal into its own process group, where it won't be killed along with its children. Move the process group control code from the testrunner into the sentinal.
18
6393.2.1 by Joey Stanford
change the rather long timeout setting to something more reasonable and cleanup the explaination
19
# The TIMEOUT setting (expressed in seconds) is how long the test suite may
# go without producing any output before it is deemed to be hung, and then
# forcibly terminated.
# Its principal use is preventing a PQM job from hanging indefinitely and
# backing up the queue.
# e.g. Usage: TIMEOUT = 60 * 10
# This will set the timeout to 10 minutes.
TIMEOUT = 60 * 10

# Directory containing this script, with symlinks resolved.  bin/test and
# database/schema are located relative to it.
HERE = os.path.dirname(os.path.realpath(__file__))
28
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
29
30
def main():
    """Prepare the test database, then run bin/test with our arguments.

    The testrunner is only started when the database setup succeeded.

    Returns 1 if the database could not be set up, otherwise the
    testrunner's exit code.
    """
    database_status = setup_test_database()
    if database_status == 0:
        return run_test_process()
    return 1
41
42
43
def setup_test_database():
    """Set up a test instance of our postgresql database.

    The steps run in order and stop at the first failure: check the
    PostgreSQL server version, drop the old template database and make sure
    nobody is still connected, rebuild the template by running ``make test``
    in database/schema, then sanity check the freshly built template.

    Every database connection opened along the way is closed before the
    step that opened it returns, whether it succeeded or not.

    Returns 0 for success, 1 for errors.
    """
    steps = (
        _check_postgresql_version,
        _drop_template_database,
        _build_template_database,
        _check_template_database,
        )
    for step in steps:
        if step() != 0:
            return 1
    return 0


def _check_postgresql_version():
    """Return 0 if the PostgreSQL server is new enough, otherwise 1."""
    # Sanity check PostgreSQL version. No point in trying to create a test
    # database when PostgreSQL is too old.
    con = psycopg2.connect('dbname=template1')
    try:
        cur = con.cursor()
        cur.execute('show server_version')
        server_version = cur.fetchone()[0]
    finally:
        con.close()

    try:
        numeric_server_version = tuple(map(int, server_version.split('.')))
    except ValueError:
        # Skip this check if the version number is more complicated than
        # we expected.
        return 0

    if numeric_server_version < (8, 0):
        print('Your PostgreSQL version is too old.  You need 8.x.x')
        print('You have %s' % server_version)
        return 1
    return 0


def _drop_template_database():
    """Drop the old template database and look for rogue connections.

    Returns 0 once no connections to the Launchpad databases remain, 1 if
    some are still open at the final check.
    """
    max_checks = 2
    con = psycopg2.connect('dbname=template1')
    try:
        # Isolation level 0 is psycopg2's autocommit mode; PostgreSQL
        # refuses to run DROP DATABASE inside a transaction block.
        con.set_isolation_level(0)
        cur = con.cursor()

        # Drop the template database if it exists - the Makefile does this
        # too, but we can explicitly check for errors here.
        try:
            cur.execute('drop database launchpad_ftest_template')
        except psycopg2.ProgrammingError as error:
            if 'does not exist' not in str(error):
                raise

        # If there are existing database connections, terminate. We have
        # rogue processes still connected to the database.
        for check in range(max_checks):
            cur.execute("""
                SELECT usename, current_query
                FROM pg_stat_activity
                WHERE datname IN (
                    'launchpad_dev', 'launchpad_ftest_template',
                    'launchpad_ftest')
                """)
            results = list(cur.fetchall())
            if not results:
                return 0
            # Rogue processes. Report them, and sleep for a bit only when
            # there is another check left to make use of the wait.
            for usename, current_query in results:
                print('!! Open connection %s - %s' % (usename, current_query))
            if check + 1 < max_checks:
                print('Sleeping')
                time.sleep(20)

        print('Cannot rebuild database. There are open connections.')
        return 1
    finally:
        con.close()


def _build_template_database():
    """Run ``make test`` in database/schema; return 0 on success, else 1."""
    # Build the template database. Tests duplicate this.
    schema_dir = os.path.join(HERE, 'database', 'schema')
    devnull = open(os.devnull, 'w')
    try:
        # Run make with cwd set to the schema directory rather than through
        # a shell 'cd ...; make', so make can never run in the wrong
        # directory and the path needs no quoting.  Only stdout is
        # discarded; make's stderr still reaches our stderr.
        make_status = Popen(
            ['make', 'test'], cwd=schema_dir, stdout=devnull).wait()
    except OSError as error:
        # The schema directory or the make executable is missing.
        print('Unable to run make in %s: %s' % (schema_dir, error))
        make_status = 1
    finally:
        devnull.close()

    if make_status != 0:
        print('Failed to create database or load sampledata.')
        return 1
    return 0


def _check_template_database():
    """Sanity check the new template database; return 0 if it looks sane."""
    # Sanity check the database. No point running tests if the
    # bedrock is crumbling.
    con = psycopg2.connect('dbname=launchpad_ftest_template')
    try:
        cur = con.cursor()

        cur.execute('show search_path')
        search_path = cur.fetchone()[0]
        if search_path != '$user,public,ts2':
            print('Search path incorrect.')
            print('Add the following line to /etc/postgresql/postgresql.conf:')
            print("    search_path = '$user,public,ts2'")
            print("and tell postgresql to reload its configuration file.")
            return 1

        cur.execute("""
            select pg_encoding_to_char(encoding) as encoding from pg_database
            where datname='launchpad_ftest_template'
            """)
        enc = cur.fetchone()[0]
        if enc not in ('UNICODE', 'UTF8'):
            print('Database encoding incorrectly set')
            return 1

        cur.execute(r"""
            SELECT setting FROM pg_settings
            WHERE context='internal' AND name='lc_ctype'
            """)
        loc = cur.fetchone()[0]
        if loc != 'C':
            print('Database locale incorrectly set. Need to rerun initdb.')
            return 1
    finally:
        # Explicitly close our connection - things will fail if we leave
        # open connections.
        con.close()
    return 0
145
146
147
def run_test_process():
    """Start the testrunner process and return its exit code.

    bin/test is run under xvfb-run, as the leader of its own process group,
    with this script's command line arguments appended.  Its combined
    stdout and stderr is copied to our stdout as it arrives.  If no output
    arrives for TIMEOUT seconds while the process is still running, the
    testrunner is treated as hung and handed to cleanup_hung_testrunner().
    """
    # shlex.quote() only exists on Python 3; pipes.quote() is the Python 2
    # spelling of the same function.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    print('Running tests.')
    os.chdir(HERE)

    # We run the test suite under a virtual frame buffer server so that the
    # JavaScript integration test suite can run.
    cmd = [
        '/usr/bin/xvfb-run',
        "--error-file=/var/tmp/xvfb-errors.log",
        "--server-args='-screen 0 1024x768x24'",
        quote(os.path.join(HERE, 'bin', 'test')),
        ]
    # The command line is handed to a shell, so everything that is not a
    # fixed literal above is quoted.  Otherwise an argument containing
    # spaces or shell metacharacters would be re-split or interpreted by
    # the shell instead of reaching bin/test unchanged.
    cmd.extend(quote(arg) for arg in sys.argv[1:])
    command_line = ' '.join(cmd)
    print('Running command: %s' % command_line)

    # Run the test suite.  Make the suite the leader of a new process group
    # so that we can signal the group without signaling ourselves.
    # shell=True is required: the --server-args value above relies on the
    # shell to strip its single quotes.
    xvfb_proc = Popen(
        command_line,
        stdout=PIPE,
        stderr=STDOUT,
        preexec_fn=os.setpgrp,
        shell=True)

    # This code is very similar to what takes place in Popen._communicate(),
    # but this code times out if there is no activity on STDOUT for too long.
    # This keeps us from blocking when reading from a hung testrunner, allows
    # us to time out if the child process hangs, and avoids issues when using
    # Popen.communicate() with large data sets.
    output = xvfb_proc.stdout
    open_readers = set([output])
    while open_readers:
        # select() blocks for a long time and can easily fail with EINTR
        # <https://bugs.launchpad.net/launchpad/+bug/615740>.  Really we
        # should have EINTR protection across the whole script (other syscalls
        # might be interrupted) but this is the longest and most likely to
        # hit, and doing it perfectly in python has proved to be quite hard in
        # bzr. -- mbp 20100924
        while True:
            try:
                ready = select.select(open_readers, [], [], TIMEOUT)[0]
                break
            except select.error as e:
                # nb: select.error doesn't expose a named 'errno' attribute,
                # at least in python 2.6.5, so use the args tuple; see
                # <http://mail.python.org/pipermail/python-dev/2000-October/009671.html>
                if e.args[0] == errno.EINTR:
                    continue
                raise

        if not ready:
            # The select() statement timed out!  If the process we were
            # watching is still running it is hung; if it already died
            # there is nothing left to clean up.
            if xvfb_proc.poll() is None:
                cleanup_hung_testrunner(xvfb_proc)
            break

        if output in ready:
            # Read a chunk of output from STDOUT.
            chunk = os.read(output.fileno(), 1024)
            sys.stdout.write(chunk)
            if not chunk:
                # An empty read means end of file: gracefully exit the loop.
                open_readers.remove(output)

    rv = xvfb_proc.wait()

    if rv == 0:
        print('')
        print('Successfully ran all tests.')
    else:
        print('')
        print('Tests failed (exit code %d)' % rv)

    return rv
225
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
226
10866.2.5 by Maris Fogels
Nicer function name
227
def cleanup_hung_testrunner(process):
    """Kill and clean up the testrunner process and its children.

    `process` is the Popen object of the hung testrunner.  Its pid is used
    as the id of the process group to kill, and its stdout pipe is drained
    to our stdout afterwards.

    Raises AssertionError if the process is still running after every
    signal has been sent.
    """
    print('')
    print('')
    print("WARNING: A test appears to be hung. There has been no "
        "output for %d seconds." % TIMEOUT)
    print("Forcibly shutting down the test suite")

    # This guarantees the process will die.  In rare cases
    # a child process may survive this if they are in a different
    # process group and they ignore the signals we send their parent.
    nice_killpg(process.pid)

    # The process should absolutely be dead now.  Checked with an explicit
    # raise rather than an assert statement so that the check is not
    # stripped when Python runs with -O.
    if process.poll() is None:
        raise AssertionError(
            "Process %d is still running after being sent every signal."
            % process.pid)

    # Drain the subprocess's stdout and stderr.  Only output that is
    # already sitting in the pipe is read: a plain read() waits for end of
    # file, which never comes while a surviving child in another process
    # group still holds the pipe open, and that would hang us in turn.
    # Reading the file descriptor directly matches how run_test_process()
    # consumes this pipe.
    print("The dying processes left behind the following output:")
    print("--------------- BEGIN OUTPUT ---------------")
    stdout_fd = process.stdout.fileno()
    while select.select([stdout_fd], [], [], 0)[0]:
        chunk = os.read(stdout_fd, 1024)
        if not chunk:
            # End of file: every writer has closed the pipe.
            break
        sys.stdout.write(chunk)
    print('')
    print("---------------- END OUTPUT ----------------")
249
250
10866.2.12 by Maris Fogels
Simplified much of the process killing and shenanigans code.
251
def nice_killpg(pgid):
    """Kill a Unix process group using increasingly harmful signals.

    SIGTERM, SIGINT, SIGHUP and SIGKILL are sent to the group `pgid` in that
    order, pausing three seconds after each one.  Signalling stops as soon
    as os.killpg() reports that there is no such process group.  Any other
    OSError is propagated to the caller.
    """
    escalation = [SIGTERM, SIGINT, SIGHUP, SIGKILL]
    try:
        print("Process group %d will be killed" % pgid)

        # Attempt a series of increasingly brutal methods of killing the
        # process.
        for sig in escalation:
            print("Sending signal %s to process group %d" % (sig, pgid))
            os.killpg(pgid, sig)

            # Give the processes some time to shut down.
            time.sleep(3)

    except OSError as exc:
        # ESRCH means we tried to call os.killpg() and found the group to
        # be empty, which is the outcome we want.  Anything else is a real
        # error.
        if exc.errno != errno.ESRCH:
            raise
    print("Process group %d is now empty." % pgid)
272
2083 by Canonical.com Patch Queue Manager
[r=jamesh] testrunner improvements (?)
273
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
274
if __name__ == '__main__':
    # Use main()'s return value as the exit status of the script.
    exit_status = main()
    sys.exit(exit_status)