~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python -S
8687.15.10 by Karl Fogel
Add the copyright header block to top-level files.
2
#
11316.12.1 by Martin Pool
tolerate EINTR in test_on_merge.py
3
# Copyright 2009, 2010 Canonical Ltd.  This software is licensed under the
8687.15.10 by Karl Fogel
Add the copyright header block to top-level files.
4
# GNU Affero General Public License version 3 (see the file LICENSE).
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
5
6
"""Tests that get run automatically on a merge."""
9722.1.1 by Gary Poster
[testfix][r=salgado][ui=none] Try to fix the buildbot problem: buildbot is starting test_on_merge in a way that no-one else is.
7
import _pythonpath
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
8
14612.2.9 by William Grant
Other bits and pieces.
9
import errno
10
import os
11
import select
12
from signal import (
13
    SIGHUP,
14
    SIGINT,
15
    SIGKILL,
16
    SIGTERM,
17
    )
18
from StringIO import StringIO
19
from subprocess import (
20
    PIPE,
21
    Popen,
22
    STDOUT,
23
    )
24
import sys
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
25
import tabnanny
14612.2.9 by William Grant
Other bits and pieces.
26
import time
27
5821.2.85 by James Henstridge
Add "make check_launchpad_storm_on_merge" target that runs the tests
28
import psycopg2
10866.2.1 by Maris Fogels
Rewrote the signal handling to move the sentinal into its own process group, where it won't be killed along with its children. Move the process group control code from the testrunner into the sentinal.
29
6393.2.1 by Joey Stanford
change the rather long timeout setting to something more reasonable and cleanup the explaination
30
# TIMEOUT (expressed in seconds) is how long the testrunner may go without
# producing any output before it is deemed to be hung and is terminated.
# Its principal use is preventing a PQM job from hanging indefinitely and
# backing up the queue.
# e.g. Usage: TIMEOUT = 60 * 10
# This will set the timeout to 10 minutes.
TIMEOUT = 10 * 60

# Directory that contains this script, with symlinks resolved.
HERE = os.path.split(os.path.realpath(__file__))[0]
39
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
40
41
def main():
    """Prepare the test database, then hand over to bin/test.

    The test database is set up first; the testrunner is only started if
    that succeeded.  Arguments given to this script are passed on to
    bin/test by run_test_process().

    Returns 1 on error, otherwise it returns the testrunner's exit code.
    """
    database_status = setup_test_database()
    if database_status == 0:
        return run_test_process()
    return 1
52
53
54
def setup_test_database():
    """Set up a test instance of our postgresql database.

    The steps are, in order:

    1. Check that the PostgreSQL server is at least version 8.0.
    2. Drop any existing launchpad_ftest_template database and make sure
       no other connections to the Launchpad databases remain.
    3. Rebuild the template database by running ``make test`` in
       database/schema.
    4. Sanity check the search path, encoding and locale of the result.

    Every database connection opened here is closed again on all exit
    paths, including the error returns.

    Returns 0 for success, 1 for errors.  Unexpected database errors are
    propagated as psycopg2 exceptions.
    """
    # Sanity check PostgreSQL version. No point in trying to create a test
    # database when PostgreSQL is too old.
    con = psycopg2.connect('dbname=template1')
    try:
        cur = con.cursor()
        cur.execute('show server_version')
        server_version = cur.fetchone()[0]
        cur.close()
    finally:
        con.close()

    try:
        numeric_server_version = tuple(map(int, server_version.split('.')))
    except ValueError:
        # Skip this check if the version number is more complicated than
        # we expected.
        pass
    else:
        if numeric_server_version < (8, 0):
            print('Your PostgreSQL version is too old.  You need 8.x.x')
            print('You have %s' % server_version)
            return 1

    # Drop the template database if it exists - the Makefile does this
    # too, but we can explicitly check for errors here.
    con = psycopg2.connect('dbname=template1')
    try:
        # Isolation level 0 is autocommit; DROP DATABASE cannot run inside
        # a transaction block.
        con.set_isolation_level(0)
        cur = con.cursor()
        try:
            cur.execute('drop database launchpad_ftest_template')
        except psycopg2.ProgrammingError as x:
            if 'does not exist' not in str(x):
                raise

        # If there are existing database connections, terminate. We have
        # rogue processes still connected to the database.
        max_attempts = 2
        for attempt in range(max_attempts):
            cur.execute("""
                SELECT usename, current_query
                FROM pg_stat_activity
                WHERE datname IN (
                    'launchpad_dev', 'launchpad_ftest_template',
                    'launchpad_ftest')
                """)
            results = list(cur.fetchall())
            if not results:
                break
            # Rogue processes. Report, sleep for a bit, and try again.
            for usename, current_query in results:
                print('!! Open connection %s - %s' % (usename, current_query))
            # Only wait when another check will follow; sleeping after the
            # final attempt would just delay the failure report.
            if attempt + 1 < max_attempts:
                print('Sleeping')
                time.sleep(20)
        else:
            print('Cannot rebuild database. There are open connections.')
            return 1

        cur.close()
    finally:
        con.close()

    # Build the template database. Tests duplicate this.
    # Run make with its working directory set explicitly rather than via
    # a shell "cd dir; make": that form breaks on paths containing spaces
    # and would run make in the wrong directory if the cd failed.
    schema_dir = os.path.join(HERE, 'database', 'schema')
    try:
        devnull = open(os.devnull, 'w')
        try:
            make_status = Popen(
                ['make', 'test'], cwd=schema_dir, stdout=devnull).wait()
        finally:
            devnull.close()
    except OSError:
        # make could not be started (e.g. schema_dir is missing).
        make_status = 1
    if make_status != 0:
        print('Failed to create database or load sampledata.')
        return 1

    # Sanity check the database. No point running tests if the
    # bedrock is crumbling.
    con = psycopg2.connect('dbname=launchpad_ftest_template')
    try:
        cur = con.cursor()
        cur.execute('show search_path')
        search_path = cur.fetchone()[0]
        if search_path != '$user,public,ts2':
            print('Search path incorrect.')
            print('Add the following line to /etc/postgresql/postgresql.conf:')
            print("    search_path = '$user,public,ts2'")
            print("and tell postgresql to reload its configuration file.")
            return 1

        cur.execute("""
            select pg_encoding_to_char(encoding) as encoding from pg_database
            where datname='launchpad_ftest_template'
            """)
        enc = cur.fetchone()[0]
        if enc not in ('UNICODE', 'UTF8'):
            print('Database encoding incorrectly set')
            return 1

        cur.execute(r"""
            SELECT setting FROM pg_settings
            WHERE context='internal' AND name='lc_ctype'
            """)
        loc = cur.fetchone()[0]
        if loc != 'C':
            print('Database locale incorrectly set. Need to rerun initdb.')
            return 1

        cur.close()
    finally:
        # Explicitly close our connection - things will fail if we leave
        # open connections.
        con.close()

    return 0
156
157
158
def run_test_process():
    """Start the testrunner process and return its exit code.

    bin/test is run under xvfb-run, in its own process group, with the
    arguments this script was given.  Its combined stdout/stderr is
    forwarded to our stdout as it arrives.  If no output arrives for
    TIMEOUT seconds and the process is still alive, it is treated as hung
    and killed via cleanup_hung_testrunner().

    Returns the exit status reported by Popen.wait().
    """
    # The shell quoting helper lives in different modules depending on the
    # Python version, so import it locally with a fallback.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    print('Running tests.')
    os.chdir(HERE)

    # We run the test suite under a virtual frame buffer server so that the
    # JavaScript integration test suite can run.
    cmd = [
        '/usr/bin/xvfb-run',
        "--error-file=/var/tmp/xvfb-errors.log",
        # Already carries its own shell quoting; must not be quoted again.
        "--server-args='-screen 0 1024x768x24'",
        quote(os.path.join(HERE, 'bin', 'test')),
        ]
    # The command goes through a shell, so quote the caller's arguments:
    # test filters are often regular expressions full of characters such
    # as | ( ) * that the shell would otherwise interpret.
    cmd.extend(quote(arg) for arg in sys.argv[1:])
    command_line = ' '.join(cmd)
    print("Running command: %s" % command_line)

    # Run the test suite.  Make the suite the leader of a new process group
    # so that we can signal the group without signaling ourselves.
    xvfb_proc = Popen(
        command_line,
        stdout=PIPE,
        stderr=STDOUT,
        preexec_fn=os.setpgrp,
        shell=True)

    # This code is very similar to what takes place in Popen._communicate(),
    # but this code times out if there is no activity on STDOUT for too long.
    # This keeps us from blocking when reading from a hung testrunner, allows
    # us to time out if the child process hangs, and avoids issues when using
    # Popen.communicate() with large data sets.
    open_readers = set([xvfb_proc.stdout])
    while open_readers:
        # select() blocks for a long time and can easily fail with EINTR
        # <https://bugs.launchpad.net/launchpad/+bug/615740>.  Really we
        # should have EINTR protection across the whole script (other syscalls
        # might be interrupted) but this is the longest and most likely to
        # hit, and doing it perfectly in python has proved to be quite hard in
        # bzr. -- mbp 20100924
        while True:
            try:
                rlist, wlist, xlist = select.select(
                    open_readers, [], [], TIMEOUT)
                break
            except select.error as e:
                # nb: select.error doesn't expose a named 'errno' attribute,
                # at least in python 2.6.5, so look at the args tuple; see
                # <http://mail.python.org/pipermail/python-dev/2000-October/009671.html>
                if e.args[0] == errno.EINTR:
                    continue
                raise

        if len(rlist) == 0:
            # The select() statement timed out!
            if xvfb_proc.poll() is None:
                # Still running but silent for TIMEOUT seconds: hung.
                cleanup_hung_testrunner(xvfb_proc)
            # Otherwise the process we were watching died on its own.
            break

        if xvfb_proc.stdout in rlist:
            # Read a chunk of output from STDOUT.
            chunk = os.read(xvfb_proc.stdout.fileno(), 1024)
            if not chunk:
                # Gracefully exit the loop if STDOUT is empty (EOF).
                open_readers.remove(xvfb_proc.stdout)
            else:
                sys.stdout.write(chunk)
                # Flush so the output really is incremental even when our
                # own stdout is a pipe and therefore block buffered.
                sys.stdout.flush()

    rv = xvfb_proc.wait()

    print('')
    if rv == 0:
        print('Successfully ran all tests.')
    else:
        print('Tests failed (exit code %d)' % rv)

    return rv
236
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
237
10866.2.5 by Maris Fogels
Nicer function name
238
def cleanup_hung_testrunner(process):
    """Kill and clean up the testrunner process and its children.

    `process` is the Popen object for the testrunner, which leads its own
    process group.  The whole group is signalled via nice_killpg(), and
    whatever output is still waiting on the process's stdout is then
    copied to our stdout.
    """
    warning = (
        "WARNING: A test appears to be hung. There has been no "
        "output for %d seconds." % TIMEOUT)
    print('')
    print('')
    print(warning)
    print("Forcibly shutting down the test suite")

    # This guarantees the process will die.  In rare cases
    # a child process may survive this if they are in a different
    # process group and they ignore the signals we send their parent.
    process_group = process.pid
    nice_killpg(process_group)

    # The process should absolutely be dead now.
    exit_status = process.poll()
    assert exit_status is not None

    # Drain the subprocess's stdout and stderr.
    print("The dying processes left behind the following output:")
    print("--------------- BEGIN OUTPUT ---------------")
    leftover_output = process.stdout.read()
    sys.stdout.write(leftover_output)
    print('')
    print("---------------- END OUTPUT ----------------")
260
261
10866.2.12 by Maris Fogels
Simplified much of the process killing and shenanigans code.
262
def nice_killpg(pgid):
    """Kill a Unix process group using increasingly harmful signals.

    Sends SIGTERM, SIGINT, SIGHUP and finally SIGKILL to process group
    `pgid`, pausing three seconds after each one.  The escalation stops
    early as soon as os.killpg() reports ESRCH.  Any other OSError is
    propagated.
    """
    # Ordered from the most polite request to the most brutal.
    escalation = (SIGTERM, SIGINT, SIGHUP, SIGKILL)
    try:
        print("Process group %d will be killed" % pgid)

        for signum in escalation:
            print("Sending signal %s to process group %d" % (signum, pgid))
            os.killpg(pgid, signum)

            # Give the processes some time to shut down.
            time.sleep(3)

    except OSError as exc:
        # ESRCH means we tried to call os.killpg() and found the group to
        # be empty; anything else is a genuine error.
        if exc.errno != errno.ESRCH:
            raise

    print("Process group %d is now empty." % pgid)
283
2083 by Canonical.com Patch Queue Manager
[r=jamesh] testrunner improvements (?)
284
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
285
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)