~launchpad-pqm/launchpad/devel

14564.1.1 by Jeroen Vermeulen
lpserve lint.
1
# Copyright 2009-2011 Canonical Ltd.  This software is licensed under the
8687.15.6 by Karl Fogel
Add the copyright header block to another file.
2
# GNU Affero General Public License version 3 (see the file LICENSE).
4277.4.12 by Jonathan Lange
Shift Launchpad server plugin around. Still no tests :(
3
4277.4.22 by Jonathan Lange
A tonne of docstrings and comments.
4
"""Bazaar plugin to run the smart server on Launchpad.
5
6
Cribbed from bzrlib.builtins.cmd_serve from Bazaar 0.16.
7
"""
4277.4.12 by Jonathan Lange
Shift Launchpad server plugin around. Still no tests :(
8
9
__metaclass__ = type
10
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
11
__all__ = [
12
    'cmd_launchpad_server',
13
    'cmd_launchpad_forking_service',
14
    ]
4277.4.12 by Jonathan Lange
Shift Launchpad server plugin around. Still no tests :(
15
16
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
17
import errno
12344.5.1 by John Arbash Meinel
Change how the children connect to their fifos.
18
import fcntl
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
19
import logging
11149.13.1 by John Arbash Meinel
Start working on a trivial stdin/out/err redirector.
20
import os
7675.578.1 by Michael Hudson
limit lp-serve processes to 4 (decimal) gigabytes
21
import resource
11149.12.17 by John Arbash Meinel
change the name a bit, prepare for changing the 'fork' request.
22
import shlex
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
23
import shutil
11149.12.26 by John Arbash Meinel
Still not quite working right, but getting better.
24
import signal
11149.12.2 by John Arbash Meinel
Start working on some basic infrastructure.
25
import socket
4277.4.7 by Jonathan Lange
Add a sucky implementation of launchpad serving command.
26
import sys
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
27
import tempfile
11149.12.10 by John Arbash Meinel
Start building the testing infrastructure.
28
import threading
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
29
import time
4277.4.7 by Jonathan Lange
Add a sucky implementation of launchpad serving command.
30
11149.12.52 by John Arbash Meinel
Handle EINTR while in recv (because of SIGCHLD handler) by using osutils.read_bytes_from_socket
31
from bzrlib import (
32
    commands,
12344.5.1 by John Arbash Meinel
Change how the children connect to their fifos.
33
    errors,
11149.12.52 by John Arbash Meinel
Handle EINTR while in recv (because of SIGCHLD handler) by using osutils.read_bytes_from_socket
34
    lockdir,
35
    osutils,
36
    trace,
37
    ui,
38
    )
14564.1.1 by Jeroen Vermeulen
lpserve lint.
39
from bzrlib.commands import (
40
    Command,
41
    register_command,
42
    )
43
from bzrlib.option import (
44
    Option,
45
    RegistryOption,
46
    )
14363.2.1 by Jelmer Vernooij
Use server registry to allow serving different protocols.
47
from bzrlib.transport import (
48
    get_transport,
49
    transport_server_registry,
50
    )
4277.4.7 by Jonathan Lange
Add a sucky implementation of launchpad serving command.
51
52
4277.4.12 by Jonathan Lange
Shift Launchpad server plugin around. Still no tests :(
53
class cmd_launchpad_server(Command):
    """Run a Bazaar server that maps Launchpad branch URLs to the internal
    file-system format.
    """

    aliases = ['lp-serve']

    # NOTE(review): the options below declare upload-directory,
    # mirror-directory and codehosting-endpoint, while run() accepts
    # branch_directory and codehosting_endpoint_url instead -- confirm how
    # those options are meant to reach run().
    takes_options = [
        Option(
            'inet',
            help="serve on stdin/out for use from inetd or sshd"),
        Option(
            'port',
            help=(
                "listen for connections on nominated port of the form "
                "[hostname:]portnumber. Passing 0 as the port number will "
                "result in a dynamically allocated port. Default port is "
                " 4155."),
            type=str),
        Option(
            'upload-directory',
            help=(
                "upload branches to this directory. Defaults to "
                "config.codehosting.hosted_branches_root."),
            type=unicode),
        Option(
            'mirror-directory',
            help=(
                "serve branches from this directory. Defaults to "
                "config.codehosting.mirrored_branches_root.")),
        Option(
            'codehosting-endpoint',
            help=(
                "the url of the internal XML-RPC server. Defaults to "
                "config.codehosting.codehosting_endpoint."),
            type=unicode),
        RegistryOption(
            'protocol', help="Protocol to serve.",
            lazy_registry=('bzrlib.transport', 'transport_server_registry'),
            value_switches=True),
        ]

    takes_args = ['user_id']

    def run_server(self, smart_server):
        """Run the given smart server.

        :param smart_server: An object whose serve() method is called while
            the UI factory is replaced by a silent one.
        """
        # for the duration of this server, no UI output is permitted.
        # note that this may cause problems with blackbox tests. This should
        # be changed with care though, as we dont want to use bandwidth
        # sending progress over stderr to smart server clients!
        old_factory = ui.ui_factory
        try:
            ui.ui_factory = ui.SilentUIFactory()
            smart_server.serve()
        finally:
            # Always put the previous UI factory back, even if serve() fails.
            ui.ui_factory = old_factory

    def get_host_and_port(self, port):
        """Return the host and port to run the smart server on.

        If 'port' is None, None will be returned for the host and port.

        If 'port' has a colon in it, the string before the last colon will
        be interpreted as the host, and the rest as the port number.

        :param port: A string of the port to run the server on.
        :return: A tuple of (host, port), where 'host' is a host name or IP,
            and port is an integer TCP/IP port.
        :raises ValueError: If the port part is not an integer.
        """
        host = None
        if port is not None:
            if ':' in port:
                # Split on the *last* colon only: a host that itself contains
                # colons would otherwise yield more than two pieces and make
                # the unpacking blow up.
                host, port = port.rsplit(':', 1)
            port = int(port)
        return host, port

    def run(self, user_id, port=None, branch_directory=None,
            codehosting_endpoint_url=None, inet=False, protocol=None):
        """Start the Launchpad server and serve it over `protocol`.

        :param user_id: The user id; converted with int() before being
            handed to get_lp_server.
        :param port: Optional '[hostname:]portnumber' string, parsed by
            get_host_and_port.
        :param branch_directory: Passed through to get_lp_server.
        :param codehosting_endpoint_url: Passed through to get_lp_server.
        :param inet: Passed through to the protocol callable.
        :param protocol: Callable invoked as
            protocol(transport, host, port, inet).  When None, whatever
            transport_server_registry.get() returns is used.
        """
        # Imported here rather than at module level, as in the original code.
        from lp.codehosting.bzrutils import install_oops_handler
        from lp.codehosting.vfs import get_lp_server, hooks
        install_oops_handler(user_id)
        # Cap the address space of this process at 4 (decimal) gigabytes.
        four_gig = int(4e9)
        resource.setrlimit(resource.RLIMIT_AS, (four_gig, four_gig))
        seen_new_branch = hooks.SetProcTitleHook()
        if protocol is None:
            protocol = transport_server_registry.get()
        lp_server = get_lp_server(
            int(user_id), codehosting_endpoint_url, branch_directory,
            seen_new_branch.seen)
        lp_server.start_server()
        try:
            old_lockdir_timeout = lockdir._DEFAULT_TIMEOUT_SECONDS
            lp_transport = get_transport(lp_server.get_url())
            host, port = self.get_host_and_port(port)
            # The default lock timeout is zeroed for the duration of the
            # protocol call and restored afterwards.
            lockdir._DEFAULT_TIMEOUT_SECONDS = 0
            try:
                protocol(lp_transport, host, port, inet)
            finally:
                lockdir._DEFAULT_TIMEOUT_SECONDS = old_lockdir_timeout
        finally:
            # Whatever happened above, the server started earlier is stopped.
            lp_server.stop_server()
4277.4.12 by Jonathan Lange
Shift Launchpad server plugin around. Still no tests :(
154
155
156
# Register the command class defined above via bzrlib's register_command;
# this runs when the module is loaded.
register_command(cmd_launchpad_server)
11149.13.1 by John Arbash Meinel
Start working on a trivial stdin/out/err redirector.
157
158
11149.12.17 by John Arbash Meinel
change the name a bit, prepare for changing the 'fork' request.
159
class LPForkingService(object):
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
160
    """A service that can be asked to start a new bzr subprocess via fork.
161
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
162
    The basic idea is that bootstrapping time is long. Most of this is time
163
    spent during import of all needed libraries (lp.*).  For example, the
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
164
    original 'lp-serve' command could take 2.5s just to start up, before any
165
    actual actions could be performed.
166
167
    This class provides a service sitting on a socket, which can then be
168
    requested to fork and run a given bzr command.
169
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
170
    Clients connect to the socket and make a single request, which then
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
171
    receives a response. The possible requests are:
172
173
        "hello\n":  Trigger a heartbeat to report that the program is still
174
                    running, and write status information to the log file.
175
        "quit\n":   Stop the service, but do so 'nicely', waiting for children
176
                    to exit, etc. Once this is received the service will stop
177
                    taking new requests on the port.
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
178
        "fork-env <command>\n<env>\nend\n": Request a new subprocess to be
179
            started.  <command> is the bzr command to be run, such as "rocks"
180
            or "lp-serve --inet 12".
14564.1.1 by Jeroen Vermeulen
lpserve lint.
181
            The immediate response will be the path-on-disk to a directory
182
            full of named pipes (fifos) that will be the stdout/stderr/stdin
183
            (named accordingly) of the new process.
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
184
            If a client holds the socket open, when the child process exits,
185
            the exit status (as given by 'wait()') will be written to the
186
            socket.
187
188
            Note that one of the key bits is that the client will not be
189
            started with exec*, we just call 'commands.run_bzr*()' directly.
190
            This way, any modules that are already loaded will not need to be
191
            loaded again. However, care must be taken with any global-state
192
            that should be reset.
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
193
194
            fork-env allows you to supply environment variables such as
14564.1.1 by Jeroen Vermeulen
lpserve lint.
195
            "BZR_EMAIL: joe@foo.com" which will be set in os.environ before
196
            the command is run.
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
197
    """
198
199
    # Design decisions. These are bits where we could have chosen a different
200
    # method/implementation and weren't sure what would be best. Documenting
201
    # the current decision, and the alternatives.
202
    #
203
    # [Decision #1]
11149.12.78 by John Arbash Meinel
Change LPForkingService to use a unix domain socket.
204
    #   Serve on a named AF_UNIX socket.
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
205
    #       1) It doesn't make sense to serve to arbitrary hosts, we only want
206
    #          the local host to make requests. (Since the client needs to
207
    #          access the named fifos on the current filesystem.)
11149.12.78 by John Arbash Meinel
Change LPForkingService to use a unix domain socket.
208
    #       2) You can set security parameters on a filesystem path (g+rw,
209
    #          a-rw).
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
210
    # [Decision #2]
211
    #   SIGCHLD
212
    #       We want to quickly detect that children have exited so that we can
213
    #       inform the client process quickly. At the moment, we register a
214
    #       SIGCHLD handler that doesn't do anything. However, it means that
215
    #       when we get the signal, if we are currently blocked in something
216
    #       like '.accept()', we will jump out temporarily. At that point the
217
    #       main loop will check if any children have exited. We could have
218
    #       done this work as part of the signal handler, but that felt 'racy'
219
    #       doing any serious work in a signal handler.
220
    #       If we just used socket.timeout as the indicator to go poll for
221
    #       children exiting, it slows the disconnect by as much as the full
222
    #       timeout. (So a timeout of 1.0s will cause the process to hang by
223
    #       that long until it determines that a child has exited, and can
224
    #       close the connection.)
225
    #       The current flow means that we'll notice exited children whenever
226
    #       we finish the current work.
227
    # [Decision #3]
228
    #   Child vs Parent actions.
229
    #       There are several actions that are done when we get a new request.
230
    #       We have to create the fifos on disk, fork a new child, connect the
231
    #       child to those handles, and inform the client of the new path (not
14564.1.1 by Jeroen Vermeulen
lpserve lint.
232
    #       necessarily in that order.) It makes sense to wait to send the
233
    #       path message until after the fifos have been created. That way the
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
234
    #       client can just try to open them immediately, and the
235
    #       client-and-child will be synchronized by the open() calls.
236
    #       However, should the client be the one doing the mkfifo, should the
14564.1.1 by Jeroen Vermeulen
lpserve lint.
237
    #       server? Who should be sending the message? Should we fork after
238
    #       the mkfifo or before?
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
239
    #       The current thoughts:
14564.1.1 by Jeroen Vermeulen
lpserve lint.
240
    #           1) Try to do work in the child when possible. This should
241
    #              allow for 'scaling' because the server is single-threaded.
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
242
    #           2) We create the directory itself in the server, because that
243
    #              allows the server to monitor whether the client failed to
244
    #              clean up after itself or not.
245
    #           3) Otherwise we create the fifos in the client, and then send
246
    #              the message back.
247
    # [Decision #4]
248
    #   Exit information
14564.1.1 by Jeroen Vermeulen
lpserve lint.
249
    #       Inform the client that the child has exited on the socket they
250
    #       used to request the fork.
251
    #       1) Arguably they could see that stdout and stderr have been
252
    #          closed, and thus stop reading. In testing, I wrote a client
253
    #          which uses select.poll() over stdin/stdout/stderr and used that
254
    #          to ferry the content to the appropriate local handle. However
255
    #          for the FIFOs, when the remote end closed, I wouldn't see any
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
256
    #          corresponding information on the local end. There obviously
257
    #          wasn't any data to be read, so they wouldn't show up as
258
    #          'readable' (for me to try to read, and get 0 bytes, indicating
14564.1.1 by Jeroen Vermeulen
lpserve lint.
259
    #          it was closed). I also wasn't seeing POLLHUP, which seemed to
260
    #          be the correct indicator.  As such, we decided to inform the
261
    #          client on the socket that they originally made the fork
262
    #          request, rather than just closing the socket immediately.
11149.12.81 by John Arbash Meinel
Respond to Andrew's feedback.
263
    #       2) We could have had the forking server close the socket, and only
264
    #          the child hold the socket open. When the child exits, then the
265
    #          OS naturally closes the socket.
266
    #          If we want the returncode, then we should put that as bytes on
267
    #          the socket before we exit. Having the child do the work means
11149.12.84 by John Arbash Meinel
Cleanup some 'make lint' warnings.
268
    #          that in error conditions, it could easily die before being able
269
    #          to write anything (think SEGFAULT, etc). The forking server is
11149.12.81 by John Arbash Meinel
Respond to Andrew's feedback.
270
    #          already 'wait()'ing on its children, so that we don't get
271
    #          zombies, and with wait3() we can get the rusage (user time,
272
    #          memory consumption, etc.)
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
273
    #          As such, it seems reasonable that the server can then also
274
    #          report back when a child is seen as exiting.
275
    # [Decision #5]
276
    #   cleanup once connected
277
    #       The child process blocks during 'open()' waiting for the client to
14564.1.1 by Jeroen Vermeulen
lpserve lint.
278
    #       connect to its fifos. Once the client has connected, the child
279
    #       then deletes the temporary directory and the fifos from disk. This
280
    #       means that there isn't much left for diagnosis, but it also means
281
    #       that the client won't leave garbage around if it crashes, etc.
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
282
    #       Note that the forking service itself still monitors the paths
283
    #       created, and will delete garbage if it sees that a child failed to
284
    #       do so.
285
    # [Decision #6]
286
    #   os._exit(retcode) in the child
287
    #       Calling sys.exit(retcode) raises an exception, which then bubbles
14564.1.1 by Jeroen Vermeulen
lpserve lint.
288
    #       up the stack and runs exit functions (and finally statements).
289
    #       When I tried using it originally, I would see the current child
290
    #       bubble all the way up the stack (through the server code that it
291
    #       fork() through), and then get to main() returning code 0. The
292
    #       process would still exit nonzero. My guess is that something in
293
    #       the atexit functions was failing, but that it was happening after
294
    #       logging, etc had been shut down.
11149.12.81 by John Arbash Meinel
Respond to Andrew's feedback.
295
    #       Any global state from the child process should be flushed before
14564.1.1 by Jeroen Vermeulen
lpserve lint.
296
    #       run_bzr_* has exited (which we *do* wait for), and any other
297
    #       global state is probably a remnant from the service process. Which
298
    #       will be cleaned up by the service itself, rather than the child.
11149.12.81 by John Arbash Meinel
Respond to Andrew's feedback.
299
    #       There is some concern that log files may not get flushed, so we
300
    #       currently call sys.exitfunc() first. The main problem is that I
14564.1.1 by Jeroen Vermeulen
lpserve lint.
301
    #       don't know any way to *remove* a function registered via
302
    #       'atexit()' so if the forking service has some state, we may try to
303
    #       clean it up incorrectly.
11149.12.81 by John Arbash Meinel
Respond to Andrew's feedback.
304
    #       Note that the bzr script itself uses sys.exitfunc(); os._exit() in
14564.1.1 by Jeroen Vermeulen
lpserve lint.
305
    #       the 'bzr' main script, as the teardown time of all the python
306
    #       state was quite noticeable in real-world runtime. As such, bzrlib
307
    #       should be pretty safe, or it would have been failing for people
308
    #       already.
11149.12.59 by John Arbash Meinel
Enable passing env vars to the 'fork' request.
309
    # [Decision #7]
310
    #   prefork vs max children vs ?
311
    #       For simplicity it seemed easiest to just fork when requested. Over
312
    #       time, I realized it would be easy to allow running an arbitrary
313
    #       command (no harder than just running one command), so it seemed
314
    #       reasonable to switch over. If we go the prefork route, then we'll
315
    #       need a way to tell the pre-forked children what command to run.
316
    #       This could be as easy as just adding one more fifo that they wait
317
    #       on in the same directory.
318
    #       For now, I've chosen not to limit the number of forked children. I
319
    #       don't know what a reasonable value is, and probably there are
320
    #       already limitations at play. (If Conch limits connections, then it
321
    #       will already be doing all the work, etc.)
322
    # [Decision #8]
11149.12.81 by John Arbash Meinel
Respond to Andrew's feedback.
323
    #   nicer errors on the request socket
11149.12.72 by John Arbash Meinel
More cleanup and doc passes.
324
    #       This service is meant to be run only on the local system. As such,
325
    #       we don't try to be extra defensive about leaking information to
11149.12.81 by John Arbash Meinel
Respond to Andrew's feedback.
326
    #       the one connecting to the socket. (We should still watch out what
327
    #       we send across the per-child fifos, since those are connected to
328
    #       remote clients.) Instead we try to be helpful, and tell them as
329
    #       much as we know about what went wrong.
11149.12.54 by John Arbash Meinel
Start documenting the rationale behind various choices.
330
11149.12.78 by John Arbash Meinel
Change LPForkingService to use a unix domain socket.
331
    DEFAULT_PATH = '/var/run/launchpad_forking_service.sock'
14564.1.1 by Jeroen Vermeulen
lpserve lint.
332
333
    # Permissions on the master socket (rw-rw----)
334
    DEFAULT_PERMISSIONS = 00660
335
336
    # Wait no more than 5 minutes for children.
337
    WAIT_FOR_CHILDREN_TIMEOUT = 5 * 60
338
11149.12.47 by John Arbash Meinel
Remove the 'status' tracking and command.
339
    SOCKET_TIMEOUT = 1.0
11149.12.10 by John Arbash Meinel
Start building the testing infrastructure.
340
    SLEEP_FOR_CHILDREN_TIMEOUT = 1.0
14564.1.1 by Jeroen Vermeulen
lpserve lint.
341
342
    # No request should take longer than this to be read.
343
    WAIT_FOR_REQUEST_TIMEOUT = 1.0
344
345
    # If we get a fork() request, but nobody connects, just exit.
346
    # On a heavily loaded server it could take a few seconds, but it
347
    # should never take minutes.
348
    CHILD_CONNECT_TIMEOUT = 120
11149.12.2 by John Arbash Meinel
Start working on some basic infrastructure.
349
11149.12.17 by John Arbash Meinel
change the name a bit, prepare for changing the 'fork' request.
350
    _fork_function = os.fork
351
11149.12.78 by John Arbash Meinel
Change LPForkingService to use a unix domain socket.
352
    def __init__(self, path=DEFAULT_PATH, perms=DEFAULT_PERMISSIONS):
353
        self.master_socket_path = path
354
        self._perms = perms
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
355
        self._start_time = None
11149.12.10 by John Arbash Meinel
Start building the testing infrastructure.
356
        self._should_terminate = threading.Event()
11149.12.3 by John Arbash Meinel
Basic interface up and running.
357
        # We address these locally, in case of shutdown socket may be gc'd
358
        # before we are
359
        self._socket_timeout = socket.timeout
360
        self._socket_error = socket.error
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
361
        # Map from pid => (temp_path_for_handles, request_socket)
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
362
        self._child_processes = {}
11149.12.23 by John Arbash Meinel
Create a command that just replays content that it read back to its output.
363
        self._children_spawned = 0
12344.5.1 by John Arbash Meinel
Change how the children connect to their fifos.
364
        self._child_connect_timeout = self.CHILD_CONNECT_TIMEOUT
11149.12.3 by John Arbash Meinel
Basic interface up and running.
365
366
    def _create_master_socket(self):
        """Bind the AF_UNIX master socket and start listening on it.

        The socket object is stored on self._server_socket.  If permissions
        were configured, they are applied to the socket path right after
        the bind.  The accept timeout is set to SOCKET_TIMEOUT.
        """
        server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        self._server_socket = server
        sock_path = self.master_socket_path
        server.bind(sock_path)
        if self._perms is not None:
            os.chmod(sock_path, self._perms)
        server.listen(5)
        timeout = self.SOCKET_TIMEOUT
        server.settimeout(timeout)
        trace.mutter('set socket timeout to: %s' % (timeout,))
11149.12.3 by John Arbash Meinel
Basic interface up and running.
375
11149.12.79 by John Arbash Meinel
Switch the Conch server and runlaunchpad to use the new path vs port.
376
    def _cleanup_master_socket(self):
377
        self._server_socket.close()
378
        try:
379
            os.remove(self.master_socket_path)
14564.1.1 by Jeroen Vermeulen
lpserve lint.
380
        except (OSError, IOError):
11149.12.79 by John Arbash Meinel
Switch the Conch server and runlaunchpad to use the new path vs port.
381
            # If we don't delete it, then we get 'address already in
14564.1.1 by Jeroen Vermeulen
lpserve lint.
382
            # use' failures.
383
            trace.mutter('failed to cleanup: %s' % (self.master_socket_path,))
11149.12.79 by John Arbash Meinel
Switch the Conch server and runlaunchpad to use the new path vs port.
384
11149.12.48 by John Arbash Meinel
Create a SIGCHLD handler.
385
    def _handle_sigchld(self, signum, frm):
14564.1.1 by Jeroen Vermeulen
lpserve lint.
386
        # We don't actually do anything here, we just want an interrupt
387
        # (EINTR) on socket.accept() when SIGCHLD occurs.
11149.12.66 by John Arbash Meinel
SIGTERM is now handled gracefully as a normal shutdown, rather than crashing.
388
        pass
389
390
    def _handle_sigterm(self, signum, frm):
391
        # Unregister this as the default handler, 2 SIGTERMs will exit us.
392
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
14564.1.1 by Jeroen Vermeulen
lpserve lint.
393
        # SIGTERM should also generate EINTR on our wait loop, so this
394
        # should be enough.
11149.12.66 by John Arbash Meinel
SIGTERM is now handled gracefully as a normal shutdown, rather than crashing.
395
        self._should_terminate.set()
396
397
    def _register_signals(self):
398
        """Register a SIGCHILD and SIGTERM handler.
11149.12.48 by John Arbash Meinel
Create a SIGCHLD handler.
399
400
        If we have a trigger for SIGCHILD then we can quickly respond to
401
        clients when their process exits. The main risk is getting more EAGAIN
402
        errors elsewhere.
11149.12.66 by John Arbash Meinel
SIGTERM is now handled gracefully as a normal shutdown, rather than crashing.
403
404
        SIGTERM allows us to cleanup nicely before we exit.
11149.12.48 by John Arbash Meinel
Create a SIGCHLD handler.
405
        """
11149.12.66 by John Arbash Meinel
SIGTERM is now handled gracefully as a normal shutdown, rather than crashing.
406
        signal.signal(signal.SIGCHLD, self._handle_sigchld)
407
        signal.signal(signal.SIGTERM, self._handle_sigterm)
11149.12.48 by John Arbash Meinel
Create a SIGCHLD handler.
408
11149.12.66 by John Arbash Meinel
SIGTERM is now handled gracefully as a normal shutdown, rather than crashing.
409
    def _unregister_signals(self):
410
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
411
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
11149.12.48 by John Arbash Meinel
Create a SIGCHLD handler.
412
12344.5.1 by John Arbash Meinel
Change how the children connect to their fifos.
413
    def _compute_paths(self, base_path):
414
        stdin_path = os.path.join(base_path, 'stdin')
415
        stdout_path = os.path.join(base_path, 'stdout')
416
        stderr_path = os.path.join(base_path, 'stderr')
417
        return (stdin_path, stdout_path, stderr_path)
418
11149.12.9 by John Arbash Meinel
add some tracing, fix some bugs
419
    def _create_child_file_descriptors(self, base_path):
12344.5.1 by John Arbash Meinel
Change how the children connect to their fifos.
420
        stdin_path, stdout_path, stderr_path = self._compute_paths(base_path)
11149.12.5 by John Arbash Meinel
We now fork, rewrite the file handles, and redirect them to the new child.
421
        os.mkfifo(stdin_path)
422
        os.mkfifo(stdout_path)
423
        os.mkfifo(stderr_path)
11149.12.9 by John Arbash Meinel
add some tracing, fix some bugs
424
12344.5.1 by John Arbash Meinel
Change how the children connect to their fifos.
425
    def _set_blocking(self, fd):
426
        """Change the file descriptor to unset the O_NONBLOCK flag."""
427
        flags = fcntl.fcntl(fd, fcntl.F_GETFD)
428
        flags = flags & (~os.O_NONBLOCK)
429
        fcntl.fcntl(fd, fcntl.F_SETFD, flags)
430
431
    def _open_handles(self, base_path):
        """Open the given file handles.

        Each open blocks until another process opens the other end of the
        FIFO, so the peer must connect in the same order (stdin, stdout,
        stderr).  To avoid waiting forever, a SIGALRM is scheduled for
        self._child_connect_timeout seconds (but never less than 1).

        :param base_path: The directory where all FIFOs are located.
        :return: [stdin_fid, stdout_fid, stderr_fid].
        :raises errors.BzrError: If one of the FIFOs could not be opened.
            Any descriptors opened so far are closed first.
        """
        stdin_path, stdout_path, stderr_path = self._compute_paths(base_path)
        # These open calls will block until another process connects (which
        # must connect in the same order)
        fids = []
        to_open = [(stdin_path, os.O_RDONLY), (stdout_path, os.O_WRONLY),
                   (stderr_path, os.O_WRONLY)]
        # If we set it to 0, we won't get an alarm, so require some time > 0.
        signal.alarm(max(1, self._child_connect_timeout))
        tstart = time.time()
        for path, flags in to_open:
            try:
                fids.append(os.open(path, flags))
            except OSError:
                # In production code, signal.alarm will generally just kill
                # us. But if something installs a signal handler for SIGALRM,
                # do what we can to die gracefully.
                # The open may also have failed for a reason other than the
                # alarm, in which case the alarm is still pending; cancel it
                # so it cannot fire in the middle of the cleanup below.
                signal.alarm(0)
                error = ('After %.3fs we failed to open %s, exiting'
                         % (time.time() - tstart, path,))
                trace.warning(error)
                for fid in fids:
                    try:
                        os.close(fid)
                    except OSError:
                        pass
                raise errors.BzrError(error)
        # If we get to here, that means all the handles were opened
        # successfully, so cancel the wakeup call.
        signal.alarm(0)
        return fids
470
471
    def _cleanup_fifos(self, base_path):
        """Delete the three FIFOs and then the directory that held them.

        :param base_path: The directory where all FIFOs are located.
        """
        # Now that we've opened the handles, delete everything so that
        # we don't leave garbage around.  Because the open() is done in
        # blocking mode, we know that someone has already connected to
        # them, and we don't want anyone else getting confused and
        # connecting.
        # See [Decision #5].
        for fifo_path in self._compute_paths(base_path):
            os.remove(fifo_path)
        os.rmdir(base_path)
484
485
    def _bind_child_file_descriptors(self, base_path):
        """Rebind stdin/stdout/stderr of this process to the FIFOs.

        The FIFOs under base_path are opened, the current standard streams
        are closed, the FIFO descriptors are duplicated onto descriptors 0, 1
        and 2, and sys.std* plus ui.ui_factory are pointed at new file
        objects.  Finally the FIFOs and their directory are removed from
        disk.

        :param base_path: The directory where all FIFOs are located.
        """
        # This blocks until the client has connected to every FIFO (or
        # raises if that does not happen in time).
        stdin_fid, stdout_fid, stderr_fid = self._open_handles(base_path)
        # Note: by this point bzrlib has opened stderr for logging
        # (as part of starting the service process in the first place).
        # As such, it has a stream handler that writes to stderr.
        # logging tries to flush and close that, but the file is already
        # closed.
        # This just suppresses that exception.
        logging.raiseExceptions = False
        sys.stdin.close()
        sys.stdout.close()
        sys.stderr.close()
        # Make the FIFOs the process-level standard descriptors.
        os.dup2(stdin_fid, 0)
        os.dup2(stdout_fid, 1)
        os.dup2(stderr_fid, 2)
        # The Python-level file objects wrap the FIFO descriptors returned by
        # _open_handles, not the duplicated descriptors 0/1/2.
        sys.stdin = os.fdopen(stdin_fid, 'rb')
        sys.stdout = os.fdopen(stdout_fid, 'wb')
        sys.stderr = os.fdopen(stderr_fid, 'wb')
        ui.ui_factory.stdin = sys.stdin
        ui.ui_factory.stdout = sys.stdout
        ui.ui_factory.stderr = sys.stderr
        self._cleanup_fifos(base_path)
11149.12.5 by John Arbash Meinel
We now fork, rewrite the file handles, and redirect them to the new child.
507
11149.12.83 by John Arbash Meinel
Review feedback from Michael Hudson.
508
    def _close_child_file_descriptors(self):
        """Close sys.stdin, sys.stderr and sys.stdout, in that order."""
        for stream in (sys.stdin, sys.stderr, sys.stdout):
            stream.close()
512
11149.12.41 by John Arbash Meinel
Test suite passing again, this time with the master process
513
    def become_child(self, command_argv, path):
        """We are in the spawned child code, do our magic voodoo.

        This never returns: the process always ends with os._exit().

        :param command_argv: The command to run, as passed to
            self._run_child_command.
        :param path: The directory holding the FIFOs that become this
            process's stdin, stdout and stderr.
        """
        # Exit code used if anything fails before the command reports its own
        # return code.
        retcode = 127  # Failed in a bad way, poor cleanup, etc.
        try:
            # Stop tracking new signals
            self._unregister_signals()
            # Reset the start time
            trace._bzr_log_start_time = time.time()
            trace.mutter('%d starting %r'
                         % (os.getpid(), command_argv))
            self._bind_child_file_descriptors(path)
            retcode = self._run_child_command(command_argv)
        finally:
            # We force os._exit() here, because we don't want to unwind
            # the stack, which has complex results. (We can get it to
            # unwind back to the cmd_launchpad_forking_service code, and
            # even back to main() reporting the return code, but after
            # that, suddenly the return code changes from a '0' to a
            # '1', with no logging of info.)
            os._exit(retcode)
11149.12.19 by John Arbash Meinel
I think I have it hooked up, but the test is failing.
533
11149.12.41 by John Arbash Meinel
Test suite passing again, this time with the master process
534
    def _run_child_command(self, command_argv):
        """Run the requested bzr command in this (child) process.

        :param command_argv: The command and its arguments, as a list.
        :return: The return code from commands.run_bzr_catch_errors.
        """
        # This is the point where we would actually want to do something with
        # our life
        # TODO: We may want to consider special-casing the 'lp-serve'
        # command.  As that is the primary use-case for this service, it
        # might be interesting to have an already-instantiated instance,
        # where we can just pop on an extra argument and be ready to go.
        # However, that would probably only really be measurable if we
        # prefork. As it looks like ~200ms is 'fork()' time, but only
        # 50ms is run-the-command time.
        retcode = commands.run_bzr_catch_errors(command_argv)
        self._close_child_file_descriptors()
        trace.mutter('%d finished %r'
                     % (os.getpid(), command_argv))
        # TODO: Should we call sys.exitfunc() here? it allows atexit
        #       functions to fire, however, some of those may be still
        #       around from the parent process, which we don't really want.
        # sys.exitfunc only exists once something (normally the atexit
        # module) has set it.  Look it up defensively so that a missing hook
        # does not turn a finished command into an AttributeError.
        exitfunc = getattr(sys, 'exitfunc', None)
        if exitfunc is not None:
            exitfunc()
        # See [Decision #6]
        return retcode
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
554
11149.12.17 by John Arbash Meinel
change the name a bit, prepare for changing the 'fork' request.
555
    @staticmethod
556
    def command_to_argv(command_str):
557
        """Convert a 'foo bar' style command to [u'foo', u'bar']"""
558
        # command_str must be a utf-8 string
559
        return [s.decode('utf-8') for s in shlex.split(command_str)]
560
11149.12.59 by John Arbash Meinel
Enable passing env vars to the 'fork' request.
561
    @staticmethod
562
    def parse_env(env_str):
563
        """Convert the environment information into a dict.
564
565
        :param env_str: A string full of environment variable declarations.
566
            Each key is simple ascii "key: value\n"
11149.12.60 by John Arbash Meinel
Change it so that we use a different main command.
567
            The string must end with "end\n".
11149.12.59 by John Arbash Meinel
Enable passing env vars to the 'fork' request.
568
        :return: A dict of environment variables
569
        """
570
        env = {}
11149.12.60 by John Arbash Meinel
Change it so that we use a different main command.
571
        if not env_str.endswith('end\n'):
11149.12.59 by John Arbash Meinel
Enable passing env vars to the 'fork' request.
572
            raise ValueError('Invalid env-str: %r' % (env_str,))
11149.12.60 by John Arbash Meinel
Change it so that we use a different main command.
573
        env_str = env_str[:-5]
11149.12.59 by John Arbash Meinel
Enable passing env vars to the 'fork' request.
574
        if not env_str:
575
            return env
576
        env_entries = env_str.split('\n')
577
        for entry in env_entries:
578
            key, value = entry.split(': ', 1)
579
            env[key] = value
580
        return env
581
582
    def fork_one_request(self, conn, client_addr, command_argv, env):
        """Fork myself and serve a request.

        The parent records the child and returns.  The child sets up its
        environment and FIFOs, tells the client where to connect, and then
        runs the command via become_child.

        :param conn: The connection the request arrived on.
        :param client_addr: Address of the client, used for logging only.
        :param command_argv: The command to run in the child, as a list.
        :param env: A dict of environment variables to apply in the child.
        """
        temp_name = tempfile.mkdtemp(prefix='lp-forking-service-child-')
        self._children_spawned += 1
        pid = self._fork_function()
        if pid != 0:
            # Parent: remember the child and where its FIFOs live.
            self._child_processes[pid] = (temp_name, conn)
            self.log(client_addr, 'Spawned process %s for %r: %s'
                            % (pid, command_argv, temp_name))
            return
        # Child from here on.
        pid = os.getpid()
        trace.mutter('%d spawned' % (pid,))
        self._server_socket.close()
        for env_var, value in env.iteritems():
            osutils.set_or_unset_env(env_var, value)
        # See [Decision #3]
        # The FIFOs are created before the response is sent, so that the
        # client can start trying to connect to them while the child does
        # the same.
        self._create_child_file_descriptors(temp_name)
        conn.sendall('ok\n%d\n%s\n' % (pid, temp_name))
        conn.close()
        self.become_child(command_argv, temp_name)
        trace.warning('become_child returned!!!')
        sys.exit(1)
11149.12.3 by John Arbash Meinel
Basic interface up and running.
607
608
    def main_loop(self):
        """Run the service: listen, serve requests, then shut down children.

        Records the start time, installs the signal handlers, creates the
        master socket and runs self._do_loop() until it returns or a
        KeyboardInterrupt arrives.  The master socket is always cleaned up,
        after which the remaining children are shut down.
        """
        self._start_time = time.time()
        self._should_terminate.clear()
        self._register_signals()
        self._create_master_socket()
        trace.note('Listening on socket: %s' % (self.master_socket_path,))
        # The try/finally is nested inside the try/except on purpose: the
        # socket cleanup runs first, and a KeyboardInterrupt raised either by
        # the loop or by the cleanup itself is then swallowed.
        try:
            try:
                self._do_loop()
            finally:
                # Stop talking to others, we are shutting down
                self._cleanup_master_socket()
        except KeyboardInterrupt:
            # SIGINT received, try to shutdown cleanly
            pass
        trace.note('Shutting down. Waiting up to %.0fs for %d child processes'
                   % (self.WAIT_FOR_CHILDREN_TIMEOUT,
                      len(self._child_processes)))
        self._shutdown_children()
        trace.note('Exiting')
628
629
    def _do_loop(self):
        """Accept and serve connections until termination is requested.

        Every pass tries to accept one connection on the master socket,
        hands it to self.serve_one_connection(), and then polls the children.
        The poll also runs when accept() timed out or was interrupted, so
        child exits are noticed even while no client connects.
        """
        while not self._should_terminate.isSet():
            try:
                conn, client_addr = self._server_socket.accept()
            except self._socket_timeout:
                pass  # Run shutdown and children checks.
            except self._socket_error as e:
                # EINTR: just run the shutdown and children checks.
                # EBADF: we can get this here while we are shutting down,
                # so we just ignore it for now.
                if e.args[0] not in (errno.EINTR, errno.EBADF):
                    # Log any other failure mode
                    trace.warning("listening socket error: %s", e)
            else:
                self.log(client_addr, 'connected')
                # Note: settimeout is used so that a malformed request
                # doesn't cause us to hang forever.  Also note that the
                # particular implementation means that a malicious
                # client could probably send us one byte every once in a
                # while, and we would just keep trying to read it.
                # However, as a local service, we aren't worrying about
                # it.
                conn.settimeout(self.WAIT_FOR_REQUEST_TIMEOUT)
                try:
                    self.serve_one_connection(conn, client_addr)
                except self._socket_timeout as e:
                    trace.log_exception_quietly()
                    self.log(
                        client_addr, 'request timeout failure: %s' % (e,))
                    # The client may already be gone.  A failure to report
                    # the timeout must not escape this handler, otherwise it
                    # would take the whole service down.
                    try:
                        conn.sendall('FAILURE\nrequest timed out\n')
                    except (self._socket_timeout,
                            self._socket_error) as send_error:
                        self.log(
                            client_addr,
                            'could not report request timeout: %s'
                            % (send_error,))
                    conn.close()
                except Exception as e:
                    # Trap otherwise unhandled exceptions so that we don't
                    # kill the service because of one bad request.
                    trace.log_exception_quietly()
                    self.log(client_addr, 'trapped a failure while handling'
                                          ' connection: %s' % (e,))
            self._poll_children()
11149.12.3 by John Arbash Meinel
Basic interface up and running.
671
11149.12.6 by John Arbash Meinel
Preloading and running cmd_launchpad_server seems to be working.
672
    def log(self, client_addr, message):
        """Write message to the trace log via trace.mutter.

        When client_addr is not None it is prepended in square brackets, so
        the log shows which connection is being served.

        :param client_addr: The address of the client, or None.
        :param message: The text to log.
        """
        # Note, we don't use conn.getpeername() because if a client
        # disconnects before we get here, that raises an exception
        conn_info = '' if client_addr is None else '[%s] ' % (client_addr,)
        trace.mutter('%s%s' % (conn_info, message))
684
685
    def log_information(self):
        """Log the status information.

        Polls the children first, then logs the running time, the number of
        current and total spawned children, and the resource usage of this
        process and of its collected children.
        """
        self._poll_children()
        uptime = time.time() - self._start_time
        self.log(None, 'Running for %.3fs' % (uptime,))
        running = len(self._child_processes)
        self.log(None, '%d children currently running (spawned %d total)'
                       % (running, self._children_spawned))
        # Read the current information about memory consumption, etc.
        own_usage = resource.getrusage(resource.RUSAGE_SELF)
        self.log(None, 'Self: %s' % (own_usage,))
        # This seems to be the sum of all rusage for all children that have
        # been collected (not for currently running children, or ones we
        # haven't "wait"ed on.) We may want to read /proc/PID/status, since
        # 'live' information is probably more useful.
        children_usage = resource.getrusage(resource.RUSAGE_CHILDREN)
        self.log(None, 'Finished children: %s' % (children_usage,))
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
703
704
    def _poll_children(self):
        """See if children are still running, etc.

        Reaps every child that has already exited, without blocking.  For
        each one its FIFO directory is removed if it is still on disk, and
        the exit code is reported on the socket stored for that child.

        One interesting hook here would be to track memory consumption, etc.

        :raises OSError: If os.wait3() fails for any reason other than there
            being no children to wait for.
        """
        while self._child_processes:
            try:
                c_id, exit_code, rusage = os.wait3(os.WNOHANG)
            except OSError as e:
                if e.errno != errno.ECHILD:
                    # Anything else is unexpected.  Falling through would
                    # continue with c_id unset (or left over from the
                    # previous pass), so let the real error propagate.
                    raise
                # TODO: We handle this right now because the test suite
                #       fakes a child, since we wanted to test some code
                #       without actually forking anything
                trace.mutter('_poll_children() called, and'
                    ' self._child_processes indicates there are'
                    ' children, but os.wait3() says there are not.'
                    ' current_children: %s' % (self._child_processes,))
                return
            if c_id == 0:
                # No more children stopped right now
                return
            c_path, sock = self._child_processes.pop(c_id)
            trace.mutter('%s exited %s and usage: %s'
                         % (c_id, exit_code, rusage))
            # Cleanup the child path, before mentioning it exited to the
            # caller. This avoids a race condition in the test suite.
            if os.path.exists(c_path):
                # The child failed to cleanup after itself, do the work here
                trace.warning('Had to clean up after child %d: %s\n'
                              % (c_id, c_path))
                shutil.rmtree(c_path, ignore_errors=True)
            # See [Decision #4]
            try:
                sock.sendall('exited\n%s\n' % (exit_code,))
            except (self._socket_timeout, self._socket_error) as e:
                # The client disconnected before we wanted them to,
                # no big deal
                trace.mutter('%s\'s socket already closed: %s' % (c_id, e))
            else:
                sock.close()
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
744
11149.12.27 by John Arbash Meinel
now its working.
745
    def _wait_for_children(self, secs):
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
746
        start = time.time()
11149.12.26 by John Arbash Meinel
Still not quite working right, but getting better.
747
        end = start + secs
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
748
        while self._child_processes:
749
            self._poll_children()
11149.12.26 by John Arbash Meinel
Still not quite working right, but getting better.
750
            if secs > 0 and time.time() > end:
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
751
                break
11149.12.10 by John Arbash Meinel
Start building the testing infrastructure.
752
            time.sleep(self.SLEEP_FOR_CHILDREN_TIMEOUT)
11149.12.26 by John Arbash Meinel
Still not quite working right, but getting better.
753
11149.12.27 by John Arbash Meinel
now its working.
754
    def _shutdown_children(self):
        """Wait for the children to exit, escalating if they do not.

        First the children get self.WAIT_FOR_CHILDREN_TIMEOUT seconds to
        finish on their own.  Any still running are sent SIGINT, and after
        another short wait SIGKILL.  For children that are still being
        tracked after that, the socket is closed and the FIFO directory is
        removed.
        """
        self._wait_for_children(self.WAIT_FOR_CHILDREN_TIMEOUT)
        if self._child_processes:
            trace.warning('Children still running: %s'
                % ', '.join(map(str, self._child_processes)))
            for c_id in self._child_processes:
                trace.warning('sending SIGINT to %d' % (c_id,))
                os.kill(c_id, signal.SIGINT)
            # We sent the SIGINT signal, see if they exited
            self._wait_for_children(self.SLEEP_FOR_CHILDREN_TIMEOUT)
        if self._child_processes:
            # No? Then maybe something more powerful
            for c_id in self._child_processes:
                trace.warning('sending SIGKILL to %d' % (c_id,))
                os.kill(c_id, signal.SIGKILL)
            # We sent the SIGKILL signal, see if they exited
            self._wait_for_children(self.SLEEP_FOR_CHILDREN_TIMEOUT)
        if self._child_processes:
            for c_id, (c_path, sock) in self._child_processes.items():
                # TODO: We should probably put something into this message?
                #       However, the likelyhood is very small that this isn't
                #       already closed because of SIGKILL + _wait_for_children
                #       And I don't really know what to say...
                sock.close()
                if os.path.exists(c_path):
                    trace.warning('Cleaning up after immortal child %d: %s\n'
                                  % (c_id, c_path))
                    # Best effort, like the cleanup in _poll_children: a
                    # directory we cannot remove must not stop us from
                    # cleaning up after the remaining children.
                    shutil.rmtree(c_path, ignore_errors=True)
11149.12.3 by John Arbash Meinel
Basic interface up and running.
782
11149.12.59 by John Arbash Meinel
Enable passing env vars to the 'fork' request.
783
    def _parse_fork_request(self, conn, client_addr, request):
        """Split a 'fork' or 'fork-env' request into a command and env.

        For 'fork-env' requests more data is read from conn until the
        request ends with "end\n".  When parsing fails, the failure is logged
        and reported to the client on conn.

        :param conn: The connection the request arrived on.
        :param client_addr: Address of the client, used for logging only.
        :param request: The request text received so far.
        :return: (command_argv, env) on success, (None, None) when the
            command or the environment could not be parsed.
        """
        if request.startswith('fork-env '):
            while not request.endswith('end\n'):
                more = osutils.read_bytes_from_socket(conn)
                if not more:
                    # An empty read presumably means the peer closed the
                    # connection.  Stop here rather than spinning forever;
                    # the truncated env is rejected by parse_env below.
                    break
                request += more
            # partition() rather than split() so that a request without a
            # newline ends up as a parse failure, not an unpacking error.
            command, _, env = request[9:].partition('\n')
        else:
            command = request[5:].strip()
            env = 'end\n'  # No env set.
        try:
            command_argv = self.command_to_argv(command)
            env = self.parse_env(env)
        except Exception as e:
            # TODO: Log the traceback?
            self.log(client_addr, 'command or env parsing failed: %r'
                                  % (str(e),))
            conn.sendall('FAILURE\ncommand or env parsing failed: %r'
                         % (str(e),))
        else:
            return command_argv, env
        return None, None
803
11149.12.6 by John Arbash Meinel
Preloading and running cmd_launchpad_server seems to be working.
804
    def serve_one_connection(self, conn, client_addr):
        """Read one request from conn and act on it.

        Bytes are read until the first newline has arrived, CRLF line
        endings are normalised to LF, and the request is then dispatched:

        * 'hello': reply, call self.log_information(), close conn.
        * 'quit': set self._should_terminate, reply, close conn.
        * 'child_connect_timeout <int>': store the value in
          self._child_connect_timeout (or reply with a FAILURE line if it
          is not an integer), close conn.
        * 'fork ...' / 'fork-env ...': parse with
          self._parse_fork_request() and pass the result to
          self.fork_one_request(); conn is left open in that case and only
          closed here when parsing failed.
        * anything else: log it, reply with a FAILURE message, close conn.

        If the peer disconnects before a full line was received, the
        partial request is logged and conn is closed.

        :param conn: The connected socket to read from and reply on.
        :param client_addr: Passed through to self.log() with each message.
        """
        request = ''
        while '\n' not in request:
            more = osutils.read_bytes_from_socket(conn)
            if not more:
                # An empty read is how read_bytes_from_socket() reports
                # that the peer is gone.  Without this check the loop would
                # never finish, and no further connection would be served.
                self.log(client_addr,
                         'connection closed before a complete request was'
                         ' received: %r' % (request,))
                conn.close()
                return
            request += more
        # telnet likes to use '\r\n' rather than '\n', and it is nice to have
        # an easy way to debug.
        request = request.replace('\r\n', '\n')
        self.log(client_addr, 'request: %r' % (request,))
        if request == 'hello\n':
            conn.sendall('ok\nyep, still alive\n')
            self.log_information()
            conn.close()
        elif request == 'quit\n':
            self._should_terminate.set()
            conn.sendall('ok\nquit command requested... exiting\n')
            conn.close()
        elif request.startswith('child_connect_timeout '):
            try:
                value = int(request.split(' ', 1)[1])
            except ValueError as e:
                conn.sendall('FAILURE: %r\n' % (e,))
            else:
                self._child_connect_timeout = value
                conn.sendall('ok\n')
            conn.close()
        elif request.startswith('fork ') or request.startswith('fork-env '):
            command_argv, env = self._parse_fork_request(conn, client_addr,
                                                         request)
            if command_argv is not None:
                # See [Decision #7]
                # TODO: Do we want to limit the number of children? And/or
                #       prefork additional instances? (the design will need to
                #       change if we prefork and run arbitrary commands.)
                self.fork_one_request(conn, client_addr, command_argv, env)
                # We don't close the conn like other code paths, since we use
                # it again later.
            else:
                conn.close()
        else:
            self.log(client_addr, 'FAILURE: unknown request: %r' % (request,))
            # See [Decision #8]
            conn.sendall('FAILURE\nunknown request: %r\n' % (request,))
            conn.close()
11149.12.3 by John Arbash Meinel
Basic interface up and running.
847
11149.12.4 by John Arbash Meinel
Got a working service that forks children (which do nothing so far)
848
11149.12.17 by John Arbash Meinel
change the name a bit, prepare for changing the 'fork' request.
849
class cmd_launchpad_forking_service(Command):
    """Launch a long-running process, where you can ask for new processes.

    The process will block on a given AF_UNIX socket waiting for requests to
    be made.  When a request is made, it will fork itself and redirect
    stdout/in/err to fifos on the filesystem, and start running the requested
    command.  The caller will be informed where those file handles can be
    found.  Thus it only makes sense that the process connecting to the port
    must be on the same system.
    """

    aliases = ['lp-service']

    takes_options = [
        Option('path',
               help='Listen for connections at PATH',
               type=str),
        # NOTE(review): no type= is given here although the help text
        # promises an octal integer.  Presumably bzrlib treats an Option
        # without a type as a plain flag -- confirm how LPForkingService
        # consumes 'perms' before relying on --perms.
        Option('perms',
               help='Set the mode bits for the socket, interpreted'
                    ' as an octal integer (same as chmod)'),
        Option('preload',
               help="Do/don't preload libraries before startup."),
        Option('children-timeout', type=int, argname='SEC',
               help="Only wait SEC seconds for children to exit"),
        Option('pid-file', type=unicode,
               help='Write the process PID to this file.'),
        ]

    def _preload_libraries(self):
        """Import every module named in libraries_to_preload.

        A module that fails with ImportError is reported via trace.mutter()
        and skipped; the remaining modules are still imported.
        """
        for pyname in libraries_to_preload:
            try:
                __import__(pyname)
            except ImportError as e:
                trace.mutter('failed to preload %s: %s' % (pyname, e))

    def _daemonize(self, pid_filename):
        """Turn this process into a child-of-init daemon.

        Upon request, we relinquish our control and switch to daemon mode,
        writing out the final pid of the daemon process.

        :param pid_filename: Path of the file that receives the pid of the
            final daemon process, followed by a newline.
        """
        # If fork fails, it will bubble out naturally and be reported by the
        # cmd logic
        pid = os.fork()
        if pid > 0:
            # Original process exits cleanly
            os._exit(0)

        # Disconnect from the parent process
        os.setsid()

        # fork again, to truly become a daemon.
        pid = os.fork()
        if pid > 0:
            os._exit(0)

        # Redirect file handles: point the process-level stdin/stdout/stderr
        # descriptors at /dev/null.
        stdin = open('/dev/null', 'r')
        os.dup2(stdin.fileno(), sys.stdin.fileno())
        stdout = open('/dev/null', 'a+')
        os.dup2(stdout.fileno(), sys.stdout.fileno())
        stderr = open('/dev/null', 'a+', 0)
        os.dup2(stderr.fileno(), sys.stderr.fileno())

        # Now that we are a daemon, let people know what pid is running
        with open(pid_filename, 'wb') as f:
            f.write('%d\n' % (os.getpid(),))

    def run(self, path=None, perms=None, preload=True,
            children_timeout=LPForkingService.WAIT_FOR_CHILDREN_TIMEOUT,
            pid_file=None):
        """Start the forking service and block until its main loop ends.

        :param path: Where to listen; defaults to
            LPForkingService.DEFAULT_PATH.
        :param perms: Passed to LPForkingService; defaults to
            LPForkingService.DEFAULT_PERMISSIONS.
        :param preload: If true, import libraries_to_preload first.
        :param children_timeout: Stored on the service as
            WAIT_FOR_CHILDREN_TIMEOUT.
        :param pid_file: If given, daemonize first and write the daemon's
            pid to this path; the file is removed again on the way out.
        """
        if pid_file is not None:
            self._daemonize(pid_file)
        try:
            if path is None:
                path = LPForkingService.DEFAULT_PATH
            if perms is None:
                perms = LPForkingService.DEFAULT_PERMISSIONS
            if preload:
                # We 'note' this because it often takes a fair amount of
                # time.
                trace.note('Preloading %d modules'
                           % (len(libraries_to_preload),))
                self._preload_libraries()
            service = LPForkingService(path, perms)
            service.WAIT_FOR_CHILDREN_TIMEOUT = children_timeout
            service.main_loop()
        finally:
            # Remove the pid file even when startup or the main loop raised,
            # so a crash does not leave a stale pid behind.
            # NOTE(review): this assumes forked children never unwind back
            # through run() (i.e. they leave via os._exit()) -- confirm in
            # LPForkingService.
            if pid_file is not None:
                try:
                    os.remove(pid_file)
                except (OSError, IOError) as e:
                    trace.mutter('Failed to cleanup pid_file: %s\n%s'
                                 % (pid_file, e))
11149.12.2 by John Arbash Meinel
Start working on some basic infrastructure.
941
11149.12.17 by John Arbash Meinel
change the name a bit, prepare for changing the 'fork' request.
942
# Hand the command class to register_command() (its import is outside this
# view; presumably bzrlib's command registry) so it is available by name.
register_command(cmd_launchpad_forking_service)
11149.12.3 by John Arbash Meinel
Basic interface up and running.
943
944
11149.12.23 by John Arbash Meinel
Create a command that just replays content that it read back to its output.
945
class cmd_launchpad_replay(Command):
    """Write input from stdin back to stdout or stderr.

    This is a hidden command, primarily available for testing
    cmd_launchpad_forking_service.
    """

    hidden = True

    def run(self):
        """Echo each stdin line to the stream its numeric prefix selects.

        Every line must look like '<channel> <contents>'.  Channel 1 sends
        the contents to stdout and channel 2 to stderr, flushing after each
        write.  Any other channel raises RuntimeError.

        :return: 0 once stdin is exhausted.
        """
        for raw_line in sys.stdin:
            prefix, payload = raw_line.split(' ', 1)
            stream_id = int(prefix)
            if stream_id not in (1, 2):
                raise RuntimeError('Invalid channel request.')
            target = sys.stdout if stream_id == 1 else sys.stderr
            target.write(payload)
            target.flush()
        return 0
11149.12.23 by John Arbash Meinel
Create a command that just replays content that it read back to its output.
969
970
# Hand the hidden replay command to register_command() as well (its import
# is outside this view; presumably bzrlib's command registry).
register_command(cmd_launchpad_replay)
971
14564.1.1 by Jeroen Vermeulen
lpserve lint.
972
# Modules that cmd_launchpad_forking_service._preload_libraries() imports
# before the service is created (skipped when 'preload' is false).  An
# ImportError for an entry is only reported via trace.mutter().
#
# This list was generated by "run lsprof"ing a spawned child, and
# looking for <module ...> times, which indicate that an import
# occurred.  Another option is to run "bzr lp-serve --profile-imports"
# manually, and observe what was expensive to import.  It doesn't seem
# very easy to get this right automatically.
libraries_to_preload = [
    'bzrlib.errors',
    'bzrlib.repofmt.groupcompress_repo',
    'bzrlib.repository',
    'bzrlib.smart',
    'bzrlib.smart.protocol',
    'bzrlib.smart.request',
    'bzrlib.smart.server',
    'bzrlib.smart.vfs',
    'bzrlib.transport.local',
    'bzrlib.transport.readonly',
    'lp.codehosting.bzrutils',
    'lp.codehosting.vfs',
    'lp.codehosting.vfs.branchfs',
    'lp.codehosting.vfs.branchfsclient',
    'lp.codehosting.vfs.hooks',
    'lp.codehosting.vfs.transport',
    ]
11149.12.10 by John Arbash Meinel
Start building the testing infrastructure.
995
996
997
def load_tests(standard_tests, module, loader):
    """Add this package's test modules to standard_tests and return it.

    :param standard_tests: The suite to extend; it is also the return value.
    :param module: Unused.
    :param loader: Must provide loadTestsFromModuleNames().
    """
    test_module_names = ['test_lpserve']
    qualified_names = []
    for short_name in test_module_names:
        qualified_names.append(__name__ + '.' + short_name)
    loaded = loader.loadTestsFromModuleNames(qualified_names)
    standard_tests.addTests(loaded)
    return standard_tests