2
# Copyright (C) 2007-2008 The University of Melbourne
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU General Public License for more details.
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22
# Runs a student script in a safe execution environment.
25
from ivle import studpath
26
from ivle.util import IVLEJailError, split_path
35
# TODO: Make progressive output work
36
# Question: Will having a large buffer size stop progressive output from
37
# working on smaller output?
39
CGI_BLOCK_SIZE = 65535
40
PATH = "/usr/local/bin:/usr/bin:/bin"
42
def interpret_file(req, owner, jail_dir, filename, interpreter, gentle=True,
                   overrides=None):
    """Serve a file by interpreting it using one of IVLE's builtin
    interpreters.

    All interpreters are intended to run in the user's jail. The jail
    location is provided as an argument to the interpreter, but it is up to
    the individual interpreters to create the jail.

    req: An IVLE request object.
    owner: The user who owns the file being served.
    jail_dir: Absolute path to the user's jail.
    filename: Filename within the user's jail. A leading path separator is
        added if it is missing.
    interpreter: A function object to call.
    gentle: Passed through unchanged to the interpreter.
    overrides: A dict mapping env var names to strings, to override arbitrary
        environment variables in the resulting CGI environment. Passed
        through to the interpreter as a keyword argument.

    Returns whatever the interpreter returns.
    """
    # We can't test here whether or not the target file actually exists,
    # because the apache user may not have permission. Instead we have to
    # rely on the interpreter generating an error.
    if filename.startswith(os.sep):
        filename_abs = filename
    else:
        filename_abs = os.path.join(os.sep, filename)

    # (Note: files are executed by their owners, not the logged in user.
    # This ensures users are responsible for their own programs and also
    # allows them to be executed by the public).

    # filename_abs and working_dir are both "relative" to the jail, which
    # means they begin with a '/': they are absolute inside the jailspace.
    # jail_dir, by contrast, is the absolute jail directory.
    working_dir = os.path.dirname(filename_abs)

    return interpreter(owner, jail_dir, working_dir, filename_abs, req,
                       gentle, overrides=overrides)
85
class CGIFlags:
    """Stores flags regarding the state of reading CGI output.

    One instance is created per CGI run and threaded through
    process_cgi_output and process_cgi_header_line, which update it as the
    script's output is consumed.

    begentle: If true, CGI violations are presented to the user nicely as
        HTML warnings; if false, they are handled as internal errors.
    """
    def __init__(self, begentle=True):
        # Whether CGI violations should be explained to the user.
        self.gentle = begentle
        # Set once the header section has been split off from the output;
        # everything after that is treated as body data.
        self.started_cgi_body = False
        # Initialised here; not read by the code visible in this module.
        self.got_cgi_headers = False
        # Set after an HTML warning page has been started; body output is
        # HTML-escaped from then on.
        self.wrote_html_warning = False
        # Output received so far while the end of the headers has not yet
        # been seen.
        self.linebuf = ""
        self.headers = {}       # Header names : values
96
def execute_cgi(interpreter, owner, jail_dir, working_dir, script_path,
97
req, gentle, overrides=None):
99
trampoline: Full path on the local system to the CGI wrapper program
101
owner: User object of the owner of the file.
102
jail_dir: Absolute path of owner's jail directory.
103
working_dir: Directory containing the script file relative to owner's
105
script_path: CGI script relative to the owner's jail.
106
req: IVLE request object.
108
overrides: A dict mapping env var names to strings, to override arbitrary
109
environment variables in the resulting CGI environment.
111
The called CGI wrapper application shall be called using popen and receive
112
the HTTP body on stdin. It shall receive the CGI environment variables to
116
trampoline = os.path.join(req.config['paths']['lib'], 'trampoline')
118
# Support no-op trampoline runs.
119
if interpreter is None:
120
interpreter = '/bin/true'
126
# Get the student program's directory and execute it from that context.
127
(tramp_dir, _) = os.path.split(trampoline)
129
# TODO: Don't create a file if the body length is known to be 0
130
# Write the HTTP body to a temporary file so it can be passed as a *real*
133
body = req.read() if not noop else None
137
f.seek(0) # Rewind, for reading
139
# Set up the environment
140
environ = cgi_environ(req, script_path, owner, overrides=overrides)
142
# usage: tramp uid jail_dir working_dir script_path
143
cmd_line = [trampoline, str(owner.unixid),
144
req.config['paths']['jails']['mounts'],
145
req.config['paths']['jails']['src'],
146
req.config['paths']['jails']['template'],
147
jail_dir, working_dir, interpreter, script_path]
148
# Popen doesn't like unicode strings, so encode them as UTF-8 first.
149
cmd_line = [(s.encode('utf-8') if isinstance(s, unicode) else s)
151
pid = subprocess.Popen(cmd_line,
152
stdin=f, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
153
cwd=tramp_dir, env=environ)
155
# We don't want any output! Bail out after the process terminates.
160
# process_cgi_line: Reads a single line of CGI output and processes it.
161
# Prints to req, and also does fancy HTML warnings if Content-Type
163
cgiflags = CGIFlags(gentle)
165
# Read from the process's stdout into req
166
data = pid.stdout.read(CGI_BLOCK_SIZE)
168
process_cgi_output(req, data, cgiflags)
169
data = pid.stdout.read(CGI_BLOCK_SIZE)
171
# If we haven't processed headers yet, now is a good time
172
if not cgiflags.started_cgi_body:
173
process_cgi_output(req, '\n', cgiflags)
175
# If we wrote an HTML warning header, write the footer
176
if cgiflags.wrote_html_warning:
182
def process_cgi_output(req, data, cgiflags):
183
"""Processes a chunk of CGI output. data is a string of arbitrary length;
184
some arbitrary chunk of output written by the CGI script."""
185
if cgiflags.started_cgi_body:
186
if cgiflags.wrote_html_warning:
187
# HTML escape text if wrote_html_warning
188
req.write(cgi.escape(data))
192
# Break data into lines of CGI header data.
193
linebuf = cgiflags.linebuf + data
194
# First see if we can split all header data
195
# We need to get the double CRLF- or LF-terminated headers, whichever
196
# is smaller, as either sequence may appear somewhere in the body.
197
usplit = linebuf.split('\n\n', 1)
198
wsplit = linebuf.split('\r\n\r\n', 1)
199
split = len(usplit[0]) > len(wsplit[0]) and wsplit or usplit
201
# Haven't seen all headers yet. Buffer and come back later.
202
cgiflags.linebuf = linebuf
207
cgiflags.linebuf = ""
208
cgiflags.started_cgi_body = True
209
# Process all the header lines
210
split = headers.split('\r\n', 1)
212
split = headers.split('\n', 1)
214
process_cgi_header_line(req, split[0], cgiflags)
215
if len(split) == 1: break
217
if cgiflags.wrote_html_warning:
218
# We're done with headers. Treat the rest as data.
219
data = headers + '\n' + data
221
split = headers.split('\r\n', 1)
223
split = headers.split('\n', 1)
225
# If not executing in gentle mode (which presents CGI violations
226
# to users nicely), check if this is an internal IVLE error
228
if not cgiflags.gentle:
229
hs = cgiflags.headers
230
if 'X-IVLE-Error-Type' in hs:
232
raise IVLEJailError(hs['X-IVLE-Error-Type'],
233
hs['X-IVLE-Error-Message'],
234
hs['X-IVLE-Error-Info'])
236
raise AssertionError("Bad error headers written by CGI.")
238
# Check to make sure the required headers were written
239
if cgiflags.wrote_html_warning or not cgiflags.gentle:
240
# We already reported an error, that's enough
242
elif "Content-Type" in cgiflags.headers:
244
elif "Location" in cgiflags.headers:
245
if ("Status" in cgiflags.headers and req.status >= 300
246
and req.status < 400):
249
message = """You did not write a valid status code for
250
the given location. To make a redirect, you may wish to try:</p>
251
<pre style="margin-left: 1em">Status: 302 Found
252
Location: <redirect address></pre>"""
253
write_html_warning(req, message)
254
cgiflags.wrote_html_warning = True
256
message = """You did not print a Content-Type header.
257
CGI requires that you print a "Content-Type". You may wish to try:</p>
258
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
259
write_html_warning(req, message)
260
cgiflags.wrote_html_warning = True
262
# Call myself to flush out the extra bit of data we read
263
process_cgi_output(req, data, cgiflags)
265
def process_cgi_header_line(req, line, cgiflags):
266
"""Process a line of CGI header data. line is a string representing a
267
complete line of text, stripped and without the newline.
270
name, value = line.split(':', 1)
272
# No colon. The user did not write valid headers.
273
# If we are being gentle, we want to help the user understand what
274
# went wrong. Otherwise, just admit we screwed up.
276
if not cgiflags.gentle:
277
message = """An unexpected server error has occured."""
279
elif len(cgiflags.headers) == 0:
280
# First line was not a header line. We can assume this is not
282
message = """You did not print a CGI header.
283
CGI requires that you print a "Content-Type". You may wish to try:</p>
284
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
286
# They printed some header at least, but there was an invalid
288
message = """You printed an invalid CGI header. You need to leave
289
a blank line after the headers, before writing the page contents."""
290
write_html_warning(req, message, warning=warning)
291
cgiflags.wrote_html_warning = True
292
# Handle the rest of this line as normal data
293
process_cgi_output(req, line + '\n', cgiflags)
296
# Check if CGI field-name is valid
297
CGI_SEPERATORS = set(['(', ')', '<', '>', '@', ',', ';', ':', '\\', '"',
298
'/', '[', ']', '?', '=', '{', '}', ' ', '\t'])
299
if any((char in CGI_SEPERATORS for char in name)):
301
if not cgiflags.gentle:
302
message = """An unexpected server error has occured."""
305
# Header contained illegal characters
306
message = """You printed an invalid CGI header. CGI header
307
field-names can not contain any of the following characters:
308
<code>( ) < > @ , ; : \\ " / [ ] ? = { } <em>SPACE
310
write_html_warning(req, message, warning=warning)
311
cgiflags.wrote_html_warning = True
312
# Handle the rest of this line as normal data
313
process_cgi_output(req, line + '\n', cgiflags)
317
value = value.strip()
318
if name == "Content-Type":
319
req.content_type = value
320
elif name == "Location":
322
elif name == "Status":
323
# Must be an integer, followed by a space, and then the status line
324
# which we ignore (seems like Apache has no way to send a custom
327
req.status = int(value.split(' ', 1)[0])
329
if not cgiflags.gentle:
330
# This isn't user code, so it should be good.
331
# Get us out of here!
333
message = """The "Status" CGI header was invalid. You need to
334
print a number followed by a message, such as "302 Found"."""
335
write_html_warning(req, message)
336
cgiflags.wrote_html_warning = True
337
# Handle the rest of this line as normal data
338
process_cgi_output(req, line + '\n', cgiflags)
340
# Generic HTTP header
341
# FIXME: Security risk letting users write arbitrary headers?
342
req.headers_out.add(name, value)
343
cgiflags.headers[name] = value # FIXME: Only the last header will end up here.
345
def write_html_warning(req, text, warning="Warning"):
346
"""Prints an HTML warning about invalid CGI interaction on the part of the
347
user. text may contain HTML markup."""
348
req.content_type = "text/html"
349
req.write("""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
350
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
351
<html xmlns="http://www.w3.org/1999/xhtml">
353
<meta http-equiv="Content-Type"
354
content="text/html; charset=utf-8" />
356
<body style="margin: 0; padding: 0; font-family: sans-serif;">
357
<div style="background-color: #faa; border-bottom: 1px solid black;
359
<p><strong>%s</strong>: %s
361
<div style="margin: 8px;">
363
""" % (warning, text))
365
# Mapping of interpreter names (as given in conf/app/server.py) to
366
# interpreter functions.
368
interpreter_objects = {
370
: functools.partial(execute_cgi, "/usr/bin/python"),
372
: functools.partial(execute_cgi, None),
378
def cgi_environ(req, script_path, user, overrides=None):
379
"""Gets CGI variables from apache and makes a few changes for security and
382
Does not modify req, only reads it.
384
overrides: A dict mapping env var names to strings, to override arbitrary
385
environment variables in the resulting CGI environment.
388
# Comments here are on the heavy side, explained carefully for security
389
# reasons. Please read carefully before making changes.
391
# This automatically asks mod_python to load up the CGI variables into the
392
# environment (which is a good first approximation)
393
for (k,v) in req.get_cgi_environ().items():
396
# Remove DOCUMENT_ROOT and SCRIPT_FILENAME. Not part of CGI spec and
397
# exposes unnecessary details about server.
399
del env['DOCUMENT_ROOT']
402
del env['SCRIPT_FILENAME']
405
# Remove PATH. The PATH here is the path on the server machine; not useful
406
# inside the jail. It may be a good idea to add another path, reflecting
407
# the inside of the jail, but not done at this stage.
412
# CGI specifies that REMOTE_HOST SHOULD be set, and MAY just be set to
413
# REMOTE_ADDR. Since Apache does not appear to set this, set it to
415
if 'REMOTE_HOST' not in env and 'REMOTE_ADDR' in env:
416
env['REMOTE_HOST'] = env['REMOTE_ADDR']
418
env['PATH_INFO'] = ''
419
del env['PATH_TRANSLATED']
421
normuri = os.path.normpath(req.uri)
422
env['SCRIPT_NAME'] = normuri
424
# SCRIPT_NAME is the path to the script WITHOUT PATH_INFO.
425
# We don't care about these if the script is null (ie. noop).
426
# XXX: We check for /home because we don't want to interfere with
427
# CGIRequest, which fileservice still uses.
428
if script_path and script_path.startswith('/home'):
429
normscript = os.path.normpath(script_path)
431
uri_into_jail = studpath.to_home_path(os.path.normpath(req.path))
433
# PATH_INFO is wrong because the script doesn't physically exist.
434
env['PATH_INFO'] = uri_into_jail[len(normscript):]
435
if len(env['PATH_INFO']) > 0:
436
env['SCRIPT_NAME'] = normuri[:-len(env['PATH_INFO'])]
438
# SERVER_SOFTWARE is actually not Apache but IVLE, since we are
439
# custom-making the CGI request.
440
env['SERVER_SOFTWARE'] = "IVLE/" + ivle.__version__
442
# Additional environment variables
443
username = user.login
444
env['HOME'] = os.path.join('/home', username)
446
if overrides is not None:
447
env.update(overrides)
450
class ExecutionError(Exception):
    """Raised by execute_raw to report a failed subprocess.

    The message carries the subprocess's exit code and its stderr output.
    """
453
def execute_raw(config, user, jail_dir, working_dir, binary, args):
454
'''Execute a binary in a user's jail, returning the raw output.
456
The binary is executed in the given working directory with the given
457
args. A tuple of (stdout, stderr) is returned.
460
tramp = os.path.join(config['paths']['lib'], 'trampoline')
461
tramp_dir = os.path.split(tramp)[0]
463
# Fire up trampoline. Vroom, vroom.
464
cmd_line = [tramp, str(user.unixid), config['paths']['jails']['mounts'],
465
config['paths']['jails']['src'],
466
config['paths']['jails']['template'],
467
jail_dir, working_dir, binary] + args
468
# Popen doesn't like unicode strings, so encode them as UTF-8 first.
469
cmd_line = [(s.encode('utf-8') if isinstance(s, unicode) else s)
471
proc = subprocess.Popen(cmd_line,
472
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
473
stderr=subprocess.PIPE, cwd=tramp_dir, close_fds=True,
474
env={'HOME': os.path.join('/home', user.login),
477
'LOGNAME': user.login})
479
(stdout, stderr) = proc.communicate()
480
exitcode = proc.returncode
483
raise ExecutionError('subprocess ended with code %d, stderr: "%s"' %
485
return (stdout, stderr)