1079
by William Grant
Merge setup-refactor branch. This completely breaks existing installations; |
1 |
# IVLE
|
2 |
# Copyright (C) 2007-2008 The University of Melbourne
|
|
3 |
#
|
|
4 |
# This program is free software; you can redistribute it and/or modify
|
|
5 |
# it under the terms of the GNU General Public License as published by
|
|
6 |
# the Free Software Foundation; either version 2 of the License, or
|
|
7 |
# (at your option) any later version.
|
|
8 |
#
|
|
9 |
# This program is distributed in the hope that it will be useful,
|
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 |
# GNU General Public License for more details.
|
|
13 |
#
|
|
14 |
# You should have received a copy of the GNU General Public License
|
|
15 |
# along with this program; if not, write to the Free Software
|
|
16 |
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
17 |
||
18 |
# Module: Interpret
|
|
19 |
# Author: Matt Giuca
|
|
20 |
# Date: 18/1/2008
|
|
21 |
||
22 |
# Runs a student script in a safe execution environment.
|
|
23 |
||
24 |
from ivle import studpath |
|
25 |
from ivle import db |
|
26 |
from ivle.util import IVLEError, IVLEJailError |
|
27 |
import ivle.conf |
|
28 |
||
29 |
import functools |
|
30 |
||
31 |
import os |
|
32 |
import pwd |
|
33 |
import subprocess |
|
34 |
import cgi |
|
35 |
||
36 |
# TODO: Make progressive output work
|
|
37 |
# Question: Will having a large buffer size stop progressive output from
|
|
38 |
# working on smaller output
|
|
39 |
||
40 |
# Number of bytes requested per read() from the CGI process's stdout
# (see execute_cgi).
CGI_BLOCK_SIZE = 65535

# Cache mapping login name -> unix uid. Replaced wholesale by get_uid()
# whenever a login is looked up that is not yet in the cache.
uids = {}
|
43 |
||
44 |
def get_uid(login):
    """Return the unix uid belonging to the given login name.

    The module-level ``uids`` cache is consulted first. On a miss, the
    login table is re-read from the database, the cache is replaced with the
    fresh login -> unixid mapping, and the lookup is retried.

    Raises KeyError if the login is still unknown after the refresh."""
    global uids
    try:
        return uids[login]
    except KeyError:
        # Not cached (yet); fall through and refresh from the database.
        pass

    rows = db.DB().get_all('login', ['login', 'unixid'])
    uids = dict((row['login'], row['unixid']) for row in rows)

    return uids[login]
|
59 |
||
60 |
def interpret_file(req, owner, jail_dir, filename, interpreter, gentle=True):
    """Serves a file by interpreting it using one of IVLE's builtin
    interpreters. All interpreters are intended to run in the user's jail. The
    jail location is provided as an argument to the interpreter but it is up
    to the individual interpreters to create the jail.

    req: An IVLE request object.
    owner: Username of the user who owns the file being served.
    jail_dir: Absolute path to the user's jail.
    filename: Filename within the user's jail. A missing leading separator
        is added, so both "/home/u/x.py" and "home/u/x.py" are accepted.
    interpreter: A function object to call. It is invoked as
        interpreter(uid, jail_dir, working_dir, filename_abs, req, gentle).
    gentle: Passed through unchanged to the interpreter.

    Returns whatever the interpreter returns.
    """
    # We can't test here whether or not the target file actually exists,
    # because the apache user may not have permission. Instead we have to
    # rely on the interpreter generating an error.
    if filename.startswith(os.sep):
        filename_abs = filename
    else:
        filename_abs = os.path.join(os.sep, filename)

    # Get the UID of the owner of the file
    # (Note: files are executed by their owners, not the logged in user.
    # This ensures users are responsible for their own programs and also
    # allows them to be executed by the public).
    uid = get_uid(owner)

    # working_dir is the directory containing the file, relative to the
    # user's jail. (Paths "relative" to the jail actually begin with a '/'
    # as they are absolute in the jailspace.)
    working_dir = os.path.dirname(filename_abs)

    return interpreter(uid, jail_dir, working_dir, filename_abs, req,
                       gentle)
|
99 |
||
100 |
class CGIFlags:
    """Mutable state shared by the functions that parse a CGI script's
    output.

    When ``gentle`` is true, invalid headers are reported to the user as an
    HTML warning rather than as a server error."""
    def __init__(self, begentle=True):
        # Header-parsing progress.
        self.linebuf = ""               # Output buffered until headers end
        self.headers = {}               # Header names : values
        self.got_cgi_headers = False    # Not read by the code in this module
        self.started_cgi_body = False   # True once the headers are consumed

        # Error reporting.
        self.gentle = begentle
        self.wrote_html_warning = False # True once a warning page was begun
|
111 |
||
112 |
def execute_cgi(interpreter, trampoline, uid, jail_dir, working_dir,
                script_path, req, gentle):
    """Run a script through the trampoline and relay its CGI output to req.

    interpreter: Interpreter handed to the trampoline as an argument, or
        None for a no-op run (the trampoline is then asked to run
        '/bin/true' with an empty script path, the request body is not
        read, and all output is discarded).
    trampoline: Full path on the local system to the CGI wrapper program
        being executed.
    uid: User ID of the owner of the file.
    jail_dir: Absolute path of owner's jail directory.
    working_dir: Directory containing the script file relative to owner's
        jail.
    script_path: CGI script relative to the owner's jail.
    req: IVLE request object.
    gentle: If true, invalid CGI headers produce an HTML warning for the
        user (see CGIFlags).

    The called CGI wrapper application shall be called using popen and receive
    the HTTP body on stdin. It shall receive the CGI environment variables to
    its environment.
    """

    # Support no-op trampoline runs.
    noop = interpreter is None
    if noop:
        interpreter = '/bin/true'
        script_path = ''

    # The trampoline is started with its own directory as the working
    # directory.
    (tramp_dir, _) = os.path.split(trampoline)

    # TODO: Don't create a file if the body length is known to be 0
    # Write the HTTP body to a temporary file so it can be passed as a *real*
    # file to popen.
    f = os.tmpfile()
    try:
        body = req.read() if not noop else None
        if body is not None:
            f.write(body)
            f.flush()
            f.seek(0)       # Rewind, for reading

        # Set up the environment
        # This automatically asks mod_python to load up the CGI variables
        # into the environment (which is a good first approximation)
        old_env = os.environ.copy()
        try:
            # Iterate over a snapshot of the keys; we delete as we go.
            for k in list(os.environ.keys()):
                del os.environ[k]
            for (k, v) in req.get_cgi_environ().items():
                os.environ[k] = v
            fixup_environ(req)

            # usage: tramp uid jail_dir working_dir interpreter script_path
            pid = subprocess.Popen(
                [trampoline, str(uid), jail_dir, working_dir, interpreter,
                 script_path],
                stdin=f, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                cwd=tramp_dir)
        finally:
            # Restore the environment, even if starting the child failed;
            # otherwise this process would keep the CGI environment.
            for k in list(os.environ.keys()):
                del os.environ[k]
            for (k, v) in old_env.items():
                os.environ[k] = v
    finally:
        # The child (if any) holds its own copy of the descriptor.
        f.close()

    # We don't want any output! Bail out after the process terminates.
    if noop:
        pid.communicate()
        return

    # cgiflags carries the header-parsing state between successive calls to
    # process_cgi_output.
    cgiflags = CGIFlags(gentle)

    # Read from the process's stdout into req
    data = pid.stdout.read(CGI_BLOCK_SIZE)
    while len(data) > 0:
        process_cgi_output(req, data, cgiflags)
        data = pid.stdout.read(CGI_BLOCK_SIZE)

    # If we haven't processed headers yet, now is a good time
    if not cgiflags.started_cgi_body:
        process_cgi_output(req, '\n', cgiflags)

    # If we wrote an HTML warning header, write the footer
    if cgiflags.wrote_html_warning:
        req.write("""</pre>
</div>
</body>
</html>""")
|
199 |
||
200 |
def process_cgi_output(req, data, cgiflags):
    """Processes a chunk of CGI output. data is a string of arbitrary length;
    some arbitrary chunk of output written by the CGI script.

    req: IVLE request object; body data is passed to req.write.
    cgiflags: CGIFlags instance holding the parsing state across calls.

    Until the blank line ending the headers has been seen, data is only
    buffered in cgiflags.linebuf. Once it is seen, each header line is handed
    to process_cgi_header_line, the collected headers are validated, and the
    remainder is written as body. Body data is HTML-escaped if an HTML
    warning has been written.

    Raises IVLEError or IVLEJailError if the script reported an internal
    error through X-IVLE-Error-* headers.
    """
    # Local import: escape() replaces '&', '<' and '>' exactly like
    # cgi.escape(), which is deprecated and absent from newer Pythons.
    from xml.sax.saxutils import escape

    if cgiflags.started_cgi_body:
        if cgiflags.wrote_html_warning:
            # HTML escape text if wrote_html_warning
            req.write(escape(data))
        else:
            req.write(data)
        return

    # Break data into lines of CGI header data.
    linebuf = cgiflags.linebuf + data
    # First see if we can split all header data
    # We need to get the double CRLF- or LF-terminated headers, whichever
    # is smaller, as either sequence may appear somewhere in the body.
    usplit = linebuf.split('\n\n', 1)
    wsplit = linebuf.split('\r\n\r\n', 1)
    split = wsplit if len(usplit[0]) > len(wsplit[0]) else usplit
    if len(split) == 1:
        # Haven't seen all headers yet. Buffer and come back later.
        cgiflags.linebuf = linebuf
        return

    headers = split[0]
    data = split[1]
    cgiflags.linebuf = ""
    cgiflags.started_cgi_body = True
    # Process all the header lines
    split = headers.split('\r\n', 1)
    if len(split) == 1:
        split = headers.split('\n', 1)
    while True:
        process_cgi_header_line(req, split[0], cgiflags)
        if len(split) == 1: break
        headers = split[1]
        if cgiflags.wrote_html_warning:
            # We're done with headers. Treat the rest as data.
            data = headers + '\n' + data
            break
        split = headers.split('\r\n', 1)
        if len(split) == 1:
            split = headers.split('\n', 1)

    # Is this an internal IVLE error condition?
    hs = cgiflags.headers
    if 'X-IVLE-Error-Type' in hs:
        t = hs['X-IVLE-Error-Type']
        if t == IVLEError.__name__:
            # A missing or non-numeric header is reported the same way as
            # for jail errors below, not as a bare KeyError/ValueError.
            try:
                code = int(hs['X-IVLE-Error-Code'])
                error_message = hs['X-IVLE-Error-Message']
            except (KeyError, ValueError):
                raise IVLEError(500, 'bad error headers written by CGI')
            raise IVLEError(code, error_message)
        else:
            try:
                raise IVLEJailError(hs['X-IVLE-Error-Type'],
                                    hs['X-IVLE-Error-Message'],
                                    hs['X-IVLE-Error-Info'])
            except KeyError:
                raise IVLEError(500, 'bad error headers written by CGI')

    # Check to make sure the required headers were written
    if cgiflags.wrote_html_warning or not cgiflags.gentle:
        # We already reported an error, that's enough
        pass
    elif "Content-Type" in cgiflags.headers:
        pass
    elif "Location" in cgiflags.headers:
        if ("Status" in cgiflags.headers and req.status >= 300
            and req.status < 400):
            pass
        else:
            # The placeholder is entity-escaped so that it is displayed
            # instead of being parsed as an HTML tag.
            message = """You did not write a valid status code for
the given location. To make a redirect, you may wish to try:</p>
<pre style="margin-left: 1em">Status: 302 Found
Location: &lt;redirect address&gt;</pre>"""
            write_html_warning(req, message)
            cgiflags.wrote_html_warning = True
    else:
        message = """You did not print a Content-Type header.
CGI requires that you print a "Content-Type". You may wish to try:</p>
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
        write_html_warning(req, message)
        cgiflags.wrote_html_warning = True

    # Call myself to flush out the extra bit of data we read
    process_cgi_output(req, data, cgiflags)
|
284 |
||
285 |
def process_cgi_header_line(req, line, cgiflags):
    """Process a line of CGI header data. line is a string representing a
    complete line of text, stripped and without the newline.

    req: IVLE request object; content_type, location, status and headers_out
        are updated according to the header.
    cgiflags: CGIFlags instance; the header is recorded in cgiflags.headers.

    The names Content-Type, Location and Status are recognised in any
    letter case and recorded under exactly those spellings. A line without
    a colon, or an invalid Status value in gentle mode, produces an HTML
    warning, after which the line is emitted as body data. In non-gentle
    mode an invalid Status value raises ValueError.
    """
    try:
        name, value = line.split(':', 1)
    except ValueError:
        # No colon. The user did not write valid headers.
        # If we are being gentle, we want to help the user understand what
        # went wrong. Otherwise, just admit we screwed up.
        warning = "Warning"
        if not cgiflags.gentle:
            message = """An unexpected server error has occured."""
            warning = "Error"
        elif len(cgiflags.headers) == 0:
            # First line was not a header line. We can assume this is not
            # a CGI app.
            message = """You did not print a CGI header.
CGI requires that you print a "Content-Type". You may wish to try:</p>
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
        else:
            # They printed some header at least, but there was an invalid
            # header.
            message = """You printed an invalid CGI header. You need to leave
a blank line after the headers, before writing the page contents."""
        write_html_warning(req, message, warning=warning)
        cgiflags.wrote_html_warning = True
        # Handle the rest of this line as normal data
        process_cgi_output(req, line + '\n', cgiflags)
        return

    # Read CGI headers
    value = value.strip()
    # Header field names are case-insensitive. Map the ones handled
    # specially to a canonical spelling so that, e.g., "content-type" is
    # recognised here and found under "Content-Type" in cgiflags.headers.
    canonical_names = {
        'content-type': "Content-Type",
        'location': "Location",
        'status': "Status",
    }
    name = canonical_names.get(name.lower(), name)
    if name == "Content-Type":
        req.content_type = value
    elif name == "Location":
        req.location = value
    elif name == "Status":
        # Must be an integer, followed by a space, and then the status line
        # which we ignore (seems like Apache has no way to send a custom
        # status line).
        try:
            req.status = int(value.split(' ', 1)[0])
        except ValueError:
            if not cgiflags.gentle:
                # This isn't user code, so it should be good.
                # Get us out of here!
                raise
            message = """The "Status" CGI header was invalid. You need to
print a number followed by a message, such as "302 Found"."""
            write_html_warning(req, message)
            cgiflags.wrote_html_warning = True
            # Handle the rest of this line as normal data
            process_cgi_output(req, line + '\n', cgiflags)
    else:
        # Generic HTTP header
        # FIXME: Security risk letting users write arbitrary headers?
        req.headers_out.add(name, value)
    cgiflags.headers[name] = value # FIXME: Only the last header will end up here.
|
344 |
||
345 |
def write_html_warning(req, text, warning="Warning"):
    """Begin an HTML page reporting invalid CGI behaviour by the user's
    script.

    Sets the response content type to text/html and writes the page header,
    a banner reading "<warning>: <text>", and an opening <pre> element. The
    page is deliberately left open so that output can follow. text is
    inserted verbatim and may contain HTML markup."""
    page_top = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type"
content="text/html; charset=utf-8" />
</head>
<body style="margin: 0; padding: 0; font-family: sans-serif;">
<div style="background-color: #faa; border-bottom: 1px solid black;
padding: 8px;">
<p><strong>%s</strong>: %s
</div>
<div style="margin: 8px;">
<pre>
"""
    req.content_type = "text/html"
    req.write(page_top % (warning, text))
|
364 |
||
365 |
# Path of the trampoline wrapper program inside IVLE's library directory.
location_cgi_python = os.path.join(ivle.conf.lib_path, "trampoline")

# Interpreter functions, keyed by interpreter name (as given in
# conf/app/server.py). Each value is execute_cgi with its first two arguments
# (interpreter and trampoline) already bound.
interpreter_objects = {
    'cgi-python': functools.partial(execute_cgi, "/usr/bin/python",
                                    location_cgi_python),
    # A None interpreter makes execute_cgi perform a no-op trampoline run.
    'noop': functools.partial(execute_cgi, None, location_cgi_python),
    # Should also have:
    # cgi-generic
    # python-server-page
}
|
|
381 |
||
382 |
def fixup_environ(req):
    """Assuming os.environ has been written with the CGI variables from
    apache, make a few changes for security and correctness.

    req: IVLE request object. Only req.uri and req.path are read.

    Does not modify req, only reads it. Modifies os.environ in place.
    """
    env = os.environ
    # Comments here are on the heavy side, explained carefully for security
    # reasons. Please read carefully before making changes.

    # Remove DOCUMENT_ROOT and SCRIPT_FILENAME. Not part of CGI spec and
    # exposes unnecessary details about server.
    # Remove PATH. The PATH here is the path on the server machine; not useful
    # inside the jail. It may be a good idea to add another path, reflecting
    # the inside of the jail, but not done at this stage.
    for key in ('DOCUMENT_ROOT', 'SCRIPT_FILENAME', 'PATH'):
        try:
            del env[key]
        except KeyError:
            # The variable was not set; nothing to remove.
            pass

    # PATH_INFO is wrong because the script doesn't physically exist.
    # Apache makes it relative to the "serve" app. It should actually be made
    # relative to the student's script. interpretservice does that in the
    # jail, so here we just clear it.
    env['PATH_INFO'] = ''
    env['PATH_TRANSLATED'] = ''

    # CGI specifies that REMOTE_HOST SHOULD be set, and MAY just be set to
    # REMOTE_ADDR. Since Apache does not appear to set this, set it to
    # REMOTE_ADDR.
    if 'REMOTE_HOST' not in env and 'REMOTE_ADDR' in env:
        env['REMOTE_HOST'] = env['REMOTE_ADDR']

    # SCRIPT_NAME is the path to the script WITHOUT PATH_INFO.
    env['SCRIPT_NAME'] = req.uri

    # SERVER_SOFTWARE is actually not Apache but IVLE, since we are
    # custom-making the CGI request.
    env['SERVER_SOFTWARE'] = "IVLE/" + str(ivle.conf.ivle_version)

    # Additional environment variables
    username = studpath.url_to_jailpaths(req.path)[0]
    env['HOME'] = os.path.join('/home', username)