409
by mattgiuca
Moved www/conf and www/common to a new directory lib. This separates the "web" |
1 |
# IVLE
|
2 |
# Copyright (C) 2007-2008 The University of Melbourne
|
|
3 |
#
|
|
4 |
# This program is free software; you can redistribute it and/or modify
|
|
5 |
# it under the terms of the GNU General Public License as published by
|
|
6 |
# the Free Software Foundation; either version 2 of the License, or
|
|
7 |
# (at your option) any later version.
|
|
8 |
#
|
|
9 |
# This program is distributed in the hope that it will be useful,
|
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 |
# GNU General Public License for more details.
|
|
13 |
#
|
|
14 |
# You should have received a copy of the GNU General Public License
|
|
15 |
# along with this program; if not, write to the Free Software
|
|
16 |
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
17 |
||
18 |
# Module: Interpret
|
|
19 |
# Author: Matt Giuca
|
|
20 |
# Date: 18/1/2008
|
|
21 |
||
22 |
# Runs a student script in a safe execution environment.
|
|
23 |
#
|
|
24 |
# NOTE: This script currently disables cookies. This means students will be
|
|
25 |
# unable to write session-based or stateful web applications. This is done for
|
|
26 |
# security reasons (we do not want the students to see the IVLE cookie of
|
|
27 |
# whoever is visiting their pages).
|
|
28 |
# This can be resolved but needs careful sanitisation. See fixup_environ.
|
|
29 |
||
30 |
from common import studpath |
|
632
by drtomc
Get the user id from a cached copy of database info. |
31 |
from common import db |
409
by mattgiuca
Moved www/conf and www/common to a new directory lib. This separates the "web" |
32 |
import conf |
33 |
import functools |
|
34 |
||
35 |
import os |
|
36 |
import pwd |
|
37 |
import subprocess |
|
38 |
import cgi |
|
39 |
||
40 |
# TODO: Make progressive output work
|
|
41 |
# Question: Will having a large buffer size stop progressive output from
|
|
42 |
# working on smaller output
|
|
43 |
||
44 |
# Maximum number of bytes requested from the CGI process's stdout in a single
# read() call while relaying its output (see execute_cgi).
CGI_BLOCK_SIZE = 65535
|
45 |
||
632
by drtomc
Get the user id from a cached copy of database info. |
46 |
# Cache of login name -> unix uid, filled from the database on demand.
uids = {}

def get_uid(login):
    """Return the unix uid belonging to the given login name.

    The module-level uids dictionary acts as a cache. When the login is not
    in it, the whole cache is replaced with a fresh copy of the login/unixid
    pairs read from the database, and the lookup is tried once more.

    Raises KeyError if the login is still unknown after the refresh.
    """
    global uids
    if login not in uids:
        # Cache miss: reload every (login, unixid) pair in one query.
        rows = db.DB().get_all('login', ['login', 'unixid'])
        uids = dict((row['login'], row['unixid']) for row in rows)
    return uids[login]
|
63 |
||
420
by mattgiuca
common.interpret: Changed interface (refactor). |
64 |
def interpret_file(req, owner, jail_dir, filename, interpreter):
    """Serves a file by interpreting it using one of IVLE's builtin
    interpreters. All interpreters are intended to run in the user's jail. The
    jail location is provided as an argument to the interpreter but it is up
    to the individual interpreters to create the jail.

    req: An IVLE request object.
    owner: Username of the user who owns the file being served.
    jail_dir: Absolute path to the user's jail.
    filename: Filename within the user's jail. A leading path separator is
        added if it is missing.
    interpreter: A function object to call. It is invoked as
        interpreter(uid, jail_dir, working_dir, filename_abs, req).

    Returns whatever the interpreter returns.
    Raises KeyError (from get_uid) if owner is not a known login.
    """
    # We can't test here whether or not the target file actually exists,
    # because the apache user may not have permission. Instead we have to
    # rely on the interpreter generating an error.

    # Paths "relative" to the jail actually begin with a '/' as they are
    # absolute in the jailspace, so normalise filename to that form.
    if filename.startswith(os.sep):
        filename_abs = filename
    else:
        filename_abs = os.path.join(os.sep, filename)

    # Get the UID of the owner of the file
    # (Note: files are executed by their owners, not the logged in user.
    # This ensures users are responsible for their own programs and also
    # allows them to be executed by the public).
    uid = get_uid(owner)

    # jail_dir is the absolute jail directory.
    # filename_abs is the filename as seen from inside the user's jail.
    # working_dir is the directory containing the file, also as seen from
    # inside the user's jail.
    working_dir = os.path.dirname(filename_abs)

    return interpreter(uid, jail_dir, working_dir, filename_abs, req)
409
by mattgiuca
Moved www/conf and www/common to a new directory lib. This separates the "web" |
102 |
|
103 |
class CGIFlags:
    """Mutable record of how far the parsing of a CGI script's output has
    progressed. A fresh instance describes a script that has produced no
    output yet."""
    def __init__(self):
        # Parsed headers so far (header names : values).
        self.headers = {}
        # Header text received so far that is not yet terminated by a blank
        # line.
        self.linebuf = ""
        # True once an HTML warning page has been started on the response.
        self.wrote_html_warning = False
        # Initialised here; not read or written elsewhere in this module as
        # far as visible.
        self.got_cgi_headers = False
        # True once the header section is finished and body data may be
        # passed through.
        self.started_cgi_body = False
|
111 |
||
112 |
def execute_cgi(interpreter, trampoline, uid, jail_dir, working_dir,
                script_path, req):
    """
    Runs a CGI script through the trampoline and relays its output to req.

    interpreter: Path of the interpreter program, handed to the trampoline
        as an argument.
    trampoline: Full path on the local system to the CGI wrapper program
        being executed.
    uid: User ID of the owner of the file.
    jail_dir: Absolute path of owner's jail directory.
    working_dir: Directory containing the script file relative to owner's
        jail.
    script_path: CGI script relative to the owner's jail.
    req: IVLE request object.

    The called CGI wrapper application shall be called using popen and receive
    the HTTP body on stdin. It shall receive the CGI environment variables to
    its environment.

    NOTE: The CGI environment is passed by temporarily replacing the contents
    of os.environ, which is shared by the whole process. It is always restored
    before this function continues, even if starting the child fails.
    """

    # The trampoline is executed from the directory it lives in.
    tramp_dir = os.path.dirname(trampoline)

    # TODO: Don't create a file if the body length is known to be 0
    # Write the HTTP body to a temporary file so it can be passed as a *real*
    # file to popen.
    f = os.tmpfile()
    try:
        body = req.read()
        if body is not None:
            f.write(body)
            f.flush()
            f.seek(0)       # Rewind, for reading

        # Set up the environment
        # This automatically asks mod_python to load up the CGI variables
        # into the environment (which is a good first approximation)
        old_env = os.environ.copy()
        try:
            for k in list(os.environ.keys()):
                del os.environ[k]
            for (k, v) in req.get_cgi_environ().items():
                os.environ[k] = v
            fixup_environ(req)

            # usage: tramp uid jail_dir working_dir interpreter script_path
            proc = subprocess.Popen(
                [trampoline, str(uid), jail_dir, working_dir, interpreter,
                 script_path],
                stdin=f, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                cwd=tramp_dir)
        finally:
            # Restore the environment, whether or not the child was started,
            # so the visitor's CGI variables never leak into later requests.
            for k in list(os.environ.keys()):
                del os.environ[k]
            for (k, v) in old_env.items():
                os.environ[k] = v
    finally:
        # Once started, the child has its own copy of the descriptor, so ours
        # is no longer needed.
        f.close()

    # Tracks header parsing state across the chunks of output.
    cgiflags = CGIFlags()

    # Read from the process's stdout into req
    try:
        data = proc.stdout.read(CGI_BLOCK_SIZE)
        while len(data) > 0:
            process_cgi_output(req, data, cgiflags)
            data = proc.stdout.read(CGI_BLOCK_SIZE)
    finally:
        # Release the pipe and reap the child so it does not linger as a
        # zombie.
        proc.stdout.close()
        proc.wait()

    # If we haven't processed headers yet, now is a good time.
    # Up to two newlines may be needed: one to finish an unterminated last
    # line and one to form the blank line that ends the headers. Without the
    # second, output lacking a trailing newline would stay buffered and be
    # lost.
    for _ in range(2):
        if cgiflags.started_cgi_body:
            break
        process_cgi_output(req, '\n', cgiflags)

    # If we wrote an HTML warning header, write the footer
    if cgiflags.wrote_html_warning:
        req.write("""</pre>
</div>
</body>
</html>""")
|
186 |
||
187 |
def _split_first_header_line(headers):
    """Split headers into [first_line, rest], or [headers] if it is a single
    line. A CRLF line break is preferred; a bare LF is used only when the
    text contains no CRLF at all."""
    parts = headers.split('\r\n', 1)
    if len(parts) == 1:
        parts = headers.split('\n', 1)
    return parts

def process_cgi_output(req, data, cgiflags):
    """Processes a chunk of CGI output. data is a string of arbitrary length;
    some arbitrary chunk of output written by the CGI script.

    req: IVLE request object; body data is written to it.
    cgiflags: CGIFlags object carrying the parsing state between calls.

    Until the blank line ending the headers has been seen, data is only
    accumulated in cgiflags.linebuf. Once it is seen, every header line is
    handed to process_cgi_header_line, an HTML warning is started if the
    required headers are missing, and the remainder is written as body.
    After that, chunks are written straight through (HTML-escaped if a
    warning page was started).
    """
    if cgiflags.started_cgi_body:
        if cgiflags.wrote_html_warning:
            # HTML escape text if wrote_html_warning
            req.write(cgi.escape(data))
        else:
            req.write(data)
        return

    # Break data into lines of CGI header data.
    linebuf = cgiflags.linebuf + data
    # First see if we can split all header data
    # We need to get the double CRLF- or LF-terminated headers, whichever
    # is smaller, as either sequence may appear somewhere in the body.
    usplit = linebuf.split('\n\n', 1)
    wsplit = linebuf.split('\r\n\r\n', 1)
    if len(usplit[0]) > len(wsplit[0]):
        split = wsplit
    else:
        split = usplit
    if len(split) == 1:
        # Haven't seen all headers yet. Buffer and come back later.
        cgiflags.linebuf = linebuf
        return

    headers = split[0]
    data = split[1]
    cgiflags.linebuf = ""
    cgiflags.started_cgi_body = True

    # Process all the header lines
    split = _split_first_header_line(headers)
    while True:
        process_cgi_header_line(req, split[0], cgiflags)
        if len(split) == 1:
            break
        headers = split[1]
        if cgiflags.wrote_html_warning:
            # We're done with headers. Treat the rest as data.
            data = headers + '\n' + data
            break
        split = _split_first_header_line(headers)

    # Check to make sure the required headers were written
    if cgiflags.wrote_html_warning:
        # We already reported an error, that's enough
        pass
    elif "Content-Type" in cgiflags.headers:
        pass
    elif "Location" in cgiflags.headers:
        if ("Status" in cgiflags.headers and req.status >= 300
                and req.status < 400):
            pass
        else:
            # The placeholder is entity-escaped so the browser shows it
            # instead of treating it as an unknown tag.
            message = """You did not write a valid status code for
the given location. To make a redirect, you may wish to try:</p>
<pre style="margin-left: 1em">Status: 302 Found
Location: &lt;redirect address&gt;</pre>"""
            write_html_warning(req, message)
            cgiflags.wrote_html_warning = True
    else:
        message = """You did not print a Content-Type header.
CGI requires that you print a "Content-Type". You may wish to try:</p>
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
        write_html_warning(req, message)
        cgiflags.wrote_html_warning = True

    # Call myself to flush out the extra bit of data we read
    process_cgi_output(req, data, cgiflags)
|
256 |
||
257 |
# Headers this module treats specially, keyed by lower-cased name. CGI header
# field names are not case sensitive, so e.g. "Content-type" must be treated
# as "Content-Type".
_SPECIAL_CGI_HEADERS = {
    'content-type': 'Content-Type',
    'location': 'Location',
    'status': 'Status',
}

def process_cgi_header_line(req, line, cgiflags):
    """Process a line of CGI header data. line is a string representing a
    complete line of text, stripped and without the newline.

    req: IVLE request object. Its content_type, location, status or
        headers_out is updated according to the header.
    cgiflags: CGIFlags object. The header is recorded in cgiflags.headers;
        the specially handled headers are recorded under their canonical
        spelling ("Content-Type", "Location", "Status") whatever case the
        script used.

    If the line is not a valid header (no colon, or a "Status" value that
    does not start with an integer), an HTML warning is written to req,
    cgiflags.wrote_html_warning is set and the line is passed on as body
    data.
    """
    try:
        name, value = line.split(':', 1)
    except ValueError:
        # No colon. The user did not write valid headers.
        if not cgiflags.headers:
            # First line was not a header line. We can assume this is not
            # a CGI app.
            message = """You did not print a CGI header.
CGI requires that you print a "Content-Type". You may wish to try:</p>
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
        else:
            # They printed some header at least, but there was an invalid
            # header.
            message = """You printed an invalid CGI header. You need to leave
a blank line after the headers, before writing the page contents."""
        write_html_warning(req, message)
        cgiflags.wrote_html_warning = True
        # Handle the rest of this line as normal data
        process_cgi_output(req, line + '\n', cgiflags)
        return

    # Read CGI headers
    # Map the specially handled names onto their canonical spelling; any
    # other header name is kept exactly as the script wrote it.
    name = _SPECIAL_CGI_HEADERS.get(name.lower(), name)
    value = value.strip()
    if name == "Content-Type":
        req.content_type = value
    elif name == "Location":
        req.location = value
    elif name == "Status":
        # Must be an integer, followed by a space, and then the status line
        # which we ignore (seems like Apache has no way to send a custom
        # status line).
        try:
            req.status = int(value.split(' ', 1)[0])
        except ValueError:
            message = """The "Status" CGI header was invalid. You need to
print a number followed by a message, such as "302 Found"."""
            write_html_warning(req, message)
            cgiflags.wrote_html_warning = True
            # Handle the rest of this line as normal data
            process_cgi_output(req, line + '\n', cgiflags)
    else:
        # Generic HTTP header
        # FIXME: Security risk letting users write arbitrary headers?
        req.headers_out[name] = value
    # Only the last value of a repeated header is kept here.
    cgiflags.headers[name] = value
|
306 |
||
307 |
def write_html_warning(req, text):
    """Start an HTML page on req that warns the user about invalid CGI
    interaction on their part. text is inserted into the page unescaped, so
    it may contain HTML markup.

    The response content type is forced to text/html. The page is left open
    inside a <pre> element so the caller can append the script's output and
    then close the document.
    """
    page_top = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type"
content="text/html; charset=utf-8" />
</head>
<body style="margin: 0; padding: 0; font-family: sans-serif;">
<div style="background-color: #faa; border-bottom: 1px solid black;
padding: 8px;">
<p><strong>Warning</strong>: %s
</div>
<div style="margin: 8px;">
<pre>
"""
    req.content_type = "text/html"
    req.write(page_top % text)
|
326 |
||
327 |
# Path of the trampoline program inside the IVLE install directory. It is
# bound below as the "trampoline" argument of execute_cgi.
location_cgi_python = os.path.join(conf.ivle_install_dir,
    "bin/trampoline")

# Mapping of interpreter names (as given in conf/app/server.py) to
# interpreter functions.
# Each function has execute_cgi's first two arguments (interpreter and
# trampoline) already bound, leaving
# (uid, jail_dir, working_dir, script_path, req) to be supplied by the
# caller -- presumably interpret_file, which calls its interpreter argument
# with exactly those five values.

interpreter_objects = {
    'cgi-python'
        : functools.partial(execute_cgi, "/usr/bin/python",
            location_cgi_python),
    # Should also have:
    # cgi-generic
    # python-server-page
}
|
|
341 |
||
342 |
def fixup_environ(req):
    """Assuming os.environ has been written with the CGI variables from
    apache, make a few changes for security and correctness.

    req: IVLE request object. Its path and uri attributes are read.

    Does not modify req, only reads it. Modifies os.environ in place and
    returns nothing.
    """
    env = os.environ
    # Comments here are on the heavy side, explained carefully for security
    # reasons. Please read carefully before making changes.

    unwanted = (
        # It is a security risk to have students see the IVLE cookie of
        # their visitors.
        'HTTP_COOKIE',
        # Not part of CGI spec and exposes unnecessary details about server.
        'DOCUMENT_ROOT',
        'SCRIPT_FILENAME',
        # The PATH here is the path on the server machine; not useful
        # inside the jail. It may be a good idea to add another path,
        # reflecting the inside of the jail, but not done at this stage.
        'PATH',
    )
    for key in unwanted:
        try:
            del env[key]
        except KeyError:
            # Variable was not set in the first place; nothing to remove.
            pass

    # PATH_INFO is wrong because the script doesn't physically exist.
    # Apache makes it relative to the "serve" app. It should actually be made
    # relative to the student's script.
    # TODO: At this stage, it is not possible to add a path after the script,
    # so PATH_INFO is always "".
    path_info = ""
    env['PATH_INFO'] = path_info

    # PATH_TRANSLATED currently points to a non-existent location within the
    # local web server directory. Instead make it represent a path within the
    # student jail.
    (username, _, path_translated) = studpath.url_to_jailpaths(req.path)
    if not path_translated.startswith(os.sep):
        path_translated = os.sep + path_translated
    env['PATH_TRANSLATED'] = path_translated

    # CGI specifies that REMOTE_HOST SHOULD be set, and MAY just be set to
    # REMOTE_ADDR. Since Apache does not appear to set this, set it to
    # REMOTE_ADDR.
    if 'REMOTE_HOST' not in env and 'REMOTE_ADDR' in env:
        env['REMOTE_HOST'] = env['REMOTE_ADDR']

    # SCRIPT_NAME is the path to the script WITHOUT PATH_INFO.
    script_name = req.uri
    if len(path_info) > 0:
        script_name = script_name[:-len(path_info)]
    env['SCRIPT_NAME'] = script_name

    # SERVER_SOFTWARE is actually not Apache but IVLE, since we are
    # custom-making the CGI request.
    env['SERVER_SOFTWARE'] = "IVLE/" + str(conf.ivle_version)

    # Additional environment variables
    env['HOME'] = os.path.join('/home', username)