93
by mattgiuca
New directory hierarchy. |
1 |
# IVLE
|
2 |
# Copyright (C) 2007-2008 The University of Melbourne
|
|
3 |
#
|
|
4 |
# This program is free software; you can redistribute it and/or modify
|
|
5 |
# it under the terms of the GNU General Public License as published by
|
|
6 |
# the Free Software Foundation; either version 2 of the License, or
|
|
7 |
# (at your option) any later version.
|
|
8 |
#
|
|
9 |
# This program is distributed in the hope that it will be useful,
|
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 |
# GNU General Public License for more details.
|
|
13 |
#
|
|
14 |
# You should have received a copy of the GNU General Public License
|
|
15 |
# along with this program; if not, write to the Free Software
|
|
16 |
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
17 |
||
18 |
# Module: Interpret
|
|
19 |
# Author: Matt Giuca
|
|
244
by mattgiuca
interpret.py: Major refactor of CGI interpreter code. Still missing some |
20 |
# Date: 18/1/2008
|
93
by mattgiuca
New directory hierarchy. |
21 |
|
22 |
# Runs a student script in a safe execution environment.
|
|
249
by mattgiuca
interpret.py: Added "fixup_environ" which performs a whole series of changes |
23 |
#
|
24 |
# NOTE: This script currently disables cookies. This means students will be
|
|
25 |
# unable to write session-based or stateful web applications. This is done for
|
|
26 |
# security reasons (we do not want the students to see the IVLE cookie of
|
|
27 |
# whoever is visiting their pages).
|
|
28 |
# This can be resolved but needs careful sanitisation. See fixup_environ.
|
|
93
by mattgiuca
New directory hierarchy. |
29 |
|
30 |
from common import studpath |
|
31 |
import conf |
|
32 |
import functools |
|
33 |
||
34 |
import os |
|
35 |
import pwd |
|
36 |
import subprocess |
|
245
by mattgiuca
interpret.py: More major work on CGI output handling. Now correctly handles |
37 |
import cgi |
38 |
||
39 |
# TODO: Make progressive output work
|
|
40 |
# Question: Will having a large buffer size stop progressive output from
|
|
41 |
# working on smaller output?
|
|
93
by mattgiuca
New directory hierarchy. |
42 |
|
244
by mattgiuca
interpret.py: Major refactor of CGI interpreter code. Still missing some |
43 |
CGI_BLOCK_SIZE = 65535 |
44 |
||
93
by mattgiuca
New directory hierarchy. |
45 |
def interpret_file(req, owner, filename, interpreter):
    """Serves a file by running it through one of IVLE's builtin
    interpreters.

    Interpreters are meant to run inside the owner's jail. This function only
    tells the interpreter where the jail is; actually creating the jail is
    left to each interpreter.

    req: An IVLE request object.
    owner: Username of the user who owns the file being served.
    filename: Filename in the local file system.
    interpreter: A function object to call. It is invoked as
        interpreter(uid, jail_dir, working_dir, path, req) and whatever it
        returns is returned from here.
    """
    # Check readability up front, since not every interpreter can be relied
    # upon to complain about a missing file. Execute permission is
    # deliberately not checked; only some interpreters need it.
    readable = os.access(filename, os.R_OK)
    if not readable:
        req.throw_error(req.HTTP_NOT_FOUND)

    # Scripts run as the user who owns them, not as whoever is logged in.
    # That keeps users responsible for their own programs and lets the
    # public execute them too.
    try:
        uid = pwd.getpwnam(owner).pw_uid
    except KeyError:
        # Unknown user. The readability check above was expected to have
        # caught this already.
        req.throw_error(req.HTTP_INTERNAL_SERVER_ERROR)

    # Re-split req.path, this time relative to the jail:
    #   jail_dir    - the absolute jail directory
    #   path        - the file, relative to the owner's jail
    #   working_dir - the directory holding the file, relative to the jail
    # Jail-relative paths start with '/' because they are absolute from
    # inside the jail.
    (_, jail_dir, jail_relative) = studpath.url_to_jailpaths(req.path)
    path = os.path.join('/', jail_relative)
    working_dir = os.path.dirname(path)

    return interpreter(uid, jail_dir, working_dir, path, req)
|
86 |
||
244
by mattgiuca
interpret.py: Major refactor of CGI interpreter code. Still missing some |
87 |
class CGIFlags:
    """Holds the mutable state needed while reading a CGI script's output
    chunk by chunk."""
    def __init__(self):
        # Output received so far that has not yet been recognised as a
        # complete header section.
        self.linebuf = ""
        # Header names : values, for every header line parsed so far.
        self.headers = {}
        # Nothing has happened yet, so every flag starts out cleared:
        #   started_cgi_body   - the header section has been consumed
        #   got_cgi_headers    - initialised here only
        #   wrote_html_warning - the HTML warning page header has been sent
        self.started_cgi_body = self.got_cgi_headers = False
        self.wrote_html_warning = False
93
by mattgiuca
New directory hierarchy. |
95 |
|
98
by mattgiuca
common/interpret.py: Now passes the location of the python interpreter to the |
96 |
def execute_cgi(interpreter, trampoline, uid, jail_dir, working_dir,
                script_path, req):
    """Runs a CGI script through the trampoline and relays its output to req.

    interpreter: Path of the interpreter program. It is handed to the
        trampoline as one of its command-line arguments.
    trampoline: Full path on the local system to the CGI wrapper program
        being executed.
    uid: User ID of the owner of the file.
    jail_dir: Absolute path of owner's jail directory.
    working_dir: Directory containing the script file relative to owner's
        jail.
    script_path: CGI script relative to the owner's jail.
    req: IVLE request object.

    The called CGI wrapper application shall be called using popen and receive
    the HTTP body on stdin. It shall receive the CGI environment variables to
    its environment.

    os.environ is temporarily replaced with the CGI environment while the
    child is spawned and is always put back afterwards, even if spawning
    fails.
    """
    # The trampoline is launched from the directory it lives in.
    (tramp_dir, _) = os.path.split(trampoline)

    # TODO: Don't create a file if the body length is known to be 0
    # Write the HTTP body to a temporary file so it can be passed as a *real*
    # file to popen.
    f = os.tmpfile()
    try:
        body = req.read()
        if body is not None:
            f.write(body)
            f.flush()
            f.seek(0)       # Rewind, for reading

        # Set up the environment
        # This automatically asks mod_python to load up the CGI variables
        # into the environment (which is a good first approximation)
        old_env = os.environ.copy()
        try:
            # Copy the keys before deleting so the mapping is never mutated
            # while it is being iterated.
            for k in list(os.environ.keys()):
                del os.environ[k]
            for (k, v) in req.get_cgi_environ().items():
                os.environ[k] = v
            fixup_environ(req)

            # usage: tramp uid jail_dir working_dir interpreter script_path
            pid = subprocess.Popen(
                [trampoline, str(uid), jail_dir, working_dir, interpreter,
                 script_path],
                stdin=f, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                cwd=tramp_dir)
        finally:
            # Restore the environment. This must happen even when something
            # above raised, otherwise the server process would be left
            # running with the CGI environment.
            for k in list(os.environ.keys()):
                del os.environ[k]
            for (k, v) in old_env.items():
                os.environ[k] = v
    finally:
        # Our handle on the body file is no longer needed once the child has
        # been given its stdin (or if we never got that far).
        f.close()

    # cgiflags carries the header-parsing state between chunks.
    cgiflags = CGIFlags()

    # Read from the process's stdout into req
    try:
        data = pid.stdout.read(CGI_BLOCK_SIZE)
        while len(data) > 0:
            process_cgi_output(req, data, cgiflags)
            data = pid.stdout.read(CGI_BLOCK_SIZE)
    finally:
        pid.stdout.close()

    # If we haven't processed headers yet, now is a good time.
    # The header section is only recognised once a blank line has been seen.
    # Output that never contained one is still sitting in the buffer, and may
    # need up to two newlines appended (if it did not end in a newline
    # itself) before it is flushed through.
    for _ in range(2):
        if cgiflags.started_cgi_body:
            break
        process_cgi_output(req, '\n', cgiflags)

    # If we wrote an HTML warning header, write the footer
    if cgiflags.wrote_html_warning:
        req.write("""</pre>
</div>
</body>
</html>""")
|
170 |
||
171 |
def _split_cgi_headers(text):
    """Splits text at its first blank line (CRLF or UNIX style).

    Returns a (headers, body) pair, or None if text contains no blank line
    yet.
    """
    crlf_at = text.find('\r\n\r\n')
    lf_at = text.find('\n\n')
    # Whichever separator comes first ends the headers. Anything after that,
    # including further blank lines of either style, belongs to the body.
    if lf_at >= 0 and (crlf_at < 0 or lf_at < crlf_at):
        return (text[:lf_at], text[lf_at + 2:])
    if crlf_at >= 0:
        return (text[:crlf_at], text[crlf_at + 4:])
    return None

def process_cgi_output(req, data, cgiflags):
    """Processes a chunk of CGI output. data is a string of arbitrary length;
    some arbitrary chunk of output written by the CGI script.

    Until the blank line that ends the CGI headers has been seen, output is
    accumulated in cgiflags.linebuf. Once it has, each header line is passed
    to process_cgi_header_line, an HTML warning is written if the required
    headers are missing, and everything else is written to req as the body
    (HTML-escaped if a warning page is being shown).

    req: IVLE request object. The response is written to it.
    data: The chunk of output to process.
    cgiflags: CGIFlags object holding the parsing state between calls.
    """
    if cgiflags.started_cgi_body:
        if cgiflags.wrote_html_warning:
            # HTML escape text if wrote_html_warning
            req.write(cgi.escape(data))
        else:
            req.write(data)
        return

    # Still in the headers. See whether the whole header section has
    # arrived yet.
    linebuf = cgiflags.linebuf + data
    parts = _split_cgi_headers(linebuf)
    if parts is None:
        # Haven't seen all headers yet. Buffer and come back later.
        cgiflags.linebuf = linebuf
        return

    (headers, data) = parts
    cgiflags.linebuf = ""
    # Set this before handling the lines: a bad header line is echoed by
    # calling back into this function, and must be treated as body text.
    cgiflags.started_cgi_body = True

    # Process all the header lines. Line endings are normalised first so a
    # mixture of CRLF and UNIX newlines cannot glue two headers together.
    header_lines = headers.replace('\r\n', '\n').split('\n')
    for (index, line) in enumerate(header_lines):
        process_cgi_header_line(req, line, cgiflags)
        if cgiflags.wrote_html_warning:
            # We're done with headers. Treat the rest as data, putting back
            # the blank line that separated it from the body.
            leftover = header_lines[index + 1:]
            data = '\n'.join(leftover + ['', data])
            break

    # Check to make sure the required headers were written
    if cgiflags.wrote_html_warning:
        # We already reported an error, that's enough
        pass
    elif "Content-Type" in cgiflags.headers:
        pass
    elif "Location" in cgiflags.headers:
        # A redirect needs no Content-Type, but it does need a 3xx status.
        if not ("Status" in cgiflags.headers
                and 300 <= req.status < 400):
            message = """You did not write a valid status code for
the given location. To make a redirect, you may wish to try:</p>
<pre style="margin-left: 1em">Status: 302 Found
Location: &lt;redirect address&gt;</pre>"""
            write_html_warning(req, message)
            cgiflags.wrote_html_warning = True
    else:
        message = """You did not print a Content-Type header.
CGI requires that you print a "Content-Type". You may wish to try:</p>
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
        write_html_warning(req, message)
        cgiflags.wrote_html_warning = True

    # Call myself to flush out the extra bit of data we read
    process_cgi_output(req, data, cgiflags)
|
239 |
||
240 |
def process_cgi_header_line(req, line, cgiflags):
    """Process a line of CGI header data. line is a string representing a
    complete line of text, stripped and without the newline.

    Content-Type, Location and Status are applied to the matching attributes
    of req; any other header is copied into req.headers_out. Every header is
    also recorded in cgiflags.headers. Header names are matched without
    regard to case, and the three special headers are recorded under their
    canonical spelling.

    If the line is not a valid header (or the Status value is not a number),
    an HTML warning is written, cgiflags.wrote_html_warning is set, and the
    line itself is output as ordinary page text.
    """
    try:
        name, value = line.split(':', 1)
    except ValueError:
        # No colon. The user did not write valid headers.
        if len(cgiflags.headers) == 0:
            # First line was not a header line. We can assume this is not
            # a CGI app.
            message = """You did not print a CGI header.
CGI requires that you print a "Content-Type". You may wish to try:</p>
<pre style="margin-left: 1em">Content-Type: text/html</pre>"""
        else:
            # They printed some header at least, but there was an invalid
            # header.
            message = """You printed an invalid CGI header. You need to leave
a blank line after the headers, before writing the page contents."""
        write_html_warning(req, message)
        cgiflags.wrote_html_warning = True
        # Handle the rest of this line as normal data
        process_cgi_output(req, line + '\n', cgiflags)
        return

    # CGI header names are not case sensitive. Map the ones handled specially
    # onto a single spelling, so "content-type" is recognised both here and
    # by code that later looks the header up in cgiflags.headers.
    special_names = {
        "content-type": "Content-Type",
        "location": "Location",
        "status": "Status",
    }
    name = name.strip()
    name = special_names.get(name.lower(), name)

    # Read CGI headers
    value = value.strip()
    if name == "Content-Type":
        req.content_type = value
    elif name == "Location":
        req.location = value
    elif name == "Status":
        # Must be an integer, followed by a space, and then the status line
        # which we ignore (seems like Apache has no way to send a custom
        # status line).
        try:
            req.status = int(value.split(' ', 1)[0])
        except ValueError:
            message = """The "Status" CGI header was invalid. You need to
print a number followed by a message, such as "302 Found"."""
            write_html_warning(req, message)
            cgiflags.wrote_html_warning = True
            # Handle the rest of this line as normal data
            process_cgi_output(req, line + '\n', cgiflags)
    else:
        # Generic HTTP header
        # FIXME: Security risk letting users write arbitrary headers?
        req.headers_out[name] = value
    cgiflags.headers[name] = value
|
244
by mattgiuca
interpret.py: Major refactor of CGI interpreter code. Still missing some |
289 |
|
245
by mattgiuca
interpret.py: More major work on CGI output handling. Now correctly handles |
290 |
def write_html_warning(req, text):
    """Writes the opening part of an HTML page warning the user that their
    program did not interact correctly as a CGI application.

    req: Request object. Its content_type is set to "text/html" and the page
        is written to it.
    text: The warning message. It is inserted into the page as-is, so it may
        contain HTML markup.

    The page is left open inside a <pre> element; nothing here closes it.
    """
    # Adjacent literals, one per line of markup, joined into one template.
    page_top = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n'
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml">\n'
        '<head>\n'
        '<meta http-equiv="Content-Type"\n'
        'content="text/html; charset=utf-8" />\n'
        '</head>\n'
        '<body style="margin: 0; padding: 0; font-family: sans-serif;">\n'
        '<div style="background-color: #faa; border-bottom: 1px solid black;\n'
        'padding: 8px;">\n'
        '<p><strong>Warning</strong>: %s\n'
        '</div>\n'
        '<div style="margin: 8px;">\n'
        '<pre>\n'
    )
    req.content_type = "text/html"
    req.write(page_top % text)
309 |
||
97
by mattgiuca
Moved template.py and setup.py to better places. |
310 |
# Location of the trampoline program inside the IVLE install directory. It is
# the wrapper that execute_cgi launches for CGI Python scripts.
location_cgi_python = os.path.join(conf.ivle_install_dir, "bin/trampoline")

# Mapping of interpreter names (as given in conf/app/server.py) to
# interpreter functions.
interpreter_objects = {
    # execute_cgi with the interpreter and trampoline paths already filled
    # in; the remaining arguments are supplied when it is called.
    'cgi-python': functools.partial(
        execute_cgi, "/usr/bin/python", location_cgi_python),
    # Should also have:
    # cgi-generic
    # python-server-page
}
|
|
324 |
||
249
by mattgiuca
interpret.py: Added "fixup_environ" which performs a whole series of changes |
325 |
def fixup_environ(req):
    """Assuming os.environ has been written with the CGI variables from
    apache, make a few changes for security and correctness.

    req: IVLE request object. req.path and req.uri are read.

    Does not modify req, only reads it.
    """
    env = os.environ
    # Comments here are on the heavy side, explained carefully for security
    # reasons. Please read carefully before making changes.

    # Variables which must not reach the student's program:
    #
    # HTTP_COOKIE: It is a security risk to have students see the IVLE
    # cookie of their visitors.
    #
    # DOCUMENT_ROOT and SCRIPT_FILENAME: Not part of CGI spec and expose
    # unnecessary details about server.
    #
    # PATH: The PATH here is the path on the server machine; not useful
    # inside the jail. It may be a good idea to add another path, reflecting
    # the inside of the jail, but not done at this stage.
    for unwanted in ('HTTP_COOKIE', 'DOCUMENT_ROOT', 'SCRIPT_FILENAME',
                     'PATH'):
        try:
            del env[unwanted]
        except KeyError:
            # Not set in the first place, so nothing to remove.
            pass

    # PATH_INFO is wrong because the script doesn't physically exist.
    # Apache makes it relative to the "serve" app. It should actually be made
    # relative to the student's script.
    # TODO: At this stage, it is not possible to add a path after the script,
    # so PATH_INFO is always "".
    path_info = ""
    env['PATH_INFO'] = path_info

    # PATH_TRANSLATED currently points to a non-existent location within the
    # local web server directory. Instead make it represent a path within the
    # student jail.
    (username, _, path_translated) = studpath.url_to_jailpaths(req.path)
    if len(path_translated) == 0 or path_translated[0] != os.sep:
        path_translated = os.sep + path_translated
    env['PATH_TRANSLATED'] = path_translated

    # CGI specifies that REMOTE_HOST SHOULD be set, and MAY just be set to
    # REMOTE_ADDR. Since Apache does not appear to set this, set it to
    # REMOTE_ADDR.
    if 'REMOTE_HOST' not in env and 'REMOTE_ADDR' in env:
        env['REMOTE_HOST'] = env['REMOTE_ADDR']

    # SCRIPT_NAME is the path to the script WITHOUT PATH_INFO.
    script_name = req.uri
    if len(path_info) > 0:
        script_name = script_name[:-len(path_info)]
    env['SCRIPT_NAME'] = script_name

    # SERVER_SOFTWARE is actually not Apache but IVLE, since we are
    # custom-making the CGI request.
    env['SERVER_SOFTWARE'] = "IVLE/" + str(conf.ivle_version)

    # Additional environment variables
    env['HOME'] = os.path.join('/home', username)