523
by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions, |
1 |
#!/usr/bin/env python
|
2 |
#
|
|
3 |
# Natural Language Toolkit: Documentation generation script
|
|
4 |
#
|
|
5 |
# Copyright (C) 2001-2006 University of Pennsylvania
|
|
6 |
# Author: Edward Loper <edloper@gradient.cis.upenn.edu>
|
|
7 |
# Steven Bird (substantially cut down)
|
|
8 |
# URL: <http://nltk.sf.net>
|
|
9 |
# For license information, see LICENSE.TXT
|
|
10 |
||
11 |
r""" |
|
12 |
This is a customized driver for converting docutils reStructuredText
|
|
13 |
documents into HTML and LaTeX. It customizes the standard writers in
|
|
14 |
the following ways:
|
|
15 |
|
|
16 |
- Source code highlighting is added to all doctest blocks. In
|
|
17 |
the HTML output, highlighting is performed using css classes:
|
|
18 |
'pysrc-prompt', 'pysrc-keyword', 'pysrc-string', 'pysrc-comment',
|
|
19 |
and 'pysrc-output'.
|
|
20 |
"""
|
|
21 |
||
22 |
import re, os.path, textwrap, sys, pickle |
|
23 |
from optparse import OptionParser |
|
24 |
||
25 |
import docutils.core, docutils.nodes, docutils.io |
|
26 |
from docutils.writers import Writer |
|
27 |
from docutils.writers.html4css1 import HTMLTranslator, Writer as HTMLWriter |
|
28 |
from docutils.parsers.rst import directives, roles |
|
29 |
from docutils.readers.standalone import Reader as StandaloneReader |
|
30 |
from docutils.transforms import Transform |
|
31 |
import docutils.writers.html4css1 |
|
32 |
from doctest import DocTestParser |
|
33 |
import docutils.statemachine |
|
34 |
||
35 |
# Module-level state describing the file currently being processed.
# Both names start out as None.  NOTE(review): no main() and no
# assignment to either name appears in the visible part of this file --
# confirm where (or whether) they are still set.
OUTPUT_FORMAT = None
"""A global variable, set by main(), indicating the output format for
the current file. Can be 'latex' or 'html' or 'ref'."""

OUTPUT_BASENAME = None
"""A global variable, set by main(), indicating the base filename
of the current file (i.e., the filename with its extension
stripped). This is used to generate filenames for images."""
|
|
43 |
||
44 |
######################################################################
|
|
45 |
#{ Doctest Indentation
|
|
46 |
######################################################################
|
|
47 |
||
48 |
class UnindentDoctests(Transform):
    """
    Remove the block_quote wrapper from indented doctest blocks.

    Doctest blocks are indented in our source text because that makes
    copy/pasting with the doctest script easier and reads better, but
    we do not want them rendered inside block_quote environments.
    This transform walks the document with `UnindentDoctestVisitor`,
    which replaces every block_quote whose children are all
    doctest_blocks by those children.
    """
    default_priority = 1000

    def apply(self):
        """Run the unindenting visitor over the whole document."""
        visitor = UnindentDoctestVisitor(self.document)
        self.document.walkabout(visitor)
|
60 |
||
61 |
class UnindentDoctestVisitor(docutils.nodes.NodeVisitor):
    """
    Node visitor that unwraps block_quotes containing only doctests.

    Every node type other than block_quote is ignored on both visit
    and departure.
    """

    def __init__(self, document):
        docutils.nodes.NodeVisitor.__init__(self, document)

    def unknown_visit(self, node):
        """Ignore node types we have no handler for."""
        pass

    def unknown_departure(self, node):
        """Ignore node types we have no handler for."""
        pass

    def visit_block_quote(self, node):
        """
        Replace `node` by its children when all of them are
        doctest_blocks, then skip the (now detached) node.
        """
        for child in node:
            if not isinstance(child, docutils.nodes.doctest_block):
                # At least one child is something else: keep the quote.
                return
        node.replace_self(list(node))
        raise docutils.nodes.SkipNode()
|
71 |
||
72 |
# Matches a "# doctest: ..." option directive at the end of a line.  The
# optional first group also swallows a preceding continuation line that
# contains nothing but the "..." prompt.
_OPTION_DIRECTIVE_RE = re.compile(
    r'(\n[ ]*\.\.\.[ ]*)?#\s*doctest:\s*([^\n\'"]*)$',
    re.MULTILINE,
)


def strip_doctest_directives(text):
    """
    Return `text` with every doctest option directive comment removed.

    @param text: The source text of a doctest block.
    @return: A copy of `text` without ``# doctest: ...`` comments.
    """
    stripped = re.sub(_OPTION_DIRECTIVE_RE, '', text)
    return stripped
|
76 |
||
77 |
class pylisting(docutils.nodes.General, docutils.nodes.Element):
    """
    Python source code listing.

    A custom docutils node with no behaviour of its own.  The HTML
    translator in this file handles it in ``visit_pylisting`` and
    ``depart_pylisting``, and reads its ``name`` attribute when writing
    the listing out to a file.

    Children: doctest_block+ caption?
    """
|
|
83 |
||
84 |
######################################################################
|
|
85 |
#{ HTML Output
|
|
86 |
######################################################################
|
|
87 |
from epydoc.docwriter.html_colorize import PythonSourceColorizer |
|
88 |
import epydoc.docwriter.html_colorize |
|
89 |
# Replace epydoc's PYSRC_EXPANDTO_JAVASCRIPT value with an empty string.
# NOTE(review): presumably this stops epydoc's colorizer from emitting
# its "expand to" javascript into our output -- confirm against epydoc.
epydoc.docwriter.html_colorize .PYSRC_EXPANDTO_JAVASCRIPT = ''
|
90 |
||
91 |
class CustomizedHTMLWriter(HTMLWriter):
    """
    HTML writer that emits ASCII output (non-ASCII characters become
    XML character references) and renders nodes with
    `CustomizedHTMLTranslator`.
    """

    # Start from a private copy of the stock defaults so that the base
    # writer's dictionary is left untouched.
    settings_defaults = dict(HTMLWriter.settings_defaults)
    settings_defaults['output_encoding'] = 'ascii'
    settings_defaults['output_encoding_error_handler'] = 'xmlcharrefreplace'

    def __init__(self):
        HTMLWriter.__init__(self)
        self.translator_class = CustomizedHTMLTranslator

    # A translate() override that called postprocess(self.document)
    # before HTMLWriter.translate(self) used to live here; it is
    # disabled:
    #def translate(self):
    #    postprocess(self.document)
    #    HTMLWriter.translate(self)
|
|
105 |
||
106 |
class CustomizedHTMLTranslator(HTMLTranslator):
    """
    HTML translator that adds source code highlighting to doctest
    blocks, python listings and inline literals.

    NOTE(review): `_write_pylisting_file` reads the module-level names
    ``PYLISTING_DIR``, ``PYLISTING_EXTENSION`` and
    ``INCLUDE_DOCTESTS_IN_PYLISTING_FILES``.  None of them is defined in
    the part of this file that is visible here; confirm that they exist
    before a ``pylisting`` node is rendered, otherwise rendering raises
    NameError.
    """

    def __init__(self, document):
        HTMLTranslator.__init__(self, document)

    #{ Code boxes

    def visit_pylisting(self, node):
        """Write the listing to its own file and open its code box."""
        self._write_pylisting_file(node)
        self.body.append(self.CODEBOX_HEADER % ('pylisting', 'pylisting'))

    def depart_pylisting(self, node):
        """Close the code box opened by `visit_pylisting`."""
        self.body.append(self.CODEBOX_FOOTER)

    def visit_doctest_block(self, node):
        """
        Emit a colorized code box row for a doctest block.

        The block is rendered as a code block when its ``is_codeblock``
        attribute is true and as a doctest otherwise.  Blocks that are
        not children of a `pylisting` get their own surrounding box.
        Always raises SkipNode, because the content is fully rendered
        here.
        """
        # Collect the text content of the doctest block.
        text = ''.join(str(c) for c in node)
        text = textwrap.dedent(text)
        text = strip_doctest_directives(text)

        # Colorize the contents of the doctest block.
        colorizer = HTMLDoctestColorizer(self.encode)
        if node.get('is_codeblock'):
            typ = 'codeblock'
            pysrc = colorizer.colorize_codeblock(text)
        else:
            typ = 'doctest'
            pysrc = colorizer.colorize_doctest(text)
        pysrc = self.CODEBOX_ROW % (typ, pysrc)

        if isinstance(node.parent, pylisting):
            # The enclosing pylisting supplies the header and footer.
            self.body.append(pysrc)
        else:
            self.body.append(self.CODEBOX_HEADER % ('doctest', 'doctest'))
            self.body.append(pysrc)
            self.body.append(self.CODEBOX_FOOTER)

        raise docutils.nodes.SkipNode() # Content already processed

    CODEBOX_HEADER = ('<div class="%s">\n'
                      '<table border="0" cellpadding="0" cellspacing="0" '
                      'class="%s" width="95%%">\n')
    CODEBOX_FOOTER = '</table></div>\n'
    CODEBOX_ROW = textwrap.dedent('''\
        <tr><td class="%s">
        <table border="0" cellpadding="0" cellspacing="0" width="100%%">
        <tr><td class="pysrc">%s</td>
        </tr></table></td></tr>\n''')

    #{ Generated pylisting files

    # For generated pylisting files:
    _PYLISTING_FILE_HEADER = "# Natural Language Toolkit: %s\n\n"

    def _write_pylisting_file(self, node):
        """
        Write the code contained in a pylisting node to a file.

        The file is created in ``PYLISTING_DIR`` (which is created
        first if it does not exist) and is named after the node's
        ``name`` attribute, with every non-word character replaced by
        an underscore and ``PYLISTING_EXTENSION`` appended.  Code
        blocks are copied verbatim; doctest blocks are converted to
        script form, and only when
        ``INCLUDE_DOCTESTS_IN_PYLISTING_FILES`` is true.  Children that
        are not doctest_blocks are ignored.
        """
        if not os.path.exists(PYLISTING_DIR):
            os.mkdir(PYLISTING_DIR)

        name = re.sub(r'\W', '_', node['name'])
        filename = os.path.join(PYLISTING_DIR, name+PYLISTING_EXTENSION)
        out = open(filename, 'w')
        # try/finally (rather than "with") keeps this runnable on the
        # old Python versions the rest of the file targets, while still
        # closing the file if a write fails.
        try:
            out.write(self._PYLISTING_FILE_HEADER % name)
            for child in node:
                if not isinstance(child, docutils.nodes.doctest_block):
                    continue
                text = ''.join(str(c) for c in child)
                # .get(): same lookup as visit_doctest_block, so a block
                # without the attribute is treated as a plain doctest
                # and does not raise KeyError.
                if child.get('is_codeblock'):
                    out.write(text + '\n\n')
                elif INCLUDE_DOCTESTS_IN_PYLISTING_FILES:
                    out.write(self._doctest_to_script(text))
        finally:
            out.close()

    def _doctest_to_script(self, text):
        """
        Return the text of a doctest block rewritten as script source.

        Lines starting with the ``>>> `` prompt, and ``... ``
        continuation lines that follow them, lose their prompt.  Any
        other non-blank line is treated as expected output and is
        commented out; the first such line after source code is
        preceded by a ``# Expect:`` line.  Blank lines are kept.
        Every emitted line ends with a newline.
        """
        script = []
        in_doctest_block = False
        for line in text.split('\n'):
            if line.startswith('>>> '):
                script.append(line[4:])
                in_doctest_block = True
            elif line.startswith('... ') and in_doctest_block:
                script.append(line[4:])
            elif line.strip():
                if in_doctest_block:
                    script.append('# Expect:')
                script.append('# ' + line)
                in_doctest_block = False
            else:
                script.append(line)
                in_doctest_block = False
        return ''.join(piece + '\n' for piece in script)

    #{ Other nodes

    def visit_exercise(self, node):
        """Open an ``<exercise>`` tag, leaving its ``src`` value open."""
        self.body.append('<exercise src="')

    def depart_exercise(self, node):
        """Close the attribute and tag opened by `visit_exercise`."""
        self.body.append('"/>')

    def visit_literal(self, node):
        """Process text to prevent tokens from wrapping."""
        text = ''.join(str(c) for c in node)
        colorizer = HTMLDoctestColorizer(self.encode)
        pysrc = colorizer.colorize_inline(text)
        self.body.extend([
            self.starttag(node, 'tt', '', CLASS='doctest'),
            '<span class="pre">%s</span></tt>' % pysrc,
        ])
        raise docutils.nodes.SkipNode() # Content already processed

    def depart_field_name(self, node):
        """Close a field name; callout fields get no trailing ":"."""
        # Don't add ":" in callout field lists.
        if 'callout' in node['classes']:
            self.body.append(self.context.pop())
        else:
            HTMLTranslator.depart_field_name(self, node)

    def _striphtml_len(self, s):
        """
        Return the length of `s` with tags removed and each character
        entity counted as a single character.
        """
        without_tags = re.sub(r'<[^<]+>', '', s)
        return len(re.sub(r'&[^;]+;', 'x', without_tags))

    def visit_caption(self, node):
        """Delegate to the standard caption handling."""
        HTMLTranslator.visit_caption(self, node)

    def depart_caption(self, node):
        """Delegate to the standard caption handling."""
        HTMLTranslator.depart_caption(self, node)

    def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
        """
        Build a start tag as the base class does, additionally copying
        the node's ``mimetype`` attribute (when set) to a ``type``
        attribute on the tag.
        """
        mimetype = node.get('mimetype')
        if mimetype:
            attributes['type'] = mimetype
        return HTMLTranslator.starttag(self, node, tagname, suffix,
                                       empty, **attributes)
|
225 |
||
226 |
######################################################################
|
|
227 |
#{ Source Code Highlighting
|
|
228 |
######################################################################
|
|
229 |
||
230 |
# [xx] Note: requires the very latest svn version of epydoc!
|
|
231 |
from epydoc.markup.doctest import DoctestColorizer |
|
232 |
||
233 |
class HTMLDoctestColorizer(DoctestColorizer):
    """
    Doctest colorizer that marks up tokens with HTML ``<span>`` tags
    whose css class is ``pysrc-`` followed by the token's tag.

    NOTE(review): `markup` reads the module-level names ``CALLOUT_RE``
    and ``CALLOUT_IMG`` when callouts are enabled; neither is defined
    in the part of this file that is visible here -- confirm.
    """
    PREFIX = '<pre class="doctest">\n'
    SUFFIX = '</pre>\n'

    def __init__(self, encode_func, callouts=None):
        """
        @param encode_func: Function used to escape text for HTML.
        @param callouts: Optional mapping from callout id to callout
            number; callout handling is disabled when it is None.
        """
        self.encode = encode_func
        self.callouts = callouts

    def markup(self, s, tag):
        """
        Return the HTML for the token `s`, which has type `tag`.

        Tokens tagged 'other' are only encoded.  A comment matching
        ``CALLOUT_RE`` becomes a named anchor linking to the callout's
        reference (callouts enabled only).  Anything else is wrapped
        in a ``<span>``.
        """
        if tag == 'other':
            return self.encode(s)

        # Keep the short-circuit order: CALLOUT_RE is consulted only for
        # comments, and only when a callout table was supplied.
        callout_match = (tag == 'comment' and self.callouts is not None
                         and CALLOUT_RE.match(s))
        if not callout_match:
            return '<span class="pysrc-%s">%s</span>' % (tag, self.encode(s))

        callout_id = callout_match.group(1)
        callout_num = self.callouts[callout_id]
        img = CALLOUT_IMG % (callout_num, callout_num)
        anchor = '<a name="%s" /><a href="#ref-%s">%s</a>'
        return anchor % (callout_id, callout_id, img)
|
252 |
||
253 |
######################################################################
|
|
254 |
#{ Customized Reader (register new transforms)
|
|
255 |
######################################################################
|
|
256 |
||
257 |
class CustomizedReader(StandaloneReader):
    """
    Standalone reader that applies this module's extra transforms in
    addition to the standard ones.
    """

    # Extra transforms; the trailing comment is the default priority.
    _TRANSFORMS = [
        UnindentDoctests,  # 1000
    ]

    def get_transforms(self):
        """Return the standard transforms followed by our own."""
        inherited = StandaloneReader.get_transforms(self)
        return inherited + self._TRANSFORMS
|
263 |
||
264 |
######################################################################
|
|
265 |
#{ Main Function
|
|
266 |
######################################################################
|
|
267 |
||
268 |
# Captures the contents of the <div class="document"> element at the very
# end of a generated HTML page.
_OUTPUT_RE = re.compile(
    r'<div class="document">\s+(.*)\s+</div>\n</body>\n</html>',
    re.MULTILINE | re.DOTALL)


def rst(input):
    """
    Convert a reStructuredText string into an HTML fragment.

    The text is published with `CustomizedHTMLWriter` and
    `CustomizedReader`, and the contents of the resulting page's
    ``<div class="document">`` element are returned.

    As a side effect, ``stylesheet_path`` is set to ``/dev/null`` in
    `CustomizedHTMLWriter.settings_defaults`; the change is made on the
    class and therefore persists after the call.

    @param input: The reStructuredText source to convert.
    @return: The HTML found inside the document ``<div>``.
    @raise ValueError: If the generated page does not have the expected
        layout.
    @raise docutils.utils.SystemMessage: If docutils reports a fatal
        error; a message is printed to stdout before it is re-raised.
    """
    try:
        CustomizedHTMLWriter.settings_defaults.update({'stylesheet_path': '/dev/null'})
        output = docutils.core.publish_string(
            input, writer=CustomizedHTMLWriter(), reader=CustomizedReader())
        match = _OUTPUT_RE.search(output)
        if match:
            return match.group(1)
        raise ValueError('Could not process exercise definition')

    except docutils.utils.SystemMessage:
        # sys.exc_info() and an explicit write behave the same on every
        # Python version, unlike "except X, e" and the print statement.
        err = sys.exc_info()[1]
        sys.stdout.write('Fatal error encountered! %s\n' % (err,))
        # Re-raise for the caller.  (A sys.exit(-1) used to follow this
        # statement, but it could never be reached.)
        raise
|
286 |