# Natural Language Toolkit: Documentation generation script
#
# Copyright (C) 2001-2006 University of Pennsylvania
# Author: Edward Loper <edloper@gradient.cis.upenn.edu>
#         Steven Bird (substantially cut down)
# URL: <http://nltk.sf.net>
# For license information, see LICENSE.TXT
"""
This is a customized driver for converting docutils reStructuredText
documents into HTML and LaTeX.  It customizes the standard writers in
the following ways:

    - Source code highlighting is added to all doctest blocks.  In
      the HTML output, highlighting is performed using css classes:
      'pysrc-prompt', 'pysrc-keyword', 'pysrc-string', 'pysrc-comment',
      etc.
"""
22
import re, os.path, textwrap, sys, pickle
23
from optparse import OptionParser
25
import docutils.core, docutils.nodes, docutils.io
26
from docutils.writers import Writer
27
from docutils.writers.html4css1 import HTMLTranslator, Writer as HTMLWriter
28
from docutils.parsers.rst import directives, roles
29
from docutils.readers.standalone import Reader as StandaloneReader
30
from docutils.transforms import Transform
31
import docutils.writers.html4css1
32
from doctest import DocTestParser
33
import docutils.statemachine
"""A global variable, set by main(), indicating the output format for
the current file.  Can be 'latex' or 'html' or 'ref'."""
39
OUTPUT_BASENAME = None
"""A global variable, set by main(), indicating the base filename
of the current file (i.e., the filename with its extension
stripped).  This is used to generate filenames for images."""
44
######################################################################
45
#{ Doctest Indentation
46
######################################################################
48
class UnindentDoctests(Transform):
    """
    In our source text, we have indented most of the doctest blocks,
    for two reasons: it makes copy/pasting with the doctest script
    easier; and it's more readable.  But we don't *actually* want them
    to be included in block_quote environments when we output them.
    So this transform looks for any doctest_block's that are the only
    child of a block_quote, and eliminates the block_quote.
    """
    default_priority = 1000

    # NOTE(review): the method header was lost from the source text;
    # ``apply`` is the docutils Transform entry point -- confirm.
    def apply(self):
        """Walk the whole document with an `UnindentDoctestVisitor`."""
        self.document.walkabout(UnindentDoctestVisitor(self.document))
61
class UnindentDoctestVisitor(docutils.nodes.NodeVisitor):
    """
    Node visitor that replaces a block_quote by its children whenever
    every child of the block_quote is a doctest_block.  All other node
    types are ignored.
    """
    def __init__(self, document):
        docutils.nodes.NodeVisitor.__init__(self, document)

    def unknown_visit(self, node):
        """Ignore node types that have no dedicated handler."""
        pass

    def unknown_departure(self, node):
        """Ignore node types that have no dedicated handler."""
        pass

    def visit_block_quote(self, node):
        """
        Splice the children of C{node} into its parent if they are all
        doctest blocks, then skip the (possibly detached) node.
        """
        # Count the doctest children; the quote is unwrapped only when
        # that count equals the total number of children.
        doctest_count = sum([1 for c in node if
                             isinstance(c, docutils.nodes.doctest_block)])
        if len(node) == doctest_count:
            node.replace_self(list(node))
        # NOTE(review): indentation was lost in the source text, so it
        # is unclear whether this raise belongs inside the ``if``; it
        # is kept at method level here -- confirm against history.
        raise docutils.nodes.SkipNode()
72
# Matches a "# doctest: ..." option comment that runs to the end of a
# line (and contains no quote characters).  If the comment sits alone
# on a "..." continuation line, the preceding newline and the "..."
# prompt are part of the match as well.
_OPTION_DIRECTIVE_RE = re.compile(
    r'(\n[ ]*\.\.\.[ ]*)?#\s*doctest:\s*([^\n\'"]*)$', re.MULTILINE)


def strip_doctest_directives(text):
    """
    Return a copy of C{text} with every doctest option directive
    comment (e.g. C{# doctest: +ELLIPSIS}) removed.

    @param text: The source text of a doctest block.
    @return: C{text} with all matches of the directive pattern deleted.
    """
    return _OPTION_DIRECTIVE_RE.sub('', text)
77
class pylisting(docutils.nodes.General, docutils.nodes.Element):
    """
    Python source code listing.

    Children: doctest_block+ caption?
    """
84
######################################################################
86
######################################################################
87
from epydoc.docwriter.html_colorize import PythonSourceColorizer
88
import epydoc.docwriter.html_colorize
89
epydoc.docwriter.html_colorize .PYSRC_EXPANDTO_JAVASCRIPT = ''
91
class CustomizedHTMLWriter(HTMLWriter):
    """
    HTML writer that emits ascii output (non-ascii characters become
    XML character references) and translates the document with
    `CustomizedHTMLTranslator`.
    """
    # Work on a copy so that the stock writer's defaults are untouched.
    settings_defaults = HTMLWriter.settings_defaults.copy()
    settings_defaults.update({
        'output_encoding': 'ascii',
        'output_encoding_error_handler': 'xmlcharrefreplace',
        })

    def __init__(self):
        HTMLWriter.__init__(self)
        self.translator_class = CustomizedHTMLTranslator

    #def translate(self):
    #    postprocess(self.document)
    #    HTMLWriter.translate(self)
106
class CustomizedHTMLTranslator(HTMLTranslator):
    """
    HTML translator with custom handling for pylisting, doctest_block,
    exercise, literal and callout field_name nodes.

    NOTE(review): several lines of this class were lost from the source
    text and have been restored by inference; each such place carries
    its own NOTE(review) comment and should be confirmed against the
    file's history.
    """
    def __init__(self, document):
        HTMLTranslator.__init__(self, document)

    def visit_pylisting(self, node):
        """Write the listing to its own file and open a code box."""
        self._write_pylisting_file(node)
        self.body.append(self.CODEBOX_HEADER % ('pylisting', 'pylisting'))

    def depart_pylisting(self, node):
        """Close the code box opened by `visit_pylisting`."""
        self.body.append(self.CODEBOX_FOOTER)

    def visit_doctest_block(self, node):
        """
        Emit a colorized table row for a doctest block or code block.
        The row is wrapped in its own code box unless the node's parent
        is a `pylisting` (which supplies the box itself).
        """
        # Collect the text content of the doctest block.
        text = ''.join(str(c) for c in node)
        text = textwrap.dedent(text)
        text = strip_doctest_directives(text)

        # Colorize the contents of the doctest block.
        colorizer = HTMLDoctestColorizer(self.encode)
        if node.get('is_codeblock'):
            typ = 'codeblock'
            pysrc = colorizer.colorize_codeblock(text)
        else:
            typ = 'doctest'
            pysrc = colorizer.colorize_doctest(text)
        pysrc = self.CODEBOX_ROW % (typ, pysrc)

        if not isinstance(node.parent, pylisting):
            self.body.append(self.CODEBOX_HEADER % ('doctest', 'doctest'))
            self.body.append(pysrc)
            self.body.append(self.CODEBOX_FOOTER)
        else:
            self.body.append(pysrc)

        raise docutils.nodes.SkipNode() # Content already processed

    CODEBOX_HEADER = ('<div class="%s">\n'
                      '<table border="0" cellpadding="0" cellspacing="0" '
                      'class="%s" width="95%%">\n')
    CODEBOX_FOOTER = '</table></div>\n'
    # NOTE(review): the first line of this template was lost from the
    # source text.  It is restored here as the opening <tr><td> that
    # the closing tags require, carrying the first of the two values
    # that visit_doctest_block interpolates -- confirm the exact markup.
    CODEBOX_ROW = textwrap.dedent('''\
      <tr><td class="%s">
      <table border="0" cellpadding="0" cellspacing="0" width="100%%">
      <tr><td class="pysrc">%s</td>
      </tr></table></td></tr>\n''')

    # For generated pylisting files:
    _PYLISTING_FILE_HEADER = "# Natural Language Toolkit: %s\n\n"

    def _write_pylisting_file(self, node):
        """
        Write the source code contained in a pylisting node to a file
        in PYLISTING_DIR, named after the node's 'name' attribute (with
        non-word characters replaced by underscores).  Code blocks are
        written verbatim; doctest blocks are written only if
        INCLUDE_DOCTESTS_IN_PYLISTING_FILES is true, with prompts
        stripped and other non-blank lines turned into comments.
        """
        if not os.path.exists(PYLISTING_DIR):
            os.mkdir(PYLISTING_DIR)

        name = re.sub(r'\W', '_', node['name'])
        filename = os.path.join(PYLISTING_DIR, name+PYLISTING_EXTENSION)
        out = open(filename, 'w')
        # Close the file even if writing one of the children fails.
        try:
            out.write(self._PYLISTING_FILE_HEADER % name)
            # NOTE(review): the loop headers, the ``continue``, and the
            # last two branches of the inner loop were lost from the
            # source text and are restored by inference -- confirm.
            for child in node:
                if not isinstance(child, docutils.nodes.doctest_block):
                    continue
                elif child.get('is_codeblock'):
                    out.write(''.join(str(c) for c in child)+'\n\n')
                elif INCLUDE_DOCTESTS_IN_PYLISTING_FILES:
                    lines = ''.join(str(c) for c in child).split('\n')
                    in_doctest_block = False
                    for line in lines:
                        if line.startswith('>>> '):
                            out.write(line[4:]+'\n')
                            in_doctest_block = True
                        elif line.startswith('... ') and in_doctest_block:
                            out.write(line[4:]+'\n')
                        elif line.strip():
                            # First line after an example: introduce
                            # the output once, then comment it out.
                            if in_doctest_block:
                                out.write('# Expect:\n')
                            out.write('# ' + line+'\n')
                            in_doctest_block = False
                        else:
                            out.write(line+'\n')
                            in_doctest_block = False
        finally:
            out.close()

    def visit_exercise(self, node):
        self.body.append('<exercise src="')

    def depart_exercise(self, node):
        self.body.append('"/>')

    def visit_literal(self, node):
        """Process text to prevent tokens from wrapping."""
        text = ''.join(str(c) for c in node)
        colorizer = HTMLDoctestColorizer(self.encode)
        pysrc = colorizer.colorize_inline(text)
        self.body += [self.starttag(node, 'tt', '', CLASS='doctest'),
                      '<span class="pre">%s</span></tt>' % pysrc]
        raise docutils.nodes.SkipNode() # Content already processed

    def depart_field_name(self, node):
        # Don't add ":" in callout field lists.
        if 'callout' in node['classes']:
            self.body.append(self.context.pop())
        else:
            HTMLTranslator.depart_field_name(self, node)

    def _striphtml_len(self, s):
        """
        Return the length of C{s}, not counting tags, and counting
        each character entity (C{&...;}) as a single character.
        """
        without_tags = re.sub(r'<[^<]+>', '', s)
        return len(re.sub(r'&[^;]+;', 'x', without_tags))

    def visit_caption(self, node):
        HTMLTranslator.visit_caption(self, node)

    def depart_caption(self, node):
        HTMLTranslator.depart_caption(self, node)

    def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
        """
        Like C{HTMLTranslator.starttag}, but copy the node's 'mimetype'
        attribute (if it has one) into the tag's C{type} attribute.
        """
        if node.get('mimetype'):
            attributes['type'] = node.get('mimetype')
        # NOTE(review): the trailing arguments of this call were lost
        # from the source text; the remaining parameters are forwarded.
        return HTMLTranslator.starttag(self, node, tagname, suffix,
                                       empty, **attributes)
226
######################################################################
227
#{ Source Code Highlighting
228
######################################################################
230
# [xx] Note: requires the very latest svn version of epydoc!
231
from epydoc.markup.doctest import DoctestColorizer
233
class HTMLDoctestColorizer(DoctestColorizer):
    """
    Doctest colorizer that marks up each token with a
    C{<span class="pysrc-TAG">} element, and optionally replaces
    callout comments by linked callout images.
    """
    PREFIX = '<pre class="doctest">\n'

    def __init__(self, encode_func, callouts=None):
        """
        @param encode_func: Function applied to every piece of text
            before it is placed in the output.
        @param callouts: Mapping from callout id to callout number,
            or C{None} to leave callout comments as plain comments.
        """
        self.encode = encode_func
        self.callouts = callouts

    def markup(self, s, tag):
        """
        Return the HTML for the string C{s}, which the colorizer has
        classified as C{tag}.
        """
        # NOTE(review): the condition of this first branch was lost
        # from the source text; 'other' is assumed to be the tag for
        # unclassified text -- confirm against DoctestColorizer.
        if tag == 'other':
            return self.encode(s)

        if tag == 'comment' and self.callouts is not None:
            # Match once and reuse the result for the callout id.
            match = CALLOUT_RE.match(s)
            if match:
                callout_id = match.group(1)
                callout_num = self.callouts[callout_id]
                img = CALLOUT_IMG % (callout_num, callout_num)
                return ('<a name="%s" /><a href="#ref-%s">%s</a>' %
                        (callout_id, callout_id, img))

        return ('<span class="pysrc-%s">%s</span>' %
                (tag, self.encode(s)))
253
######################################################################
254
#{ Customized Reader (register new transforms)
255
######################################################################
257
class CustomizedReader(StandaloneReader):
    """
    Standalone reader that runs this module's extra transforms in
    addition to the standard ones.
    """
    # Extra transforms, listed with their default priority.
    _TRANSFORMS = [
        UnindentDoctests,           # 1000
        ]

    def get_transforms(self):
        """Return the standard transforms followed by C{_TRANSFORMS}."""
        return StandaloneReader.get_transforms(self) + self._TRANSFORMS
264
######################################################################
266
######################################################################
268
_OUTPUT_RE = re.compile(r'<div class="document">\s+(.*)\s+</div>\n</body>\n</html>',
269
re.MULTILINE | re.DOTALL)
273
CustomizedHTMLWriter.settings_defaults.update({'stylesheet_path': '/dev/null'})
274
output = docutils.core.publish_string(input,
275
writer=CustomizedHTMLWriter(), reader=CustomizedReader())
276
match = _OUTPUT_RE.search(output)
278
return match.group(1)
280
raise ValueError('Could not process exercise definition')
282
except docutils.utils.SystemMessage, e:
283
print 'Fatal error encountered!', e