# ~azzar1/unity/add-show-desktop-key
#
# 523 by stevenbird
# Adding ReStructured Text preprocessing of exercise descriptions,
1
#!/usr/bin/env python
2
#
3
# Natural Language Toolkit: Documentation generation script
4
#
5
# Copyright (C) 2001-2006 University of Pennsylvania
6
# Author: Edward Loper <edloper@gradient.cis.upenn.edu>
7
#         Steven Bird (substantially cut down)
8
# URL: <http://nltk.sf.net>
9
# For license information, see LICENSE.TXT
10
11
r"""
This is a customized driver for converting docutils reStructuredText
documents into HTML and LaTeX.  It customizes the standard writers in
the following ways:

    - Source code highlighting is added to all doctest blocks.  In
      the HTML output, highlighting is performed using CSS classes:
      'pysrc-prompt', 'pysrc-keyword', 'pysrc-string', 'pysrc-comment',
      and 'pysrc-output'.
"""
21
22
import re, os.path, textwrap, sys, pickle
23
from optparse import OptionParser
24
25
import docutils.core, docutils.nodes, docutils.io
26
from docutils.writers import Writer
27
from docutils.writers.html4css1 import HTMLTranslator, Writer as HTMLWriter
28
from docutils.parsers.rst import directives, roles
29
from docutils.readers.standalone import Reader as StandaloneReader
30
from docutils.transforms import Transform
31
import docutils.writers.html4css1
32
from doctest import DocTestParser
33
import docutils.statemachine
34
35
#: A global variable, set by main(), indicating the output format for
#: the current file.  Can be 'latex' or 'html' or 'ref'.
OUTPUT_FORMAT = None

#: A global variable, set by main(), indicating the base filename of
#: the current file (i.e., the filename with its extension stripped).
#: This is used to generate filenames for images.
OUTPUT_BASENAME = None
43
44
######################################################################
45
#{ Doctest Indentation
46
######################################################################
47
48
class UnindentDoctests(Transform):
    """
    Remove the block quote that surrounds indented doctest blocks.

    Most doctest blocks in our source text are indented, because that
    makes copy/pasting with the doctest script easier and because it
    reads better.  We do not want them to end up inside block_quote
    environments in the output, though, so this transform walks the
    document and replaces every block_quote whose children are all
    doctest_block nodes by those children.
    """
    default_priority = 1000

    def apply(self):
        # The actual tree surgery is performed by the visitor.
        visitor = UnindentDoctestVisitor(self.document)
        self.document.walkabout(visitor)
60
61
class UnindentDoctestVisitor(docutils.nodes.NodeVisitor):
    """
    Node visitor that unwraps block quotes holding nothing but doctest
    blocks.  Every other node type is ignored.
    """

    def __init__(self, document):
        docutils.nodes.NodeVisitor.__init__(self, document)

    def unknown_visit(self, node):
        # Node types without a dedicated handler are left untouched.
        pass

    def unknown_departure(self, node):
        pass

    def visit_block_quote(self, node):
        """
        Replace the block quote by its children when none of them is
        anything other than a doctest_block.
        """
        non_doctests = [child for child in node
                        if not isinstance(child,
                                          docutils.nodes.doctest_block)]
        if not non_doctests:
            node.replace_self(list(node))
        # Raised unconditionally, whether or not the quote was unwrapped.
        raise docutils.nodes.SkipNode()
71
        
72
# Matches a "# doctest: ..." option comment running to the end of a
# line, together with a "..." continuation prompt that directly
# precedes it on an otherwise empty line.
_OPTION_DIRECTIVE_RE = re.compile(
    r'(\n[ ]*\.\.\.[ ]*)?#\s*doctest:\s*([^\n\'"]*)$', re.MULTILINE)


def strip_doctest_directives(text):
    """
    Return a copy of text with every doctest option directive comment
    (as matched by _OPTION_DIRECTIVE_RE) removed.
    """
    return re.sub(_OPTION_DIRECTIVE_RE, '', text)
76
77
class pylisting(docutils.nodes.General, docutils.nodes.Element):
    """
    Docutils element node for a Python source code listing.

    Children: doctest_block+ caption?
    """
83
84
######################################################################
85
#{ HTML Output
86
######################################################################
87
from epydoc.docwriter.html_colorize import PythonSourceColorizer
88
import epydoc.docwriter.html_colorize
89
# Replace epydoc's PYSRC_EXPANDTO_JAVASCRIPT with an empty string.
# NOTE(review): presumably this keeps the colorizer from emitting its
# "expand to" javascript snippet -- confirm against epydoc.
epydoc.docwriter.html_colorize.PYSRC_EXPANDTO_JAVASCRIPT = ''
90
91
class CustomizedHTMLWriter(HTMLWriter):
    """
    HTML writer that produces ASCII output, using the
    'xmlcharrefreplace' error handler for anything that cannot be
    encoded, and that translates with CustomizedHTMLTranslator.
    """
    # The stock defaults, with the output encoding settings overridden.
    settings_defaults = dict(
        HTMLWriter.settings_defaults,
        output_encoding='ascii',
        output_encoding_error_handler='xmlcharrefreplace')

    def __init__(self):
        HTMLWriter.__init__(self)
        self.translator_class = CustomizedHTMLTranslator

    # Disabled post-processing hook, kept for reference:
    #def translate(self):
    #    postprocess(self.document)
    #    HTMLWriter.translate(self)
105
106
class CustomizedHTMLTranslator(HTMLTranslator):
    """
    HTML translator with custom rendering for doctest blocks,
    pylisting nodes, exercise nodes and inline literals.

    Doctest blocks and literals are colorized with
    HTMLDoctestColorizer; every pylisting is additionally written out
    as a source file (see _write_pylisting_file).
    """

    # Markup wrapped around colorized code.  CODEBOX_HEADER takes the
    # div class and the table class; CODEBOX_ROW takes the cell class
    # and the colorized source.
    CODEBOX_HEADER = ('<div class="%s">\n'
                        '<table border="0" cellpadding="0" cellspacing="0" '
                        'class="%s" width="95%%">\n')
    CODEBOX_FOOTER = '</table></div>\n'
    CODEBOX_ROW = textwrap.dedent('''\
      <tr><td class="%s">
      <table border="0" cellpadding="0" cellspacing="0" width="100%%">
      <tr><td class="pysrc">%s</td>
      </tr></table></td></tr>\n''')

    # For generated pylisting files:
    _PYLISTING_FILE_HEADER = "# Natural Language Toolkit: %s\n\n"

    def __init__(self, document):
        HTMLTranslator.__init__(self, document)

    def visit_pylisting(self, node):
        """Write the listing to a file and open its HTML code box."""
        self._write_pylisting_file(node)
        self.body.append(self.CODEBOX_HEADER % ('pylisting', 'pylisting'))

    def depart_pylisting(self, node):
        """Close the code box opened by visit_pylisting."""
        self.body.append(self.CODEBOX_FOOTER)

    def visit_doctest_block(self, node):
        """
        Emit a colorized code box row for a doctest block.

        Blocks whose parent is a pylisting only contribute a row (the
        pylisting supplies header and footer); any other block gets a
        complete code box of its own.
        """
        # Collect the text content of the doctest block.
        text = ''.join(str(c) for c in node)
        text = textwrap.dedent(text)
        text = strip_doctest_directives(text)

        # Colorize the contents of the doctest block.
        colorizer = HTMLDoctestColorizer(self.encode)
        if node.get('is_codeblock'):
            typ = 'codeblock'
            pysrc = colorizer.colorize_codeblock(text)
        else:
            typ = 'doctest'
            pysrc = colorizer.colorize_doctest(text)
        pysrc = self.CODEBOX_ROW % (typ, pysrc)

        if isinstance(node.parent, pylisting):
            self.body.append(pysrc)
        else:
            self.body.append(self.CODEBOX_HEADER % ('doctest', 'doctest'))
            self.body.append(pysrc)
            self.body.append(self.CODEBOX_FOOTER)

        raise docutils.nodes.SkipNode() # Content already processed

    def _write_pylisting_file(self, node):
        """
        Write the code of a pylisting node to a file.

        The file is named after node['name'] (with every non-word
        character replaced by '_') plus PYLISTING_EXTENSION, and is
        created inside PYLISTING_DIR, which is created first if it
        does not exist.  Code blocks are copied verbatim; plain
        doctest blocks are only included when
        INCLUDE_DOCTESTS_IN_PYLISTING_FILES is true.
        """
        # NOTE(review): PYLISTING_DIR, PYLISTING_EXTENSION and
        # INCLUDE_DOCTESTS_IN_PYLISTING_FILES are not defined in the
        # visible part of this module -- confirm they are provided.
        if not os.path.exists(PYLISTING_DIR):
            os.mkdir(PYLISTING_DIR)

        name = re.sub(r'\W', '_', node['name'])
        filename = os.path.join(PYLISTING_DIR, name+PYLISTING_EXTENSION)
        out = open(filename, 'w')
        # try/finally so the handle is released even if a write fails.
        try:
            out.write(self._PYLISTING_FILE_HEADER % name)
            for child in node:
                if not isinstance(child, docutils.nodes.doctest_block):
                    continue
                text = ''.join(str(c) for c in child)
                # .get(), as in visit_doctest_block: the attribute may
                # be absent, and child['is_codeblock'] would then raise
                # KeyError.
                if child.get('is_codeblock'):
                    out.write(text+'\n\n')
                elif INCLUDE_DOCTESTS_IN_PYLISTING_FILES:
                    self._write_doctest_as_script(out, text)
        finally:
            out.close()

    def _write_doctest_as_script(self, out, text):
        """
        Write the doctest source in text to the file object out as
        plain Python: prompts are stripped from '>>> ' lines (and from
        '... ' lines that continue them), every other non-blank line
        is written as a comment, and blank lines are copied.
        """
        in_doctest_block = False
        for line in text.split('\n'):
            if line.startswith('>>> '):
                out.write(line[4:]+'\n')
                in_doctest_block = True
            elif line.startswith('... ') and in_doctest_block:
                out.write(line[4:]+'\n')
            elif line.strip():
                # First commented line after source code gets a marker.
                if in_doctest_block:
                    out.write('# Expect:\n')
                out.write('#     ' + line+'\n')
                in_doctest_block = False
            else:
                out.write(line+'\n')
                in_doctest_block = False

    def visit_exercise(self, node):
        """Open an <exercise> tag; the node content becomes its src."""
        self.body.append('<exercise src="')

    def depart_exercise(self, node):
        """Close the attribute and tag opened by visit_exercise."""
        self.body.append('"/>')

    def visit_literal(self, node):
        """Process text to prevent tokens from wrapping."""
        text = ''.join(str(c) for c in node)
        colorizer = HTMLDoctestColorizer(self.encode)
        pysrc = colorizer.colorize_inline(text)
        self.body += [self.starttag(node, 'tt', '', CLASS='doctest'),
                      '<span class="pre">%s</span></tt>' % pysrc]
        raise docutils.nodes.SkipNode() # Content already processed

    def depart_field_name(self, node):
        # Don't add ":" in callout field lists.
        if 'callout' in node['classes']:
            self.body.append(self.context.pop())
        else:
            HTMLTranslator.depart_field_name(self, node)

    def _striphtml_len(self, s):
        """
        Return the length of s with tags removed and with every
        '&...;' entity counted as a single character.
        """
        return len(re.sub(r'&[^;]+;', 'x', re.sub(r'<[^<]+>', '', s)))

    def visit_caption(self, node):
        HTMLTranslator.visit_caption(self, node)

    def depart_caption(self, node):
        HTMLTranslator.depart_caption(self, node)

    def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
        """
        Same as HTMLTranslator.starttag, except that a node carrying a
        'mimetype' attribute gets it emitted as the tag's 'type'.
        """
        if node.get('mimetype'):
            attributes['type'] = node.get('mimetype')
        return HTMLTranslator.starttag(self, node, tagname, suffix,
                                       empty, **attributes)
225
        
226
######################################################################
227
#{ Source Code Highlighting
228
######################################################################
229
230
# [xx] Note: requires the very latest svn version of epydoc!
231
from epydoc.markup.doctest import DoctestColorizer
232
233
class HTMLDoctestColorizer(DoctestColorizer):
    """
    Doctest colorizer that marks up tokens as HTML <span> elements
    with 'pysrc-<tag>' classes, and optionally turns callout comments
    into linked callout images.
    """
    PREFIX = '<pre class="doctest">\n'
    SUFFIX = '</pre>\n'

    def __init__(self, encode_func, callouts=None):
        # encode_func escapes text for HTML; callouts (optional) maps
        # callout ids to callout numbers.
        self.encode = encode_func
        self.callouts = callouts

    def markup(self, s, tag):
        """Return the HTML for the token s of kind tag."""
        # Plain text is only escaped, never wrapped in a span.
        if tag == 'other':
            return self.encode(s)

        # NOTE(review): CALLOUT_RE and CALLOUT_IMG are not defined in
        # the visible part of this module; they are only consulted
        # when a callouts mapping was supplied.
        if tag == 'comment' and self.callouts is not None:
            found = CALLOUT_RE.match(s)
            if found:
                callout_id = found.group(1)
                callout_num = self.callouts[callout_id]
                img = CALLOUT_IMG % (callout_num, callout_num)
                return ('<a name="%s" /><a href="#ref-%s">%s</a>' %
                        (callout_id, callout_id, img))

        return ('<span class="pysrc-%s">%s</span>' %
                (tag, self.encode(s)))
252
253
######################################################################
254
#{ Customized Reader (register new transforms)
255
######################################################################
256
257
class CustomizedReader(StandaloneReader):
    """
    Standalone reader that registers this module's extra transforms
    in addition to the standard ones.
    """
    # Extra transforms; the trailing comment gives default_priority.
    _TRANSFORMS = [
        UnindentDoctests,           # 1000
        ]

    def get_transforms(self):
        """Return the standard transforms followed by _TRANSFORMS."""
        transforms = list(StandaloneReader.get_transforms(self))
        transforms.extend(self._TRANSFORMS)
        return transforms
263
264
######################################################################
265
#{ Main Function
266
######################################################################
267
268
# Captures everything inside the <div class="document"> element of a
# generated page, up to the closing </div></body></html>.
_OUTPUT_RE = re.compile(r'<div class="document">\s+(.*)\s+</div>\n</body>\n</html>',
    re.MULTILINE | re.DOTALL)


def rst(input):
    """
    Convert reStructuredText source to an HTML fragment.

    The source is published with CustomizedHTMLWriter and
    CustomizedReader, and the markup found inside the generated
    page's <div class="document"> element is returned.

    Raises ValueError if that element cannot be found in the output.
    A docutils SystemMessage is reported on standard output and then
    re-raised to the caller.
    """
    try:
        # Applied on every call; the update is idempotent.
        CustomizedHTMLWriter.settings_defaults.update({'stylesheet_path': '/dev/null'})
        output = docutils.core.publish_string(input,
            writer=CustomizedHTMLWriter(), reader=CustomizedReader())
    except docutils.utils.SystemMessage:
        # sys.exc_info() and an explicit write are used instead of
        # "except X, e" and the print statement so that this block
        # parses on any Python version; the text written is the same.
        err = sys.exc_info()[1]
        sys.stdout.write('Fatal error encountered! %s\n' % (err,))
        # The exception is propagated.  The sys.exit(-1) that used to
        # follow this raise could never run and has been removed.
        raise

    match = _OUTPUT_RE.search(output)
    if not match:
        raise ValueError('Could not process exercise definition')
    return match.group(1)
286