~azzar1/unity/add-show-desktop-key

523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1
#
2
# Natural Language Toolkit: Documentation generation script
3
#
4
# Copyright (C) 2001-2006 University of Pennsylvania
5
# Author: Edward Loper <edloper@gradient.cis.upenn.edu>
6
#         Steven Bird (substantially cut down)
7
# URL: <http://nltk.sf.net>
8
# For license information, see LICENSE.TXT
9
10
r"""
11
This is a customized driver for converting docutils reStructuredText
12
documents into HTML and LaTeX.  It customizes the standard writers in
13
the following ways:
14
    
15
    - Source code highlighting is added to all doctest blocks.  In
16
      the HTML output, highlighting is performed using css classes:
17
      'pysrc-prompt', 'pysrc-keyword', 'pysrc-string', 'pysrc-comment',
18
      and 'pysrc-output'.
19
"""
1099.1.225 by Nick Chadwick
Modified the setup script to include '.txt' files.
20
import re, os.path, textwrap, sys, pickle, inspect
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
21
from optparse import OptionParser
22
23
import docutils.core, docutils.nodes, docutils.io
24
from docutils.writers import Writer
25
from docutils.writers.html4css1 import HTMLTranslator, Writer as HTMLWriter
26
from docutils.parsers.rst import directives, roles
27
from docutils.readers.standalone import Reader as StandaloneReader
28
from docutils.transforms import Transform
29
import docutils.writers.html4css1
30
from doctest import DocTestParser
31
import docutils.statemachine
32
1099.1.218 by Nick Chadwick
tutorials can now use RST
33
34
35
LATEX_VALIGN_IS_BROKEN = True
"""Set to true to compensate for a bug in the latex writer.  I've
   submitted a patch to docutils, so hopefully this will be fixed
   soon."""

LATEX_DPI = 140
"""The scaling factor that should be used to display bitmapped images
   in latex/pdf output (specified in dots per inch).  E.g., if a
   bitmapped image is 100 pixels wide, it will be scaled to
   100/LATEX_DPI inches wide for the latex/pdf output.  (Larger
   values produce smaller images in the generated pdf.)"""

TREE_IMAGE_DIR = 'tree_images/'
"""The directory that tree images should be written to."""

EXTERN_REFERENCE_FILES = []
"""A list of .ref files, for cross-referencing external documents
   (used when building one chapter at a time)."""

BIBTEX_FILE = '../refs.bib'
"""The name of the bibtex file used to generate bibliographic entries."""

BIBLIOGRAPHY_HTML = "bibliography.html"
"""The name of the HTML file containing the bibliography (for
   hyperrefs from citations)."""

# needs to include "../doc" so it works in /doc_contrib
LATEX_STYLESHEET_PATH = '../../doc/definitions.sty'
"""The name of the LaTeX style file used for generating PDF output."""

LOCAL_BIBLIOGRAPHY = False
"""If true, assume that this document contains the bibliography, and
   link to it locally; if false, assume that bibliographic links
   should point to L{BIBLIOGRAPHY_HTML}."""

PYLISTING_DIR = 'pylisting/'
"""The directory where pylisting files should be written."""

PYLISTING_EXTENSION = ".py"
"""Extension for pylisting files."""

INCLUDE_DOCTESTS_IN_PYLISTING_FILES = False
"""If true, include code from doctests in the generated pylisting
   files. """

CALLOUT_IMG = '<img src="callouts/callout%s.gif" alt="[%s]" class="callout" />'
"""HTML code for callout images in pylisting blocks."""

REF_EXTENSION = '.ref'
"""File extension for reference files."""

# The CSS stylesheet path.  It is currently set to /dev/null; the path
# that was used previously is kept in the trailing comment.
CSS_STYLESHEET = '/dev/null' #/home/nick/exercise-ui/ivle/webapp/tutorial/media/nltkdoc.css'

OUTPUT_FORMAT = None
"""A global variable, set by main(), indicating the output format for
   the current file.  Can be 'latex' or 'html' or 'ref'."""

OUTPUT_BASENAME = None
"""A global variable, set by main(), indicating the base filename
   of the current file (i.e., the filename with its extension
   stripped).  This is used to generate filenames for images."""

# NOTE(review): presumably the JavaScript snippet used for
# copy-to-clipboard support; it is empty here -- confirm against its users.
COPY_CLIPBOARD_JS = ''
100
101
102
######################################################################
103
#{ Reference files
104
######################################################################
105
106
def read_ref_file(basename=None):
    """
    Load the pickled reference information stored in a reference file.

    @param basename: Path of the reference file without its
        L{REF_EXTENSION} suffix.  Defaults to L{OUTPUT_BASENAME}.
    @return: The unpickled reference info.  If the file does not
        exist, a warning is issued and an empty record with the keys
        C{targets}, C{terms} and C{reference_labels} is returned.
    """
    if basename is None:
        basename = OUTPUT_BASENAME
    ref_path = basename + REF_EXTENSION
    if not os.path.exists(ref_path):
        warning('File %r does not exist!' % ref_path)
        # The key must be spelled 'reference_labels': that is the name
        # readers of the reference info look up.
        return dict(targets=(), terms={}, reference_labels={})
    # NOTE(review): unpickling runs arbitrary code for a malicious file;
    # only read reference files that this tool wrote itself.
    # Binary mode matches write_ref_file() and is what pickle expects.
    with open(ref_path, 'rb') as f:
        return pickle.load(f)

def write_ref_file(ref_info):
    """
    Pickle C{ref_info} into the reference file for the current document
    (L{OUTPUT_BASENAME} + L{REF_EXTENSION}), replacing any old content.
    """
    with open(OUTPUT_BASENAME + REF_EXTENSION, 'wb') as f:
        pickle.dump(ref_info, f)

def add_to_ref_file(**ref_info):
    """
    Merge the given keyword arguments into the current document's
    reference file, creating the file if it does not exist yet.
    Existing keys are overwritten by the new values.
    """
    if os.path.exists(OUTPUT_BASENAME + REF_EXTENSION):
        info = read_ref_file()
        info.update(ref_info)
        write_ref_file(info)
    else:
        write_ref_file(ref_info)
129
130
######################################################################
131
#{ Directives
132
######################################################################
133
134
class example(docutils.nodes.paragraph):
    """Paragraph-derived node type created by the example directives."""
135
136
def example_directive(name, arguments, options, content, lineno,
                      content_offset, block_text, state, state_machine):
    """
    Wrap the directive content in an L{example} node.

    Typical usage::

        .. example:: John went to the store.

    An example can be given a target and then referenced::

        .. _store:
        .. example:: John went to the store.

        In store_, John performed an action.

    @return: A one-element list holding the L{example} node, whose
        children are the result of parsing the directive content.
    """
    raw_source = '\n'.join(content)
    example_node = example(raw_source)
    # Parse the body as reStructuredText into the new node.
    state.nested_parse(content, content_offset, example_node)
    return [example_node]
example_directive.content = True
directives.register_directive('example', example_directive)
directives.register_directive('ex', example_directive)
157
158
def doctest_directive(name, arguments, options, content, lineno,
                      content_offset, block_text, state, state_machine):
    """
    Explicitly mark the directive content as a doctest block, for
    content that would not otherwise be recognised as one.

    A warning is issued when the directive line is followed by a blank
    line before the content.

    @return: A one-element list holding a C{doctest_block} node created
        with C{codeblock=True}.
    """
    # A blank line directly after the first line of the directive text.
    if re.match(r'.*\n\s*\n', block_text):
        message = ('doctest-ignore on line %d will not be ignored, '
             'because there is\na blank line between ".. doctest-ignore::"'
             ' and the doctest example.' % lineno)
        warning(message)
    source = '\n'.join(content)
    return [docutils.nodes.doctest_block(source, source, codeblock=True)]
doctest_directive.content = True
directives.register_directive('doctest-ignore', doctest_directive)
172
173
_treenum = 0
def tree_directive(name, arguments, options, content, lineno,
                   content_offset, block_text, state, state_machine):
    """
    Render a tree description to an image file and return an image
    node that points at it.

    The tree text is the directive argument followed by its content.
    Images are written to L{TREE_IMAGE_DIR}, which is created when
    missing.  The optional C{scale} option is a percentage; it scales
    the image node for latex output and the rendering density for html
    output.

    @return: C{[image]} for 'latex' and 'html' output, and C{[]} for
        'ref' output.
    @raise AssertionError: If L{OUTPUT_FORMAT} is not one of 'latex',
        'html' or 'ref'.
    """
    global _treenum
    text = '\n'.join(arguments) + '\n'.join(content)
    _treenum += 1
    percent = options.get('scale', 100)
    # Note: the two filenames generated by these two cases should be
    # different, to prevent conflicts.
    if OUTPUT_FORMAT == 'latex':
        density = 300
        # Floor division keeps the integer result Python 2 produced.
        scale = 150 * percent // 100
        filename = '%s-tree-%s.pdf' % (OUTPUT_BASENAME, _treenum)
        align = 'bottom' if LATEX_VALIGN_IS_BROKEN else 'top'
    elif OUTPUT_FORMAT == 'html':
        scale = 100
        density = 100 * percent // 100
        filename = '%s-tree-%s.png' % (OUTPUT_BASENAME, _treenum)
        align = 'top'
    elif OUTPUT_FORMAT == 'ref':
        return []
    else:
        # Raised explicitly so the check also runs under "python -O".
        raise AssertionError('bad output format %r' % OUTPUT_FORMAT)
    if not os.path.exists(TREE_IMAGE_DIR):
        os.mkdir(TREE_IMAGE_DIR)

    filename = os.path.join(TREE_IMAGE_DIR, filename)
    # Errors from tree_to_image() propagate to the caller.  The previous
    # handler re-raised immediately, so the warning/fallback code that
    # followed it never ran; that dead code has been removed.
    tree_to_image(text, filename, density)

    imagenode = docutils.nodes.image(uri=filename, scale=scale, align=align)
    return [imagenode]

tree_directive.arguments = (1,0,1)
tree_directive.content = True
tree_directive.options = {'scale': directives.nonnegative_int}
directives.register_directive('tree', tree_directive)
212
213
def avm_directive(name, arguments, options, content, lineno,
                      content_offset, block_text, state, state_machine):
    """
    Parse the directive content with C{parse_avm} and return its
    rendering for the current L{OUTPUT_FORMAT}.

    @return: For 'latex', a paragraph wrapping a raw latex node; for
        'html', the result of C{as_table()}; for 'ref', an empty
        paragraph.  If parsing raises a C{ValueError} whose first
        argument is an integer line offset, a warning is issued and the
        content is returned as an L{example} node instead.
    @raise ValueError: Any other C{ValueError} from parsing is
        re-raised unchanged.
    @raise AssertionError: If L{OUTPUT_FORMAT} is not a known format.
    """
    text = '\n'.join(content)
    try:
        if OUTPUT_FORMAT == 'latex':
            latex_avm = parse_avm(textwrap.dedent(text)).as_latex()
            return [docutils.nodes.paragraph('','',
                       docutils.nodes.raw('', latex_avm, format='latex'))]
        elif OUTPUT_FORMAT == 'html':
            return [parse_avm(textwrap.dedent(text)).as_table()]
        elif OUTPUT_FORMAT == 'ref':
            return [docutils.nodes.paragraph()]
    except ValueError as e:
        # Only errors that carry an integer line offset are recoverable.
        # Checking e.args first avoids an IndexError on an argument-less
        # ValueError, which would hide the original error.
        if not (e.args and isinstance(e.args[0], int)):
            raise
        warning('Error parsing avm on line %s' % (lineno+e.args[0]))
        node = example(text, text)
        state.nested_parse(content, content_offset, node)
        return [node]
    # Previously an unknown format fell through and returned None.
    raise AssertionError('bad output format %r' % OUTPUT_FORMAT)
avm_directive.content = True
directives.register_directive('avm', avm_directive)
236
237
def def_directive(name, arguments, options, content, lineno,
                  content_offset, block_text, state, state_machine):
    """
    Record the directive argument as a defined name in the document's
    C{'__defs__'} mapping.  Produces no nodes.
    """
    defined_names = state_machine.document.setdefault('__defs__', {})
    defined_names[arguments[0]] = 1
    return []
def_directive.arguments = (1, 0, 0)
directives.register_directive('def', def_directive)
243
    
244
def ifdef_directive(name, arguments, options, content, lineno,
                    content_offset, block_text, state, state_machine):
    """
    Include the directive content only if the argument is present in
    the document's C{'__defs__'} mapping.

    @return: A one-element list with a compound node holding the
        parsed content, or C{[]} when the name is not defined.
    """
    defined_names = state_machine.document.get('__defs__', ())
    if arguments[0] not in defined_names:
        return []
    container = docutils.nodes.compound('')
    state.nested_parse(content, content_offset, container)
    return [container]
ifdef_directive.arguments = (1, 0, 0)
ifdef_directive.content = True
directives.register_directive('ifdef', ifdef_directive)
255
    
256
def ifndef_directive(name, arguments, options, content, lineno,
                    content_offset, block_text, state, state_machine):
    """
    Include the directive content only if the argument is absent from
    the document's C{'__defs__'} mapping.

    @return: A one-element list with a compound node holding the
        parsed content, or C{[]} when the name is defined.
    """
    defined_names = state_machine.document.get('__defs__', ())
    if arguments[0] in defined_names:
        return []
    container = docutils.nodes.compound('')
    state.nested_parse(content, content_offset, container)
    return [container]
ifndef_directive.arguments = (1, 0, 0)
ifndef_directive.content = True
directives.register_directive('ifndef', ifndef_directive)
267
268
269
######################################################################
270
#{ Table Directive
271
######################################################################
272
_table_ids = set()
def table_directive(name, arguments, options, content, lineno,
                    content_offset, block_text, state, state_machine):
    """
    Parse the directive content as a table followed by caption text.

    The first parsed child must be a table; all remaining children are
    moved into a caption node appended to that table.  When an
    identifier argument is given, an explicit target with that name is
    registered so the table can be referenced, and a warning is issued
    if the identifier was already used.

    @return: C{[target, table]} when an identifier was given, else
        C{[table]}; or a one-element list with a reporter error when
        the content does not start with a table.
    """
    # NOTE(review): the 'caption' option is declared below but is not
    # read anywhere in this function.
    target = None
    if arguments:
        # The identifier for this table.
        table_id = arguments[0]
        if table_id in _table_ids:
            warning("Duplicate table id %r" % table_id)
        _table_ids.add(table_id)

        # Create a target element for the table
        target = docutils.nodes.target(names=[table_id])
        state_machine.document.note_explicit_target(target)

    # Parse the contents.
    node = docutils.nodes.compound('')
    state.nested_parse(content, content_offset, node)
    if len(node) == 0 or not isinstance(node[0], docutils.nodes.table):
        # Report the line number, as the other directives in this file do.
        return [state_machine.reporter.error(
            'Error in "%s" directive: expected table as first child' %
            name, line=lineno)]

    # Move the caption into the table.
    table = node[0]
    caption = docutils.nodes.caption('','', *node[1:])
    table.append(caption)

    # Return the target (if any) and the table.
    if target is None:
        return [table]
    return [target, table]

table_directive.arguments = (0,1,0) # 1 optional arg, no whitespace
table_directive.content = True
table_directive.options = {'caption': directives.unchanged}
directives.register_directive('table', table_directive)
310
311
312
######################################################################
313
#{ Program Listings
314
######################################################################
315
# We define a new attribute for doctest blocks: 'is_codeblock'.  If
316
# this attribute is true, then the block contains python code only
317
# (i.e., don't expect to find prompts.)
318
319
class pylisting(docutils.nodes.General, docutils.nodes.Element):
    """
    Node for a listing of Python source code.

    Children: doctest_block+ caption?
    """
325
class callout_marker(docutils.nodes.Inline, docutils.nodes.Element):
    """
    Inline node marking a callout inside a doctest block.  It has no
    children, and defines the attribute 'number'.
    """
330
331
# Matches a run of doctest examples: a ">>>" line plus the lines that
# follow it.  The pattern is a raw string so that "\s" is not an invalid
# string escape; it matches exactly what the non-raw form matched.
# NOTE(review): "[^ ]" also matches a newline, so a blank line does not
# end the block -- confirm that this is intended.
DOCTEST_BLOCK_RE = re.compile(r'((?:[ ]*>>>.*\n?(?:.*[^ ].*\n?)+\s*)+)',
                              re.MULTILINE)
# Matches a trailing callout comment of the form "# [_name]" at the end
# of a line; group 1 is the callout name.
CALLOUT_RE = re.compile(r'#[ ]+\[_([\w-]+)\][ ]*$', re.MULTILINE)
334
335
from docutils.nodes import fully_normalize_name as normalize_name
336
337
_listing_ids = set()
def pylisting_directive(name, arguments, options, content, lineno,
                      content_offset, block_text, state, state_machine):
    """
    Build a L{pylisting} node from the directive content.

    The content is split with L{DOCTEST_BLOCK_RE}; every non-blank
    piece becomes a C{doctest_block} child whose C{is_codeblock}
    attribute is true for the pieces that did not match the pattern.
    The C{caption} option, when given, is parsed and appended as a
    caption node.  A warning is issued for a repeated listing id.

    @return: C{[target, listing]}, where C{target} is an explicit
        target named after the listing id.
    """
    # The identifier for this listing.
    listing_id = arguments[0]
    if listing_id in _listing_ids:
        warning("Duplicate listing id %r" % listing_id)
    _listing_ids.add(listing_id)

    # Create the pylisting element itself.
    source = '\n'.join(content)
    listing = pylisting(source, name=listing_id, callouts={})

    # Create a target element for the pylisting.
    target = docutils.nodes.target(names=[listing_id])
    state_machine.document.note_explicit_target(target)

    # Divide the text into doctest blocks.  The pattern has a single
    # capture group, so the even positions are the pieces between
    # matches.
    for position, piece in enumerate(DOCTEST_BLOCK_RE.split(source)):
        pysrc = re.sub(r'\A( *\n)+', '', piece.rstrip())
        if not pysrc.strip():
            continue
        block = docutils.nodes.doctest_block(
            pysrc, pysrc, is_codeblock=(position % 2 == 0))
        listing.append(block)

    # Add an optional caption.
    caption_text = options.get('caption')
    if caption_text:
        caption_lines = docutils.statemachine.StringList(
            caption_text.split('\n'))
        parsed = docutils.nodes.compound()
        state.nested_parse(caption_lines, content_offset, parsed)
        single_paragraph = (len(parsed) == 1 and
                            isinstance(parsed[0], docutils.nodes.paragraph))
        if single_paragraph:
            listing.append(docutils.nodes.caption('', '', *parsed[0]))
        else:
            warning("Caption should be a single paragraph")
            listing.append(docutils.nodes.caption('', '', *parsed))

    return [target, listing]

pylisting_directive.arguments = (1,0,0) # 1 required arg, no whitespace
pylisting_directive.content = True
pylisting_directive.options = {'caption': directives.unchanged}
directives.register_directive('pylisting', pylisting_directive)
379
380
def callout_directive(name, arguments, options, content, lineno,
                      content_offset, block_text, state, state_machine):
    """
    Turn a field list into a list of callout descriptions.

    The content must parse to exactly one field list.  Each field name
    is replaced by a reference whose C{refid} is the original name,
    prefixed with C{"<argument>-"} when an argument is given.

    @return: A one-element list holding the modified field list, or a
        one-element list with a reporter error when the content is not
        a single field list or a field name has more than one child.
    """
    if arguments:
        prefix = '%s-' % arguments[0]
    else:
        prefix = ''
    node = docutils.nodes.compound('')
    state.nested_parse(content, content_offset, node)
    if not (len(node.children) == 1 and
            isinstance(node[0], docutils.nodes.field_list)):
        # The check is for a field list, so the message says so.
        return [state_machine.reporter.error(
            'Error in "%s" directive: may contain a single field '
            'list only.' % (name), line=lineno)]

    node[0]['classes'] = ['callouts']
    for field in node[0]:
        # field[0] is the field's name node; it must hold one child.
        if len(field[0]) != 1:
            return [state_machine.reporter.error(
                'Error in "%s" directive: bad field id' % (name), line=lineno)]

        field_name = prefix+('%s' % field[0][0])
        field[0].clear()
        field[0].append(docutils.nodes.reference(field_name, field_name,
                                                 refid=field_name))
        field[0]['classes'] = ['callout']

    return [node[0]]

callout_directive.arguments = (0,1,0) # 1 optional arg, no whitespace
callout_directive.content = True
directives.register_directive('callouts', callout_directive)
411
412
# Matches a "# doctest: ..." option comment up to the end of its line,
# together with a preceding "..." continuation prompt line, if any.
_OPTION_DIRECTIVE_RE = re.compile(
    r'(\n[ ]*\.\.\.[ ]*)?#\s*doctest:\s*([^\n\'"]*)$', re.MULTILINE)
def strip_doctest_directives(text):
    """
    Return C{text} with every doctest option comment removed.
    """
    stripped = _OPTION_DIRECTIVE_RE.sub('', text)
    return stripped
416
417
418
######################################################################
419
#{ RST In/Out table
420
######################################################################
421
422
def rst_example_directive(name, arguments, options, content, lineno,
                    content_offset, block_text, state, state_machine):
    """
    Show the directive content twice: as literal source text
    ("Input") and as parsed reStructuredText ("Rendered").

    @return: For 'latex' output, a one-element list with a two-item
        definition list; for any other output format, a one-element
        list with a two-column table of class C{rst-example}.
    """
    nodes = docutils.nodes
    raw = nodes.literal_block('', '\n'.join(content))
    out = nodes.compound('')
    state.nested_parse(content, content_offset, out)

    if OUTPUT_FORMAT == 'latex':
        input_item = nodes.definition_list_item(
            '', nodes.term('','Input'), nodes.definition('', raw))
        rendered_item = nodes.definition_list_item(
            '', nodes.term('','Rendered'), nodes.definition('', out))
        return [nodes.definition_list('', input_item, rendered_item)]

    # Header row with the two column titles.
    heading_row = nodes.row(
        '',
        nodes.entry('', nodes.paragraph('','Input')),
        nodes.entry('', nodes.paragraph('','Rendered')))
    # Body row: the literal source beside its rendering.
    body_row = nodes.row('', nodes.entry('',raw), nodes.entry('',out))
    tgroup = nodes.tgroup(
        '',
        nodes.colspec(colwidth=5,classes=['rst-raw']),
        nodes.colspec(colwidth=5),
        nodes.thead('', heading_row),
        nodes.tbody('', body_row))
    return [nodes.table('', tgroup, classes=["rst-example"])]

rst_example_directive.arguments = (0, 0, 0)
rst_example_directive.content = True
directives.register_directive('rst_example', rst_example_directive)
457
458
459
######################################################################
460
#{ Glosses
461
######################################################################
462
463
"""
464
.. gloss::
465
   This  | is | used | to | make | aligned | glosses.
466
    NN   | BE |  VB  | TO |  VB  |  JJ     |   NN
467
   *Foog blogg blarg.*
468
"""
469
470
class gloss(docutils.nodes.Element):
    "glossrow+"

class glossrow(docutils.nodes.Element):
    "paragraph+"
472
473
def gloss_directive(name, arguments, options, content, lineno,
                    content_offset, block_text, state, state_machine):
    """
    Render '|'-separated, line-aligned content as a borderless table.

    The content lines are padded to a common width and rewritten as a
    reStructuredText grid table, with a column boundary wherever a '|'
    appears in a line or in the line before it.  That table is then
    parsed, given the classes C{gloss} and C{nolines}, and wrapped in
    an L{example} node.

    @return: A one-element list holding the L{example} node, or a
        one-element list with a reporter error when the directive has
        no content or the content does not parse to a single table.
    """
    lines = list(content)
    if not lines:
        # max() below would raise ValueError on empty content.
        return [state_machine.reporter.error(
            'Error in "%s" directive: no content found.' % (name),
            line=lineno)]

    # Transform into a table.
    maxlen = max(len(line) for line in lines)
    lines = ['|%s|' % line.ljust(maxlen) for line in lines]
    rows = []
    prevline = ''
    # The extra empty line produces the closing border of the table.
    for line in (lines+['']):
        div = ['-']*(maxlen+2)
        for neighbour in (prevline, line):
            for m in re.finditer(r'\|', neighbour):
                div[m.start()] = '+'
        rows.append(''.join(div))
        rows.append(line)
        prevline = line
    table_lines = '\n'.join(rows).strip().split('\n')
    new_content = docutils.statemachine.StringList(table_lines)

    # Parse the table.
    node = docutils.nodes.compound('')
    state.nested_parse(new_content, content_offset, node)
    if not (len(node.children) == 1 and
            isinstance(node[0], docutils.nodes.table)):
        error = state_machine.reporter.error(
            'Error in "%s" directive: may contain a single table '
            'only.' % (name), line=lineno)
        return [error]
    table = node[0]
    table['classes'] = ['gloss', 'nolines']

    # Halve every column width.  Floor division keeps the integer
    # result that Python 2 produced for integer widths.
    colspecs = table[0]
    for colspec in colspecs:
        colspec['colwidth'] = colspec.get('colwidth',4)//2

    return [example('', '', table)]
gloss_directive.arguments = (0, 0, 0)
gloss_directive.content = True
directives.register_directive('gloss', gloss_directive)
515
516
517
######################################################################
518
#{ Bibliography
519
######################################################################
520
521
class Citations(Transform):
    """
    Transform that replaces citation references whose key is found in
    L{BIBTEX_FILE} with a node built by L{citeref}.  If the bibtex file
    does not exist, a warning is issued and nothing is changed.
    """
    default_priority = 500 # before footnotes.

    def apply(self):
        """Replace every citation reference that has a bibtex entry."""
        if not os.path.exists(BIBTEX_FILE):
            warning('Warning bibtex file %r not found.  '
                    'Not linking citations.' % BIBTEX_FILE)
            return
        bibliography = self.read_bibinfo(BIBTEX_FILE)
        for k, citation_refs in self.document.citation_refs.items():
            # Iterate over a copy: entries are removed as they resolve.
            for citation_ref in citation_refs[:]:
                cite = bibliography.get(citation_ref['refname'].lower())
                if cite:
                    new_cite = self.citeref(cite, citation_ref['refname'])
                    citation_ref.replace_self(new_cite)
                    self.document.citation_refs[k].remove(citation_ref)

    def citeref(self, cite, key):
        """
        Build the replacement node for one citation.

        @param cite: The "[authors, year]" text for the entry.
        @param key: The citation key as written in the document.
        @return: A raw latex cite command when L{LOCAL_BIBLIOGRAPHY}
            is true; otherwise a reference showing C{cite} that links
            to C{key} in L{BIBLIOGRAPHY_HTML}.
        """
        if LOCAL_BIBLIOGRAPHY:
            return docutils.nodes.raw('', r'\cite{%s}' % key, format='latex')
        else:
            return docutils.nodes.reference('', '', docutils.nodes.Text(cite),
                                    refuri='%s#%s' % (BIBLIOGRAPHY_HTML, key))

    BIB_ENTRY = re.compile(r'@\w+{.*')

    def read_bibinfo(self, filename):
        """
        Scan a bibtex file line by line.

        @return: A dictionary mapping each lowercased entry key to the
            string "[authors, year]".  A warning is issued for entries
            that have no author/editor or no year.
        """
        bibliography = {} # key -> [authors, year]
        key = None
        # The file is closed even if a line fails to parse.
        with open(filename) as bibfile:
            for line in bibfile:
                line = line.strip()

                # @InProceedings{<key>,
                m = re.match(r'@\w+{([^,]+),$', line)
                if m:
                    key = m.group(1).strip().lower()
                    bibliography[key] = [None, None]

                #   author = <authors>,   (or: editor = <editors>,)
                m = (re.match(r'(?i)author\s*=\s*(.*)$', line) or
                     re.match(r'(?i)editor\s*=\s*(.*)$', line))
                if m and key:
                    bibliography[key][0] = self.bib_authors(m.group(1))

                #   year = <year>,
                m = re.match(r'(?i)year\s*=\s*(.*)$', line)
                if m and key:
                    bibliography[key][1] = self.bib_year(m.group(1))
        for key in bibliography:
            # warning() gets one pre-formatted string, as it does at
            # every other call site in this file.
            if bibliography[key][0] is None:
                warning('no author found: %s' % key)
            if bibliography[key][1] is None:
                warning('no year found: %s' % key)
            bibliography[key] = '[%s, %s]' % tuple(bibliography[key])
        return bibliography

    def bib_year(self, year):
        """Return C{year} with quotes, braces and commas removed."""
        return re.sub(r'["\'{},]', "", year)

    def bib_authors(self, authors):
        """
        Reduce a bibtex author field to a short citation label.

        @return: The last whitespace-separated word of each name,
            joined as "A", "A & B", "A, B, & C" or "A et al"; or
            C{None} when the field contains no name at all.
        """
        # Strip trailing comma:
        if authors[-1:] == ',': authors=authors[:-1]
        # Strip quotes or braces:
        authors = re.sub(r'"(.*)"$', r'\1', authors)
        authors = re.sub(r'{(.*)}$', r'\1', authors)
        authors = re.sub(r"'(.*)'$", r'\1', authors)
        # Split on 'and', ignoring blank pieces; indexing the last word
        # of a blank piece would raise IndexError.
        names = [a for a in re.split(r'\s+and\s+', authors) if a.split()]
        if not names:
            return None
        # Keep last name only:
        names = [a.split()[-1] for a in names]
        # Combine:
        if len(names) == 1:
            return names[0]
        elif len(names) == 2:
            return '%s & %s' % tuple(names)
        elif len(names) == 3:
            return '%s, %s, & %s' % tuple(names)
        else:
            return '%s et al' % names[0]
601
602
603
######################################################################
604
#{ Indexing
605
######################################################################
606
class termdef(docutils.nodes.Inline, docutils.nodes.TextElement):
    """Inline text node type for a term definition."""

class idxterm(docutils.nodes.Inline, docutils.nodes.TextElement):
    """Inline text node type for an indexed term."""

class index(docutils.nodes.Element):
    """Element node type that holds the index."""
609
610
def idxterm_role(name, rawtext, text, lineno, inliner,
                 options=None, content=None):
    """
    Role function for the C{dt}, C{idx} and C{topic} roles.

    The role text is parsed as inline markup and wrapped in an
    L{idxterm} node whose C{classes} attribute is C{['termdef']} for
    C{dt}, C{['topic']} for C{topic} and C{['term']} otherwise.

    @param options: Optional node attributes.  The mapping is copied,
        so neither the caller's dictionary nor a shared default is
        modified.
    @param content: Not used.
    @return: C{([idxterm_node], [])}.
    """
    # Work on a private copy: the previous mutable default argument
    # was shared, and written to, by every call.
    options = dict(options or {})
    if name == 'dt': options['classes'] = ['termdef']
    elif name == 'topic': options['classes'] = ['topic']
    else: options['classes'] = ['term']
    # Recursively parse the contents of the index term, in case it
    # contains a substitution (like |alpha|).
    nodes, msgs = inliner.parse(text, lineno, memo=inliner,
                                parent=inliner.parent)
    return [idxterm(rawtext, '', *nodes, **options)], []

roles.register_canonical_role('dt', idxterm_role)
roles.register_canonical_role('idx', idxterm_role)
roles.register_canonical_role('topic', idxterm_role)
624
625
def index_directive(name, arguments, options, content, lineno,
                    content_offset, block_text, state, state_machine):
    """
    Insert a placeholder for the index.

    A pending node for the L{ConstructIndex} transform is created, the
    directive options are copied into its details, and it is
    registered with the document.

    @return: A one-element list with an L{index} node that wraps the
        pending node.
    """
    placeholder = docutils.nodes.pending(ConstructIndex)
    placeholder.details.update(options)
    state_machine.document.note_pending(placeholder)
    index_node = index('', placeholder)
    return [index_node]
index_directive.arguments = (0, 0, 0)
index_directive.content = False
index_directive.options = {'extern': directives.flag}
directives.register_directive('index', index_directive)
635
636
637
class SaveIndexTerms(Transform):
    """
    Transform that collects the document's index terms and, for 'ref'
    output, stores them in the reference file.
    """
    default_priority = 810 # before NumberReferences transform

    def apply(self):
        """Walk the document; save the terms found when building refs."""
        visitor = FindTermVisitor(self.document)
        self.document.walkabout(visitor)

        if OUTPUT_FORMAT != 'ref':
            return
        add_to_ref_file(terms=visitor.terms)
645
646
class ConstructIndex(Transform):
    """
    Transform that replaces the pending index node with a bullet list
    of all index terms, grouped by their first character.
    """
    default_priority = 820 # after NumberNodes, before NumberReferences.

    def apply(self):
        """Collect the index terms and substitute the built index."""
        # Find any indexed terms in this document.
        visitor = FindTermVisitor(self.document)
        self.document.walkabout(visitor)
        terms = visitor.terms

        # Check the extern reference files for additional terms.
        if 'extern' in self.startnode.details:
            for filename in EXTERN_REFERENCE_FILES:
                basename = os.path.splitext(filename)[0]
                extern_terms = read_ref_file(basename)['terms']
                terms.update(extern_terms)

        # Build the index & insert it into the document.
        self.startnode.replace_self(self.build_index(terms))

    def build_index(self, terms):
        """
        @param terms: A mapping from term key to the C{(entrytext,
            name, sectnum)} tuple accepted by L{entry}.
        @return: A bullet list with one item per distinct first
            character of the sorted keys, each holding a heading and a
            nested list of entries; or C{[]} when there are no terms.
        """
        if not terms:
            return []

        top = docutils.nodes.bullet_list('', classes=['index'])
        current_initial = None
        section = None
        for key in sorted(terms):
            initial = key[:1]
            if initial != current_initial:
                # Start a new section for this first character.
                heading = docutils.nodes.paragraph(
                    '', initial.upper()+'\n', classes=['index-heading'])
                section = docutils.nodes.bullet_list(
                    '', classes=['index-section'])
                top.append(docutils.nodes.list_item(
                    '', heading, section, classes=['index']))
                current_initial = initial
            section.append(self.entry(terms[key]))

        return top

    def entry(self, term_info):
        """
        Build the list item for one index term.

        @param term_info: A C{(entrytext, name, sectnum)} tuple.  When
            C{sectnum} is not C{None}, an emphasised " (sectnum)" is
            appended to C{entrytext} itself.
        @return: A list item whose paragraph holds a reference to
            C{name} containing the children of C{entrytext}.
        """
        entrytext, name, sectnum = term_info
        if sectnum is not None:
            suffix = docutils.nodes.emphasis('', ' (%s)' % sectnum)
            entrytext.append(suffix)
        ref = docutils.nodes.reference('', '', *entrytext, refid=name)
        para = docutils.nodes.paragraph('', '', ref)
        return docutils.nodes.list_item('', para, classes=['index'])
693
694
class FindTermVisitor(docutils.nodes.SparseNodeVisitor):
    """
    Visitor that gathers every L{idxterm} node of a document into
    C{self.terms}, a mapping from a generated key to the tuple
    C{(entrytext, name, sectnum)}.
    """
    def __init__(self, document):
        self.terms = {}
        docutils.nodes.NodeVisitor.__init__(self, document)

    def unknown_visit(self, node):
        pass

    def unknown_departure(self, node):
        pass

    def visit_idxterm(self, node):
        """Give the term a unique name/id and record it."""
        key = self.idxterm_key(node)
        node['name'] = node['id'] = key
        node['names'] = node['ids'] = [key]

        # The copy is taken after the name/id attributes are set.
        entrytext = node.deepcopy()
        container = self.container_section(node)
        if container:
            sectnum = container.get('sectnum')
        else:
            sectnum = '0'
        self.terms[key] = (entrytext, key, sectnum)

    def idxterm_key(self, node):
        """
        Return a key for C{node} that is not yet in C{self.terms}: its
        lowercased text with non-word characters replaced by '_',
        followed by '_index_term' and, if needed, a numeric suffix.
        """
        key = re.sub(r'\W', '_', node.astext().lower())+'_index_term'
        if key not in self.terms:
            return key
        n = 2
        candidate = '%s_%d' % (key, n)
        while candidate in self.terms:
            n += 1
            candidate = '%s_%d' % (key, n)
        return candidate

    def container_section(self, node):
        """Return the nearest enclosing section of C{node}, or None."""
        while not isinstance(node, docutils.nodes.section):
            if node.parent is None:
                return None
            node = node.parent
        return node
724
725
726
727
######################################################################
728
#{ Crossreferences
729
######################################################################
730
731
class ResolveExternalCrossrefs(Transform):
    """
    Using the information from EXTERN_REFERENCE_FILES, look for any
    links to external targets, and set their `refuri` appropriately.
    If a label is recorded for the target, the text of the link is
    replaced with that label.
    """
    default_priority = 849 # right before dangling refs

    def apply(self):
        """Resolve the references found in the external ref files."""
        visitor = ExternalCrossrefVisitor(self.document,
                                          self.build_ref_dict())
        self.document.walkabout(visitor)

    def build_ref_dict(self):
        """{target -> (uri, label)}"""
        if OUTPUT_FORMAT == 'html':
            suffix = '.html'
        else:
            suffix = '.pdf'

        ref_dict = {}
        for filename in EXTERN_REFERENCE_FILES:
            basename = os.path.splitext(filename)[0]
            if basename == OUTPUT_BASENAME:
                continue # don't read our own ref file.
            ref_path = basename+REF_EXTENSION
            if not os.path.exists(ref_path):
                warning('%s does not exist' % (ref_path))
                continue
            # The uri is the file name without its directory part.
            uri = os.path.split(basename)[-1]+suffix
            ref_info = read_ref_file(basename)
            for ref in ref_info['targets']:
                label = ref_info['reference_labels'].get(ref)
                ref_dict[ref] = (uri, label)

        return ref_dict
765
    
766
class ExternalCrossrefVisitor(docutils.nodes.NodeVisitor):
    """
    Visitor that resolves references to targets defined in other
    documents.  `ref_dict` maps each external target id to a
    ``(uri, label)`` pair; `label` may be None.
    """
    def __init__(self, document, ref_dict):
        docutils.nodes.NodeVisitor.__init__(self, document)
        self.ref_dict = ref_dict

    # Node types without an explicit handler are left untouched.
    def unknown_visit(self, node): pass
    def unknown_departure(self, node): pass

    # Don't mess with the table of contents.
    def visit_topic(self, node):
        if 'contents' in node.get('classes', ()):
            raise docutils.nodes.SkipNode

    def visit_reference(self, node):
        """
        If `node` is an unresolved reference to an external target,
        point it at ``<uri>#<target>`` and mark it resolved.  If the
        target has a label, the reference's text is replaced by that
        label, unless the reference text was already expanded
        locally, in which case a warning is issued instead.
        """
        if node.resolved:
            return
        node_id = node.get('refid') or node.get('refname')
        if node_id not in self.ref_dict:
            return

        uri, label = self.ref_dict[node_id]
        node['refuri'] = '%s#%s' % (uri, node_id)
        node.resolved = True

        if label is None:
            return
        if node.get('expanded_ref'):
            # The reference was already rewritten using a local
            # label; leave it alone, but report the clash.
            warning('Label %s is defined both locally (%s) and '
                    'externally (%s)' % (node_id, node[0], label))
        else:
            node.clear()
            node.append(docutils.nodes.Text(label))
            expand_reference_text(node)
797
798
######################################################################
799
#{ Exercises
800
######################################################################
801
802
"""
803
.. exercise:: path.xml
804
"""
805
806
class exercise(docutils.nodes.paragraph, docutils.nodes.Element):
    """Document node created by the ``exercise`` directive."""

def exercise_directive(name, arguments, options, content, lineno,
                       content_offset, block_text, state, state_machine):
    """
    Directive function for ``.. exercise:: path.xml``.  Return a
    single `exercise` node built from the directive's one argument.
    """
    return [exercise('', arguments[0])]

# Exactly one required argument, no optional arguments, and no
# directive content.
exercise_directive.arguments = (1, 0, 0)
exercise_directive.content = False
directives.register_directive('exercise', exercise_directive)
815
816
817
######################################################################
818
#{ Challenges (optional exercises; harder than usual)
819
######################################################################
820
821
"""
822
.. challenge:: path.xml
823
"""
824
825
class challenge(docutils.nodes.paragraph, docutils.nodes.Element):
    """Document node created by the ``challenge`` directive."""

def challenge_directive(name, arguments, options, content, lineno,
                        content_offset, block_text, state, state_machine):
    """
    Directive function for ``.. challenge:: path.xml``.  Return a
    single `challenge` node built from the directive's one argument.
    """
    return [challenge('', arguments[0])]

# Exactly one required argument, no optional arguments, and no
# directive content.
challenge_directive.arguments = (1, 0, 0)
challenge_directive.content = False
directives.register_directive('challenge', challenge_directive)
834
835
836
837
######################################################################
838
#{ Figure & Example Numbering
839
######################################################################
840
841
# [xx] number examples, figures, etc, relative to chapter?  e.g.,
842
# figure 3.2?  maybe number examples within-chapter, but then restart
843
# the counter?
844
845
class section_context(docutils.nodes.Invisible, docutils.nodes.Element):
    """
    Invisible marker node recording that the sections which follow
    belong to a new context.  Its 'context' attribute is one of
    'body', 'preface' or 'appendix'.
    """
    def __init__(self, context):
        docutils.nodes.Element.__init__(self, '', context=context)
        assert self['context'] in ('body', 'preface', 'appendix')

def section_context_directive(name, arguments, options, content, lineno,
                              content_offset, block_text, state,
                              state_machine):
    """
    Directive function shared by the ``preface``, ``body`` and
    ``appendix`` directives.  The name under which the directive was
    invoked becomes the context of the returned `section_context`
    node.
    """
    return [section_context(name)]

# These directives take no arguments.
section_context_directive.arguments = (0,0,0)
directives.register_directive('preface', section_context_directive)
directives.register_directive('body', section_context_directive)
directives.register_directive('appendix', section_context_directive)
857
        
858
class NumberNodes(Transform):
    """
    This transform adds numbers to figures, tables, and examples; and
    records the label of each numbered node, so that references to
    the figures, tables, and examples can later be converted to use
    these numbers.  For example, given the rst source::

        .. _my_example:
        .. ex:: John likes Mary.

        See example my_example_.

    The example is assigned the number '(1)', and (once the
    references have been processed) the following text will read
    'see example (1)', with an appropriate link.
    """
    # dangling = 850; contents = 720.
    default_priority = 800

    def apply(self):
        """Walk the document with a `NumberingVisitor`, and store the
        labels it collected on the document as `reference_labels`
        and `callout_labels`."""
        visitor = NumberingVisitor(self.document)
        self.document.walkabout(visitor)
        self.document.reference_labels = visitor.reference_labels
        self.document.callout_labels = visitor.callout_labels
880
881
class NumberReferences(Transform):
    """
    Rewrite references using the `reference_labels` and
    `callout_labels` tables stored on the document.  When the output
    format is 'ref', the labels and the set of targets that were
    seen are also passed to `add_to_ref_file`.
    """
    default_priority = 830

    def apply(self):
        visitor = ReferenceVisitor(self.document,
                                   self.document.reference_labels,
                                   self.document.callout_labels)
        self.document.walkabout(visitor)

        # Save reference info to a pickle file.
        if OUTPUT_FORMAT == 'ref':
            add_to_ref_file(reference_labels=self.document.reference_labels,
                            targets=visitor.targets)
892
893
class NumberingVisitor(docutils.nodes.NodeVisitor):
    """
    A transforming visitor that assigns numbers to figures, tables,
    listings, sections and examples.  The label of every numbered
    node that is preceded by a target is recorded in
    `reference_labels` (id -> label), and the number of every
    callout found in a doctest block is recorded in `callout_labels`
    (name -> number).
    """
    LETTERS = 'abcdefghijklmnopqrstuvwxyz'
    ROMAN = 'i ii iii iv v vi vii viii ix x'.split()
    ROMAN += ['x%s' % r for r in ROMAN]    # xi, xii, ..., xx

    # An explicit section number at the start of a title, such as
    # '3 ', '3.2 ' or '3.2. '.  The separator must be a literal dot.
    _SECTNUM_RE = re.compile(r'(\d+(\.\d+)*)\.?\s+')

    def __init__(self, document):
        docutils.nodes.NodeVisitor.__init__(self, document)
        self.reference_labels = {}
        self.figure_num = 0
        self.table_num = 0
        self.example_num = [0]    # one counter per example nesting level
        self.section_num = [0]    # one counter per section nesting level
        self.listing_num = 0
        self.callout_labels = {} # name -> number
        self.set_section_context = None # context requested for next section
        self.section_context = 'body' # preface, appendix, body

    #////////////////////////////////////////////////////////////
    # Figures
    #////////////////////////////////////////////////////////////

    def visit_figure(self, node):
        """Number the figure '<top-level section>.<n>' and label it."""
        self.figure_num += 1
        num = '%s.%s' % (self.format_section_num(1), self.figure_num)
        for node_id in self.get_ids(node):
            self.reference_labels[node_id] = '%s' % num
        self.label_node(node, 'Figure %s' % num)

    #////////////////////////////////////////////////////////////
    # Tables
    #////////////////////////////////////////////////////////////

    def visit_table(self, node):
        """Number the table '<top-level section>.<n>' and label it.
        Tables that are only used for layout (avm, gloss,
        rst-example and doctest-list tables) are not numbered."""
        classes = node['classes']
        for layout_class in ('avm', 'gloss', 'rst-example', 'doctest-list'):
            if layout_class in classes:
                return
        self.table_num += 1
        num = '%s.%s' % (self.format_section_num(1), self.table_num)
        for node_id in self.get_ids(node):
            self.reference_labels[node_id] = '%s' % num
        self.label_node(node, 'Table %s' % num)

    #////////////////////////////////////////////////////////////
    # Listings
    #////////////////////////////////////////////////////////////

    def visit_pylisting(self, node):
        """Number the listing '<top-level section>.<n>', label it
        with a link to its source file, and record its callouts."""
        self.listing_num += 1
        num = '%s.%s' % (self.format_section_num(1), self.listing_num)
        for node_id in self.get_ids(node):
            self.reference_labels[node_id] = '%s' % num
        # The file name is the listing's name with every non-word
        # character replaced by an underscore.
        pyfile = re.sub(r'\W', '_', node['name']) + PYLISTING_EXTENSION
        self.label_node(node, 'Listing %s (%s)' % (num, pyfile),
                        PYLISTING_DIR + pyfile)
        self.callout_labels.update(node['callouts'])

    def visit_doctest_block(self, node):
        """Number the callouts found in the doctest block.  A block
        inside a pylisting shares (and extends) the listing's
        callout table; any other block gets a fresh one."""
        if isinstance(node.parent, pylisting):
            callouts = node['callouts'] = node.parent['callouts']
        else:
            callouts = node['callouts'] = {}

        pysrc = ''.join(('%s' % c) for c in node)
        for callout_id in CALLOUT_RE.findall(pysrc):
            callouts[callout_id] = len(callouts)+1
        self.callout_labels.update(callouts)

    #////////////////////////////////////////////////////////////
    # Sections
    #////////////////////////////////////////////////////////////
    max_section_depth = 3
    no_section_numbers_in_preface = True
    TOP_SECTION = 'chapter'

    # [xx] I don't think this currently does anything..
    def visit_document(self, node):
        """If the document title starts with an explicit section
        number, record it as the document's 'sectnum' and as the
        label of each of the document's ids."""
        if not (len(node) > 0 and isinstance(node[0], docutils.nodes.title)):
            return
        title = node[0]
        if not (len(title) > 0 and
                isinstance(title.children[0], docutils.nodes.Text)):
            return
        title_text = title.children[0].data
        if self._SECTNUM_RE.match(title_text):
            node['sectnum'] = title_text.split()[0]
            for node_id in node.get('ids', []):
                self.reference_labels[node_id] = '%s' % node['sectnum']

    def visit_section(self, node):
        """Assign a section number to `node`, record it as the label
        of the section's ids, and (depending on the output format,
        depth and context) prepend it to the section title."""
        title = node[0]

        # Check if we're entering a new context.
        if len(self.section_num) == 1 and self.set_section_context:
            self.start_new_context(node)

        # Record the section's context in its title.
        title['section_context'] = self.section_context

        # Increment the section counter.
        self.section_num[-1] += 1

        # If a section number is given explicitly as part of the
        # title, then it overrides our counter.
        if (len(title) > 0 and
            isinstance(title.children[0], docutils.nodes.Text)):
            text_node = title.children[0]
            m = self._SECTNUM_RE.match(text_node.data)
            if m:
                pieces = [int(n) for n in m.group(1).split('.')]
                if len(pieces) == len(self.section_num):
                    self.section_num = pieces
                    text_node.data = text_node.data[m.end():]
                else:
                    warning('Explicit section number (%s) does not match '
                         'current section depth' % m.group(1))
                self.prepend_raw_latex(node, r'\setcounter{%s}{%d}' %
                               (self.TOP_SECTION, self.section_num[0]-1))

        # Record the reference pointer for this section; and add the
        # section number to the section title.
        node['sectnum'] = self.format_section_num()
        for node_id in node.get('ids', []):
            self.reference_labels[node_id] = '%s' % node['sectnum']
        if (len(self.section_num) <= self.max_section_depth and
            (OUTPUT_FORMAT != 'latex') and
            not (self.section_context == 'preface' and
                 self.no_section_numbers_in_preface)):
            label = docutils.nodes.generated('', node['sectnum']+u'\u00a0'*3,
                                             classes=['sectnum'])
            title.insert(0, label)
            title['auto'] = 1

        # Record the section number.
        self.section_num.append(0)

        # If this was a top-level section, then restart the figure,
        # table, and listing counters
        if len(self.section_num) == 2:
            self.figure_num = 0
            self.table_num = 0
            self.listing_num = 0

    def start_new_context(self, node):
        """Switch to the context requested by the most recent
        section_context node: restart the top-level section counter
        and emit the raw latex that resets latex's own counter and
        numbering style."""
        # Set the 'section_context' var.
        self.section_context = self.set_section_context
        self.set_section_context = None

        # Update our counter.
        self.section_num[0] = 0

        # Update latex's counter.
        if self.section_context == 'preface': style = 'Roman'
        elif self.section_context == 'body': style = 'arabic'
        elif self.section_context == 'appendix': style = 'Alph'
        raw_latex = (('\n'+r'\setcounter{%s}{0}' + '\n' +
                      r'\renewcommand \the%s{\%s{%s}}'+'\n') %
               (self.TOP_SECTION, self.TOP_SECTION, style, self.TOP_SECTION))
        if self.section_context == 'appendix':
            raw_latex += '\\appendix\n'
        self.prepend_raw_latex(node, raw_latex)

    def prepend_raw_latex(self, node, raw_latex):
        """Insert a latex-only raw node containing `raw_latex`
        immediately before `node` (or, if `node` is the document, as
        its first child)."""
        raw_node = docutils.nodes.raw('', raw_latex, format='latex')
        if isinstance(node, docutils.nodes.document):
            node.insert(0, raw_node)
        else:
            node_index = node.parent.children.index(node)
            node.parent.insert(node_index, raw_node)

    def depart_section(self, node):
        # Drop the counter for this section's subsections.
        self.section_num.pop()

    def format_section_num(self, depth=None):
        """Return the current section number as a dotted string.
        The first piece is written as a number in the body, as an
        upper-case roman numeral in the preface, and as an upper-case
        letter in the appendix.  If `depth` is given, then only the
        first `depth` pieces are included."""
        pieces = [('%s' % p) for p in self.section_num]
        top = self.section_num[0]
        if self.section_context == 'body':
            pieces[0] = ('%s' % top)
        elif self.section_context == 'preface':
            pieces[0] = self.ROMAN[top-1].upper()
        elif self.section_context == 'appendix':
            pieces[0] = self.LETTERS[top-1].upper()
        else:
            assert 0, 'unexpected section context'
        if depth is None:
            return '.'.join(pieces)
        else:
            return '.'.join(pieces[:depth])

    def visit_section_context(self, node):
        """Remember the requested context (it takes effect at the
        next top-level section), and remove the marker node."""
        assert node['context'] in ('body', 'preface', 'appendix')
        self.set_section_context = node['context']
        node.replace_self([])

    #////////////////////////////////////////////////////////////
    # Examples
    #////////////////////////////////////////////////////////////
    NESTED_EXAMPLES = True

    def visit_example(self, node):
        """Number the example, and open a counter for its
        subexamples."""
        self.example_num[-1] += 1
        node['num'] = self.short_example_num()
        for node_id in self.get_ids(node):
            self.reference_labels[node_id] = self.format_example_num()
        self.example_num.append(0)

    def depart_example(self, node):
        if not self.NESTED_EXAMPLES:
            if self.example_num[-1] > 0:
                # If the example contains a list of subexamples, then
                # splice them in to our parent.
                node.replace_self(list(node))
        self.example_num.pop()

    def short_example_num(self):
        """Return the marker for the current example alone: '(1)' at
        the top level, 'a.' at the second level, 'i.' at the third
        level, and a plain number below that."""
        depth = len(self.example_num)
        if depth == 1:
            return '(%s)' % self.example_num[0]
        if depth == 2:
            return '%s.' % self.LETTERS[self.example_num[1]-1]
        if depth == 3:
            return '%s.' % self.ROMAN[self.example_num[2]-1]
        return '%s.' % self.example_num[-1]

    def format_example_num(self):
        """ (1), (2); (1a), (1b); (1a.i), (1a.ii)"""
        ex_num = ('%s' % self.example_num[0])
        if len(self.example_num) > 1:
            ex_num += self.LETTERS[self.example_num[1]-1]
        if len(self.example_num) > 2:
            ex_num += '.%s' % self.ROMAN[self.example_num[2]-1]
        for n in self.example_num[3:]:
            ex_num += '.%s' % n
        return '(%s)' % ex_num

    #////////////////////////////////////////////////////////////
    # Helpers
    #////////////////////////////////////////////////////////////

    def unknown_visit(self, node): pass
    def unknown_departure(self, node): pass

    def get_ids(self, node):
        """Return the ids under which `node` can be referenced.
        These are taken from the target node immediately preceding
        `node`, if there is one; otherwise the result is empty.  A
        target that still carries a 'refid' is rewritten to carry
        that id in its 'ids' attribute instead."""
        node_index = node.parent.children.index(node)
        if node_index == 0:
            return []
        target = node.parent[node_index-1]
        if not isinstance(target, docutils.nodes.target):
            return []

        # Test the attribute dictionary directly, rather than relying
        # on the deprecated has_key() method of the node.
        if 'refid' in target.attributes:
            refid = target['refid']
            target['ids'] = [refid]
            del target['refid']
            return [refid]
        elif 'ids' in target.attributes:
            return target['ids']
        else:
            warning('unable to find id for %s' % target)
            return []

    def label_node(self, node, label, refuri=None, cls='caption-label'):
        """Make sure that `node` ends with a caption; and, for html
        output, put `label` at the start of that caption (followed by
        ': ' if the caption has other content).  If `refuri` is
        given, then the label is a link to it."""
        # A node without children has no caption either.
        if len(node) == 0 or not isinstance(node[-1], docutils.nodes.caption):
            node.append(docutils.nodes.caption())
        caption = node[-1]

        if OUTPUT_FORMAT == 'html':
            cap = docutils.nodes.inline('', label, classes=[cls])
            if refuri:
                cap = docutils.nodes.reference('', '', cap, refuri=refuri,
                                               mimetype='text/x-python')
            caption.insert(0, cap)
            if len(caption) > 1:
                caption.insert(1, docutils.nodes.Text(': '))
1164
        
1165
class ReferenceVisitor(docutils.nodes.NodeVisitor):
    """
    Visitor that replaces the text of references to numbered nodes
    and callouts with their labels.  While walking the document it
    also collects, in `targets`, the names and ids of every element
    that is handled by `unknown_visit`.
    """
    def __init__(self, document, reference_labels, callout_labels):
        self.reference_labels = reference_labels
        self.callout_labels = callout_labels
        self.targets = set()
        docutils.nodes.NodeVisitor.__init__(self, document)

    def unknown_visit(self, node):
        if isinstance(node, docutils.nodes.Element):
            self.targets.update(node.get('names', []))
            self.targets.update(node.get('ids', []))

    def unknown_departure(self, node): pass

    # Don't mess with the table of contents.
    def visit_topic(self, node):
        if 'contents' in node.get('classes', ()):
            raise docutils.nodes.SkipNode

    def visit_reference(self, node):
        """Replace the contents of `node` with the label of its
        target, if the target is a numbered node or a callout."""
        refname = node.get('refname')
        node_id = (node.get('refid') or
                   self.document.nameids.get(refname) or
                   refname)
        if node_id in self.reference_labels:
            label = self.reference_labels[node_id]
            node.clear()
            node.append(docutils.nodes.Text(label))
            expand_reference_text(node)
        elif node_id in self.callout_labels:
            label = self.callout_labels[node_id]
            node.clear()
            node.append(callout_marker(number=label, name='ref-%s' % node_id))
            expand_reference_text(node)
            # There's no explicitly encoded target element, so manually
            # resolve the reference:
            node['refid'] = node_id
            node.resolved = True
1200
1201
# Matches text that ends with one of the keywords below (in any
# case) followed by whitespace.  Group 1 is the text before the
# keyword; group 2 is the keyword itself.
_EXPAND_REF_RE = re.compile(r'(?is)^(.*)(%s)\s+$' % '|'.join(
    ['figure', 'table', 'example', 'chapter', 'section', 'appendix',
     'sentence', 'tree', 'listing', 'program']))

def expand_reference_text(node):
    """If the reference is immediately preceded by one of the words
    'figure', 'table', 'example', 'chapter', 'section', 'appendix',
    'sentence', 'tree', 'listing' or 'program', then move that word
    into the link (rather than linking just the number), and mark
    the reference with the 'expanded_ref' attribute."""
    if node.get('expanded_ref'):
        assert 0, ('Already expanded!!  %s' % node)
    node_index = node.parent.children.index(node)
    if node_index == 0:
        return
    prev_node = node.parent.children[node_index-1]
    if not isinstance(prev_node, docutils.nodes.Text):
        return
    m = _EXPAND_REF_RE.match(prev_node.data)
    if m:
        # Cut the keyword (and the whitespace after it) from the
        # preceding text, and put it in front of the link text.
        prev_node.data = m.group(1)
        link = node.children[0]
        link.data = '%s %s' % (m.group(2), link.data)
        node['expanded_ref'] = True
1220
1221
######################################################################
1222
#{ Feature Structures (AVMs)
1223
######################################################################
1224
1225
class AVM:
    """
    An attribute-value matrix: an ordered collection of key/value
    pairs with an optional identifier.  Each value is expected to be
    another `AVM`, an `AVMValue` or an `AVMPointer`.
    """
    def __init__(self, ident):
        self.ident = ident
        self.keys = []    # keys, in the order they were assigned
        self.vals = {}    # key -> value

    def assign(self, key, val):
        """Add the pair (`key`, `val`).  Raise ValueError if `key`
        has already been assigned."""
        # self.vals has exactly the keys listed in self.keys.
        if key in self.vals: raise ValueError('duplicate key')
        self.keys.append(key)
        self.vals[key] = val

    def __str__(self):
        vals = []
        for key in self.keys:
            val = self.vals[key]
            if isinstance(val, AVMPointer):
                vals.append('%s -> %s' % (key, val.ident))
            else:
                vals.append('%s = %s' % (key, val))
        s = '{%s}' % ', '.join(vals)
        if self.ident: s += '[%s]' % self.ident
        return s

    def as_latex(self):
        """Return this AVM wrapped in a latex ``avm`` environment."""
        return '\\begin{avm}\n%s\\end{avm}\n' % self._as_latex()

    def _as_latex(self, indent=0):
        """Return the latex for the matrix itself: one row per key,
        with each value rendered by its own `_as_latex` method."""
        if self.ident: ident = '\\@%s ' % self.ident
        else: ident = ''
        lines = ['%s %s & %s' % (indent*'    ', key,
                                 self.vals[key]._as_latex(indent+1))
                 for key in self.keys]
        return ident + '\\[\n' + ' \\\\\n'.join(lines) + '\\]\n'

    def _entry(self, val, cls):
        """Return a table entry with class `cls` containing `val`.
        A string is wrapped in a paragraph; anything else is used
        as the entry's child directly."""
        if isinstance(val, basestring):
            return docutils.nodes.entry('',
                docutils.nodes.paragraph('', val), classes=[cls])
        else:
            return docutils.nodes.entry('', val, classes=[cls])

    def _pointer(self, ident):
        """Return a paragraph containing `ident`, marked with the
        'avm-pointer' class."""
        return docutils.nodes.paragraph('', '',
                    docutils.nodes.inline(ident, ident,
                                          classes=['avm-pointer']))

    def as_table(self):
        """Return a docutils node that displays this AVM: a
        paragraph containing '[]' if the AVM has no keys, and
        otherwise a table with class 'avm' that has one row per key.
        Each row consists of a left bracket cell, the key, '=' (or an
        arrow, for pointers), the value, a right bracket cell, and a
        cell that holds the AVM's identifier in the first row."""
        if not self.keys:
            return docutils.nodes.paragraph('', '[]',
                                            classes=['avm-empty'])

        rows = []
        last = len(self.keys)-1
        for index, key in enumerate(self.keys):
            val = self.vals[key]
            key_node = self._entry(key, 'avm-key')
            if isinstance(val, AVMPointer):
                eq_node = self._entry(u'\u2192', 'avm-eq') # right arrow
                val_node = self._entry(self._pointer(val.ident), 'avm-val')
            elif isinstance(val, AVM):
                eq_node = self._entry('=', 'avm-eq')
                val_node = self._entry(val.as_table(), 'avm-val')
            else:
                value = ('%s' % val.val).replace(' ', u'\u00a0') # =nbsp
                eq_node = self._entry('=', 'avm-eq')
                val_node = self._entry(value, 'avm-val')

            row = docutils.nodes.row('', key_node, eq_node, val_node)
            rows.append(row)

            # Add left/right bracket nodes:
            if last == 0: vpos = 'topbot'
            elif index == 0: vpos = 'top'
            elif index == last: vpos = 'bot'
            else: vpos = ''
            row.insert(0, self._entry(u'\u00a0', 'avm-%sleft' % vpos))
            row.append(self._entry(u'\u00a0', 'avm-%sright' % vpos))

            # Add id:
            if index == 0 and self.ident:
                row.append(self._entry(self._pointer(self.ident),
                                       'avm-ident'))
            else:
                row.append(self._entry(u'\u00a0', 'avm-ident'))

        colspecs = [docutils.nodes.colspec(colwidth=1) for i in range(6)]

        tbody = docutils.nodes.tbody('', *rows)
        # NOTE(review): each row has six entries and there are six
        # colspecs, but the tgroup declares cols=3 -- confirm whether
        # this is intentional.
        tgroup = docutils.nodes.tgroup('', cols=3, *(colspecs+[tbody]))
        table = docutils.nodes.table('', tgroup, classes=['avm'])
        return table
1311
    
1312
class AVMValue:
    """An atomic value in an AVM, with an optional identifier."""
    def __init__(self, ident, val):
        self.ident = ident
        self.val = val

    def __str__(self):
        if self.ident:
            return '%s[%s]' % (self.val, self.ident)
        return '%r' % self.val

    def _as_latex(self, indent=0):
        """Return the value as text; `indent` is accepted for
        compatibility with `AVM._as_latex`, but is not used."""
        return '%s' % self.val
1321
1322
class AVMPointer:
    """A value in an AVM that refers to the structure whose
    identifier is `ident`."""
    def __init__(self, ident):
        self.ident = ident

    def __str__(self):
        return '[%s]' % self.ident

    def _as_latex(self, indent=0):
        """Return the latex for the pointer; `indent` is accepted
        for compatibility with `AVM._as_latex`, but is not used."""
        return '\\@{%s}' % self.ident
1329
1330
def parse_avm(s, ident=None):
    """
    Parse the text drawing of an attribute-value matrix, and return
    the corresponding `AVM` (whose identifier is `ident`).

    Empty lines in `s` are ignored.  Every other line must start
    with '[', end with ']', and have the same width as the first
    line.  Each line has one of the following forms:

      - ``[ KEY = VAL ]`` or ``[ KEY = (ID) VAL ]``: an atomic value.
      - ``[ KEY -> (ID) ]``: a pointer.
      - ``[ KEY = [...] ]`` or ``[ KEY = (ID) [...] ]``: the first
        line of a nested AVM.
      - ``[       [...] ]``: a continuation line of a nested AVM,
        whose brackets must be in the same columns as on the first
        line.
      - ``[           ]``: a blank line, which ends a nested AVM.

    :raise ValueError: If `s` is not well formed.  The argument of
        the exception is the index of the offending line (or 0 if
        `s` contains no lines at all).
    """
    lines = [l.rstrip() for l in s.split('\n') if l.strip()]
    if not lines: raise ValueError(0)
    # Add a blank line at the end, so that a nested AVM that runs up
    # to the last line is terminated like any other.
    lines.append('[%s]' % (' '*(len(lines[0])-2)))

    # Create our new AVM.
    avm = AVM(ident)

    width = len(lines[0]) # Line width
    avmval_pos = None     # (left, right, top) for nested AVMs
    key = None            # Key for nested AVMs
    nested_ident = None   # Identifier for nested AVMs

    NESTED = re.compile(r'\[\s+(\[.*\])\s*\]$')
    ASSIGN = re.compile(r'\[\s*(?P<KEY>[^\[=>]+?)\s*'
                        r'(?P<EQ>=|->)\s*'
                        r'(\((?P<ID>\d+)\))?\s*'
                        r'((?P<VAL>.+?))\s*\]$')
    BLANK = re.compile(r'\[\s+\]$')

    for lineno, line in enumerate(lines):
        if line[0] != '[' or line[-1] != ']' or len(line) != width:
            raise ValueError(lineno)

        nested_m = NESTED.match(line)
        assign_m = ASSIGN.match(line)
        blank_m = BLANK.match(line)
        if not (nested_m or assign_m or blank_m):
            raise ValueError(lineno)

        # Does this line contain (part of) a nested AVM?  If so, find
        # the columns that it occupies; they must be the same on
        # every line of the nested AVM.
        if nested_m or (assign_m and assign_m.group('VAL').startswith('[')):
            left, right = line.index('[',1), line.rindex(']', 0, -1)+1
            if avmval_pos is None:
                avmval_pos = (left, right, lineno)
            elif avmval_pos[:2] != (left, right):
                raise ValueError(lineno)

        if assign_m:
            if assign_m.group('VAL').startswith('['):
                # First line of a nested AVM: remember its key and
                # identifier until we reach its end.
                if key is not None: raise ValueError(lineno)
                if assign_m.group('EQ') != '=': raise ValueError(lineno)
                key = assign_m.group('KEY')
                nested_ident = assign_m.group('ID')
            else:
                if assign_m.group('EQ') == '=':
                    avm.assign(assign_m.group('KEY'),
                               AVMValue(assign_m.group('ID'),
                                        assign_m.group('VAL')))
                else:
                    # A pointer consists of an identifier only.
                    if assign_m.group('VAL').strip(): raise ValueError(lineno)
                    avm.assign(assign_m.group('KEY'),
                               AVMPointer(assign_m.group('ID')))

        # A blank line ends the nested AVM (if any): cut its columns
        # out of the lines that it spans, and parse them recursively.
        if blank_m and avmval_pos is not None:
            left, right, top = avmval_pos
            valstr = '\n'.join(l[left:right] for l in lines[top:lineno])
            avm.assign(key, parse_avm(valstr, nested_ident))
            key = avmval_pos = None

    return avm
1391
1392
1393
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1394
######################################################################
1395
#{ Doctest Indentation
1396
######################################################################
1397
1398
class UnindentDoctests(Transform):
    """
    In our source text, we have indented most of the doctest blocks,
    for two reasons: it makes copy/pasting with the doctest script
    easier; and it's more readable.  But we don't *actually* want them
    to be included in block_quote environments when we output them.
    So this transform looks for any block_quote whose children are
    all doctest_blocks, and eliminates the block_quote.
    """
    default_priority = 1000

    def apply(self):
        self.document.walkabout(UnindentDoctestVisitor(self.document))
1410
1411
class UnindentDoctestVisitor(docutils.nodes.NodeVisitor):
    """
    Visitor that replaces each block_quote that contains nothing but
    doctest_blocks with those doctest_blocks.
    """
    def __init__(self, document):
        docutils.nodes.NodeVisitor.__init__(self, document)

    def unknown_visit(self, node): pass
    def unknown_departure(self, node): pass

    def visit_block_quote(self, node):
        """Splice the children of `node` into its parent if all of
        them are doctest blocks.  The children of a block_quote are
        never visited, whether or not it was replaced."""
        only_doctests = True
        for child in node:
            if not isinstance(child, docutils.nodes.doctest_block):
                only_doctests = False
                break
        if only_doctests:
            node.replace_self(list(node))
        raise docutils.nodes.SkipNode()
1421
        
1422
# Matches a '# doctest: ...' option comment at the end of a line.  If
# the comment is the only thing on a '...' continuation line, then
# the match includes that whole line (and the newline before it).
_OPTION_DIRECTIVE_RE = re.compile(
    r'(\n[ ]*\.\.\.[ ]*)?#\s*doctest:\s*([^\n\'"]*)$', re.MULTILINE)

def strip_doctest_directives(text):
    """Return a copy of `text` from which all doctest option
    comments ('# doctest: ...') have been removed."""
    return _OPTION_DIRECTIVE_RE.sub('', text)
1426
1427
class pylisting(docutils.nodes.General, docutils.nodes.Element):
    """
    Python source code listing.

    Children: doctest_block+ caption?
    """
1433
######################################################################
1434
#{ HTML Output
1435
######################################################################
1436
from epydoc.docwriter.html_colorize import PythonSourceColorizer
1437
import epydoc.docwriter.html_colorize
1438
epydoc.docwriter.html_colorize .PYSRC_EXPANDTO_JAVASCRIPT = ''
1439
1440
class CustomizedHTMLWriter(HTMLWriter):
    """
    An HTML writer that translates documents with
    `CustomizedHTMLTranslator`, and overrides the stylesheet and
    output encoding defaults of the standard HTML writer.
    """
    # Work on a copy, so the defaults of HTMLWriter itself are not
    # modified.
    settings_defaults = HTMLWriter.settings_defaults.copy()
    settings_defaults.update({
        'stylesheet': CSS_STYLESHEET,
        'stylesheet_path': None,
        'output_encoding': 'unicode',
        'output_encoding_error_handler': 'xmlcharrefreplace',
        })

    def __init__(self):
        HTMLWriter.__init__(self)
        self.translator_class = CustomizedHTMLTranslator

    #def translate(self):
    #    postprocess(self.document)
    #    HTMLWriter.translate(self)
1456
1457
class CustomizedHTMLTranslator(HTMLTranslator):
1458
    def __init__(self, document):
1459
        HTMLTranslator.__init__(self, document)
1099.1.218 by Nick Chadwick
tutorials can now use RST
1460
        self.head_prefix.append(COPY_CLIPBOARD_JS)
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1461
1462
    def visit_pylisting(self, node):
1463
        self._write_pylisting_file(node)
1464
        self.body.append(self.CODEBOX_HEADER % ('pylisting', 'pylisting'))
1465
1466
    def depart_pylisting(self, node):
1467
        self.body.append(self.CODEBOX_FOOTER)
1468
1469
    def visit_doctest_block(self, node):
1470
        # Collect the text content of the doctest block.
1099.1.218 by Nick Chadwick
tutorials can now use RST
1471
        text = ''.join(('%s' % c) for c in node)
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1472
        text = textwrap.dedent(text)
1473
        text = strip_doctest_directives(text)
1099.1.218 by Nick Chadwick
tutorials can now use RST
1474
        text = text.decode('latin1')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1475
1476
        # Colorize the contents of the doctest block.
1099.1.218 by Nick Chadwick
tutorials can now use RST
1477
        if hasattr(node, 'callouts'):
1478
            callouts = node['callouts']
1479
        else:
1480
            callouts = None
1481
        colorizer = HTMLDoctestColorizer(self.encode, callouts)
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1482
        if node.get('is_codeblock'):
1483
            pysrc = colorizer.colorize_codeblock(text)
1484
        else:
1099.1.218 by Nick Chadwick
tutorials can now use RST
1485
            try:
1486
                pysrc = colorizer.colorize_doctest(text)
1487
            except:
1488
                print '='*70
1489
                print text
1490
                print '='*70
1491
                raise
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1492
1493
        if node.get('is_codeblock'): typ = 'codeblock' 
1494
        else: typ = 'doctest'
1099.1.218 by Nick Chadwick
tutorials can now use RST
1495
        pysrc = self.CODEBOX_ROW % (typ, typ, pysrc)
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1496
1497
        if not isinstance(node.parent, pylisting):
1498
            self.body.append(self.CODEBOX_HEADER % ('doctest', 'doctest'))
1499
            self.body.append(pysrc)
1500
            self.body.append(self.CODEBOX_FOOTER)
1501
        else:
1502
            self.body.append(pysrc)
1503
            
1504
        raise docutils.nodes.SkipNode() # Content already processed
1505
1506
    CODEBOX_HEADER = ('<div class="%s">\n'
1507
                        '<table border="0" cellpadding="0" cellspacing="0" '
1508
                        'class="%s" width="95%%">\n')
1509
    CODEBOX_FOOTER = '</table></div>\n'
1510
    CODEBOX_ROW = textwrap.dedent('''\
1511
      <tr><td class="%s">
1512
      <table border="0" cellpadding="0" cellspacing="0" width="100%%">
1099.1.218 by Nick Chadwick
tutorials can now use RST
1513
      <tr><td width="1" class="copybar"
1514
              onclick="javascript:copy_%s_to_clipboard(this.nextSibling);"
1515
              >&nbsp;</td>
1516
      <td class="pysrc">%s</td>
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1517
      </tr></table></td></tr>\n''')
1518
1519
    # For generated pylisting files:
1520
    _PYLISTING_FILE_HEADER = "# Natural Language Toolkit: %s\n\n"
1521
1522
    def _write_pylisting_file(self, node):
1523
        if not os.path.exists(PYLISTING_DIR):
1524
            os.mkdir(PYLISTING_DIR)
1525
            
1526
        name = re.sub('\W', '_', node['name'])
1527
        filename = os.path.join(PYLISTING_DIR, name+PYLISTING_EXTENSION)
1528
        out = open(filename, 'w')
1529
        out.write(self._PYLISTING_FILE_HEADER % name)
1530
        for child in node:
1531
            if not isinstance(child, docutils.nodes.doctest_block):
1532
                continue
1533
            elif child['is_codeblock']:
1099.1.218 by Nick Chadwick
tutorials can now use RST
1534
                out.write(''.join(('%s' % c) for c in child)+'\n\n')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1535
            elif INCLUDE_DOCTESTS_IN_PYLISTING_FILES:
1099.1.218 by Nick Chadwick
tutorials can now use RST
1536
                lines = ''.join(('%s' % c) for c in child).split('\n')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1537
                in_doctest_block = False
1538
                for line in lines:
1539
                    if line.startswith('>>> '):
1540
                        out.write(line[4:]+'\n')
1541
                        in_doctest_block = True
1542
                    elif line.startswith('... ') and in_doctest_block:
1543
                        out.write(line[4:]+'\n')
1544
                    elif line.strip():
1545
                        if in_doctest_block:
1546
                            out.write('# Expect:\n')
1547
                        out.write('#     ' + line+'\n')
1548
                        in_doctest_block = False
1549
                    else:
1550
                        out.write(line+'\n')
1551
                        in_doctest_block = False
1552
        out.close()
1553
1554
    def visit_exercise(self, node):
1099.1.218 by Nick Chadwick
tutorials can now use RST
1555
        self.body.append('<exercise weight="1" src="')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1556
1557
    def depart_exercise(self, node):
1465 by William Grant
The rST renderer will now insert a newline after each exercise node, to work around a bug breaking rendering on Hardy.
1558
        self.body.append('"/>\n')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1559
1099.1.218 by Nick Chadwick
tutorials can now use RST
1560
    def visit_challenge(self, node):
1561
        self.body.append('<exercise weight="0" src="')
1562
1563
    def depart_challenge(self, node):
1465 by William Grant
The rST renderer will now insert a newline after each exercise node, to work around a bug breaking rendering on Hardy.
1564
        self.body.append('"/>\n')
1099.1.218 by Nick Chadwick
tutorials can now use RST
1565
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1566
    def visit_literal(self, node):
1567
        """Process text to prevent tokens from wrapping."""
1099.1.218 by Nick Chadwick
tutorials can now use RST
1568
        text = ''.join(('%s' % c) for c in node)
1569
        text = text.decode('latin1')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1570
        colorizer = HTMLDoctestColorizer(self.encode)
1571
        pysrc = colorizer.colorize_inline(text)#.strip()
1572
        #pysrc = colorize_doctestblock(text, self._markup_pysrc, True)
1573
        self.body+= [self.starttag(node, 'tt', '', CLASS='doctest'),
1574
                     '<span class="pre">%s</span></tt>' % pysrc]
1575
        raise docutils.nodes.SkipNode() # Content already processed
1576
                          
1099.1.218 by Nick Chadwick
tutorials can now use RST
1577
    def _markup_pysrc(self, s, tag):
1578
        return '\n'.join('<span class="pysrc-%s">%s</span>' %
1579
                         (tag, self.encode(line))
1580
                         for line in s.split('\n'))
1581
1582
    def visit_example(self, node):
1583
        self.body.append(
1584
            '<p><table border="0" cellpadding="0" cellspacing="0" '
1585
            'class="example">\n  '
1586
            '<tr valign="top"><td width="30" align="right">'
1587
            '%s</td><td width="15"></td><td>' % node['num'])
1588
1589
    def depart_example(self, node):
1590
        self.body.append('</td></tr></table></p>\n')
1591
1592
    def visit_idxterm(self, node):
1593
        self.body.append('<span class="%s">' % ' '.join(node['classes']))
1594
        if 'topic' in node['classes']: raise docutils.nodes.SkipChildren
1595
        
1596
    def depart_idxterm(self, node):
1597
        self.body.append('</span>')
1598
1599
    def visit_index(self, node):
1600
        self.body.append('<div class="index">\n<h1>Index</h1>\n')
1601
        
1602
    def depart_index(self, node):
1603
        self.body.append('</div>\n')
1604
1605
    _seen_callout_markers = set()
1606
    def visit_callout_marker(self, node):
1607
        # Only add an id to a marker the first time we see it.
1608
        add_id = (node['name'] not in self._seen_callout_markers)
1609
        self._seen_callout_markers.add(node['name'])
1610
        if add_id:
1611
            self.body.append('<span id="%s">' % node['name'])
1612
        self.body.append(CALLOUT_IMG % (node['number'], node['number']))
1613
        if add_id:
1614
            self.body.append('</span>')
1615
        raise docutils.nodes.SkipNode() # Done with this node.
1616
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1617
    def depart_field_name(self, node):
1618
        # Don't add ":" in callout field lists.
1619
        if 'callout' in node['classes']:
1620
            self.body.append(self.context.pop())
1621
        else:
1622
            HTMLTranslator.depart_field_name(self, node)
1623
    
1624
    def _striphtml_len(self, s):
1625
        return len(re.sub(r'&[^;]+;', 'x', re.sub(r'<[^<]+>', '', s)))
1626
1627
    def visit_caption(self, node):
1099.1.218 by Nick Chadwick
tutorials can now use RST
1628
        if isinstance(node.parent, pylisting):
1629
            self.body.append('<tr><td class="caption">')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1630
        HTMLTranslator.visit_caption(self, node)
1631
        
1632
    def depart_caption(self, node):
1099.1.218 by Nick Chadwick
tutorials can now use RST
1633
        if isinstance(node.parent, pylisting):
1634
            self.body.append('</td></tr>')
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1635
        HTMLTranslator.depart_caption(self, node)
1636
1637
    def starttag(self, node, tagname, suffix='\n', empty=0, **attributes):
1638
        if node.get('mimetype'):
1639
            attributes['type'] = node.get('mimetype')
1640
        return HTMLTranslator.starttag(self, node, tagname, suffix,
1641
                                       empty, **attributes)
1642
        
1643
######################################################################
1644
#{ Source Code Highlighting
1645
######################################################################
1646
1647
# [xx] Note: requires the very latest svn version of epydoc!
1648
from epydoc.markup.doctest import DoctestColorizer
1649
1650
class HTMLDoctestColorizer(DoctestColorizer):
    """
    Doctest colorizer that produces HTML markup: tokens are wrapped
    in <span class="pysrc-TAG"> elements, and callout comments are
    replaced by linked callout images.
    """
    PREFIX = '<pre class="doctest">\n'
    SUFFIX = '</pre>\n'

    def __init__(self, encode_func, callouts=None):
        """
        @param encode_func: Function applied to text before it is
            placed in the output.
        @param callouts: Mapping from callout id to callout number,
            or C{None} to leave callout comments unprocessed.
        """
        self.callouts = callouts
        self.encode = encode_func

    def markup(self, s, tag):
        """Return the HTML markup for the token C{s} of kind C{tag}."""
        if tag == 'other':
            return self.encode(s)

        # CALLOUT_RE is consulted only for comments, and only when
        # callouts are in use.
        if tag == 'comment' and self.callouts is not None:
            found = CALLOUT_RE.match(s)
            if found:
                callout_id = found.group(1)
                callout_num = self.callouts[callout_id]
                img = CALLOUT_IMG % (callout_num, callout_num)
                return ('<a name="%s" /><a href="#ref-%s">%s</a>' %
                        (callout_id, callout_id, img))

        return ('<span class="pysrc-%s">%s</span>' %
                (tag, self.encode(s)))
1669
1670
######################################################################
1671
#{ Customized Reader (register new transforms)
1672
######################################################################
1673
1674
class CustomizedReader(StandaloneReader):
    """
    Standalone reader that registers the extra transforms listed in
    C{_TRANSFORMS} in addition to the standard ones.
    """
    # The trailing number is presumably the transform's priority.
    _TRANSFORMS = [
        UnindentDoctests,           # 1000
        ]

    def get_transforms(self):
        """Return the standard transforms followed by C{_TRANSFORMS}."""
        inherited = StandaloneReader.get_transforms(self)
        return inherited + self._TRANSFORMS
1680
1099.1.218 by Nick Chadwick
tutorials can now use RST
1681
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1682
######################################################################
1683
#{ Main Function
1684
######################################################################
1685
1686
# Captures everything between the opening <div class="document"> and
# the final </div> that directly precedes </body></html>, i.e. the
# rendered document body without the surrounding page.
_OUTPUT_RE = re.compile(
    r'<div class="document">\s+(.*)\s+</div>\n</body>\n</html>',
    re.DOTALL | re.MULTILINE)
1688
1689
def rst(input):
1690
    try:
1099.1.218 by Nick Chadwick
tutorials can now use RST
1691
        CustomizedHTMLWriter.settings_defaults.update()
1099.1.225 by Nick Chadwick
Modified the setup script to include '.txt' files.
1692
        header = '.. include:: ' + os.path.join(
1693
            os.path.dirname(inspect.getfile(rst)), 'definitions.txt') + '\n' 
1694
        input = header + input
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1695
        output = docutils.core.publish_string(input,
1696
            writer=CustomizedHTMLWriter(), reader=CustomizedReader())
1697
        match = _OUTPUT_RE.search(output)
1698
        if match:
1099.1.232 by Nick Chadwick
Removed XML from database. RST now generates a full xml document, not
1699
            return "<div>" + match.group(1) + "</div>"
523 by stevenbird
Adding ReStructured Text preprocessing of exercise descriptions,
1700
        else:
1701
            raise ValueError('Could not process exercise definition')
1702
1703
    except docutils.utils.SystemMessage, e:
1704
        print 'Fatal error encountered!', e
1705
        raise
1706
        sys.exit(-1)