#!/usr/bin/env python
## title = "mylit"
## stylesheet = "pygments_style.css"
# mylit

# mylit is a simple tool for literate programming in Python. To convert a
# literate Python program called somefile.py to HTML, run python -m mylit
# somefile.py. The following documentation has been generated from the
# mylit source.

#! TODO: automatic links between identifiers, macros

# We use itertools for chaining sequences.
import itertools

# To make sure we only parse lines beginning with # that actually are
# comments (and not, e.g., inside strings), we double-check with the
# tokenize module.
import tokenize

# We use Pygments for syntax highlighting.
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter

# This is the HTML template that will be filled with code:
#! NOTE(review): the markup around the placeholders appears to have been lost
#! from this copy of the file (nothing references the stylesheet variable);
#! restore it from the original template before relying on the output.
template = """ %(title)s %(body)s """


# strip_left() removes leading whitespace from each line in block, plus
# amount additional characters, and returns the shortened lines as a new
# list. This is defined as a function because the list comprehension cannot
# be used directly in format_program(), which uses an exec statement.
def strip_left(block, amount=0):
    return [x.lstrip()[amount:] for x in block]


# find_comments() takes the program text data and returns a list of
# (row, column) tuples of locations where comments start. Row indices are
# one-based! Whatever the tokenize module raises for source it cannot
# tokenize is passed on to the caller.
def find_comments(data):
    # This helper function splits data into newline-terminated lines for
    # generate_tokens. The final empty string signals the end of the input.
    def readline(data):
        for line in data.splitlines():
            yield line + "\n"
        yield ""

    # generate_tokens expects a callable that returns one line per call. The
    # generator is advanced with the next() built-in, because generators only
    # have a .next method on Python 2.
    line_source = readline(data)

    # Here we generate tokens, extract comments and remember only the
    # starting index.
    return [start
            for ttype, tstring, start, end, line
            in tokenize.generate_tokens(lambda: next(line_source))
            if ttype == tokenize.COMMENT]


# lines() splits data into newline-terminated lines and returns them as a
# generator. Again, this is defined outside of format_program() because of
# the exec() call.
def lines(data):
    return (line + "\n" for line in data.splitlines())


# format_program iterates over the lines object and returns HTML.
def format_program(data, title="", stylesheet="http://pygments.org/media/pygments_style.css"):
    # format_program() takes the text of a literate program in data and
    # returns the filled-in template as a string. title and stylesheet are
    # the initial values of the template variables of the same name.

    # Executed lines run in this namespace, and the template is filled from
    # it afterwards, so a variable assigned by an executed line replaces the
    # value passed in. An explicit dictionary is used because assignments
    # made by exec() are not reliably visible as function locals.
    namespace = {"title": title, "stylesheet": stylesheet}

    # The HTML body is stored in body.
    body = []

    # Adjacent lines of the same type are aggregated in block and formatted
    # together.
    block = []

    # The type of the last block is stored in last_block_type.
    last_block_type = None

    # Here we store the beginning indices of all comment tokens. They are
    # kept in a set because every line is looked up in them.
    comments = set(find_comments(data))

    # Now we iterate over the lines, formatting comments and code as
    # appropriate. None is appended to the list of lines to terminate the
    # last block. The line numbers start at one to be consistent with the
    # tokenizer.
    for lineno, line in enumerate(itertools.chain(lines(data), [None]), 1):
        # A line only counts as a comment line if it starts with # and the
        # tokenizer confirms that a comment token begins at that position.
        stripped = None if line is None else line.strip()
        is_comment = (line is not None
                      and stripped.startswith("#")
                      and (lineno, line.find("#")) in comments)

        # Comment lines starting with #! are ignored. This includes the
        # traditional "shebang" line as well as any code the user may want
        # to exclude from the output.
        if is_comment and stripped.startswith("#!"):
            continue

        # A None line terminates the previous block.
        if line is None:
            block_type = None
        # Comment lines starting with two # characters are executed. This
        # can be used to set configuration variables, for example.
        elif is_comment and stripped.startswith("##"):
            block_type = "exec"
        # Any other comment line starting with # is a comment to include in
        # the HTML output.
        elif is_comment:
            block_type = "comment"
        # Blank lines terminate comment blocks only.
        elif not stripped and last_block_type == "comment":
            block_type = None
        # All other lines are considered code.
        else:
            block_type = "code"

        # Adjacent lines of the same type are aggregated.
        if block_type == last_block_type:
            block.append(line)
        # As soon as the block type changes, the previous block is
        # formatted.
        else:
            # Code is formatted by Pygments.
            #! NOTE(review): the strings wrapped around the highlighted code
            #! look like they have lost their markup; confirm against the
            #! original source.
            if last_block_type == "code":
                body.append("\n\n"
                            + highlight("".join(block), PythonLexer(), HtmlFormatter())
                            + "\n\n")
            # Exec lines are executed and not copied to the output. The
            # width of the marker and the whitespace after it is measured on
            # the left-stripped first line, because strip_left() strips the
            # indentation before it cuts characters off.
            elif last_block_type == "exec":
                first = block[0].lstrip()
                marker_width = len(first) - len(first[2:].lstrip())
                exec("".join(strip_left(block, marker_width)), globals(), namespace)
            # Comments are copied verbatim to the output.
            #! NOTE(review): the strings wrapped around the comment text look
            #! like they have lost their markup as well.
            elif last_block_type == "comment":
                body.append("\n\n" + " ".join(strip_left(block, 1)) + "\n\n")
            last_block_type = block_type
            block = [line]

    # Insert missing variables into the template and return. An executed
    # line may have provided its own template.
    namespace["body"] = "\n".join(body)
    return namespace.get("template", template) % namespace


# When the script is called from the command line, parse the arguments and
# run format_program.
if __name__ == "__main__":
    # argparse is used for parsing the command line arguments.
    import argparse
    # os is used for file name manipulation.
    import os
    # sys contains the standard output stream.
    import sys

    # The parser is constructed here.
    parser = argparse.ArgumentParser(description="Convert a literate Python program to HTML.")
    # infilename is a mandatory positional argument.
    parser.add_argument("infilename", help="the input file")
    # outfilename is an optional argument.
    parser.add_argument("outfilename", nargs="?", help="the output file, '-' for standard output")
    # title and stylesheet can be specified if the script does not do so
    # itself.
    parser.add_argument("--title", "-t", help="the document title")
    parser.add_argument("--stylesheet", "-s", help="the document stylesheet")
    # Parse the arguments.
    args = parser.parse_args()

    # If outfilename is not specified, it is generated from infilename.
    if args.outfilename is None:
        args.outfilename = os.path.splitext(args.infilename)[0] + os.path.extsep + "html"

    # Pop any arguments that are not meant to end up in the format_program
    # call. Options that were not given on the command line are dropped as
    # well, so that the defaults of format_program apply to them.
    options = vars(args)
    infilename = options.pop("infilename")
    outfilename = options.pop("outfilename")
    options = {key: value for key, value in options.items() if value is not None}

    # Open the input file and format it.
    with open(infilename, "r") as f:
        result = format_program(f.read(), **options)

    # Write the results. This happens after the input file is closed, so
    # that in-place formatting is possible. The standard output stream is
    # written to directly, so that it is not closed afterwards.
    if outfilename == "-":
        sys.stdout.write(result)
    else:
        with open(outfilename, "w") as f:
            f.write(result)