Previous topic

6. Helper Functions

Next topic

8. C-API

This Page

7. Subclassing InputReaderΒΆ

Quite often it is necessary to do some further processing of the data that is read in before it can be used. This can range from actual further editing of the data to simply rearranging the data to be more easily accessed. InputReader has a post_process() method that is called immediately before the Namespace is returned from read_input(). In the base InputReader class it is implemented as a no-op (it does nothing), but you can subclass InputReader and use this function to further edit the data in the Namespace.

Here is the boilerplate to use to subclass InputReader:

from input_reader import InputReader, ReaderError

class CustomReader(InputReader):

    # Run the InputReader initializations
    def __init__(self):
        super(CustomReader, self).__init__()

    # The post-processing code
    def post_process(self, namespace):
        pass

As an example of how we might use the post_process() method, let’s use some examples from the plotting program used to discuss InputReader.

Below is a full example of code that defines the custom reader, sets up the reading rules, then takes a look at what is in the namespace.

from input_reader import InputReader, ReaderError, abs_file_path
from input_reader import abs_file_path, file_safety_check, range_check

############################
# Define the custom reader #
############################

class CustomReader(InputReader):

    # Run the InputReader initializations
    def __init__(self):
        super(CustomReader, self).__init__()

    # The post-processing code
    def post_process(self, namespace):

        # If a polygon is given, extract the data and store in a tuple of
        # the container class XYPoint
        if 'polygon' in namespace:
            points = []
            class XYPoint(object):
                pass
            for xy in namespace.polygon.xypoint:
                xyp = XYPoint()
                # We don't have to protect against ValueErrors here because
                # the regex only accepted floating point numbers
                xyp.x = float(xy.group(1))
                xyp.y = float(xy.group(2))
                points.append(xyp)
            # Access vertex points as namespace.polygon.vertices[0].x
            namespace.polygon.add('vertices', points)

        # Convert all filenames to absolute paths.  Make sure they exist
        namespace.rawdata = list(namespace.rawdata)
        for n in xrange(len(namespace.rawdata)):
            namespace.rawdata[n] = abs_file_path(namespace.rawdata[n])
            file_safety_check(namespace.rawdata[n]) # Raises IOError if unsafe
        namespace.rawdata = tuple(namespace.rawdata)

        # Also construct the output file name correctly
        out = abs_file_path(namespace.output[0])
        out = '.'.join([out, namespace.output[-1]['format']])
        # To be accessed as namespace.output['name'] (or 'compression')
        namespace.output = {'name': out,
                            'compression': namespace.output[-1]['compression']}

###########################
# Define the reader rules #
###########################

# Remember to use your custom reader!
reader = CustomReader()

# Distance conversion booleans.  Default is meters.
dunits = reader.add_mutually_exclusive_group(dest='distconv', default=lambda x: 1.0 * x)
dunits.add_boolean_key('meters', action=lambda x: 1.0 * x)
dunits.add_boolean_key('centimeters', action=lambda x: 100.0 * x)
dunits.add_boolean_key('kilometers', action=lambda x: 0.001 * x)
dunits.add_boolean_key('millimeters', action=lambda x: 1000.0 * x)

# Time conversion booleans. Default is seconds.
tunits = reader.add_mutually_exclusive_group(dest='timeconv', default=lambda x: x / 1.0)
tunits.add_boolean_key('seconds', action=lambda x: x / 1.0)
tunits.add_boolean_key('minutes', action=lambda x: x / 60.0)
tunits.add_boolean_key('hours', action=lambda x: x / 3600.0)

# The raw data file(s)
reader.add_line_key('rawdata', case=True, repeat=True, required=True)

# Output file
formats = ('pdf', 'png', 'jpg', 'svg', 'bmp', 'eps')
compress = ('zip', 'tgz', 'tbz2', None)
reader.add_line_key('output', case=True, type=str, required=True,
                    keywords={'format':{'type':formats, 'default':'pdf'},
                              'compression':{'type':compress, 'default':None}})

# Line and point styles
colors = ('green', 'red', 'blue', 'orange', 'black', 'violet')
reader.add_line_key('linestyle', type=('solid', 'dashed', 'dotted'),
                    keywords={'color':{'type':colors,'default':'black'},
                              'size' :{'type':int,   'default':1}})
reader.add_line_key('pointstyle', type=('circles', 'squares', 'triangles'),
                    keywords={'color':{'type':colors,'default':'black'},
                              'size' :{'type':int,   'default':1}})

# Optional legend on the plot
legend = reader.add_block_key('legend')
legend.add_boolean_key('shadow')
legend.add_line_key('location', type=('upper_left', 'upper_right',
                                      'lower_left', 'lower_right'))
size = legend.add_block_key('size', end='subend')
size.add_line_key('box', type=int)
size.add_line_key('font', type=int)

# Optional polygon(s) to draw on the plot
polygon = reader.add_block_key('polygon')
polygon.add_regex_line('xypoint', r'(-?\d+\.?\d*) (-?\d+\.?\d*)', repeat=True)

##################
# The input file #
##################

from StringIO import StringIO
from textwrap import dedent
user_input = StringIO()
user_input.write(dedent('''\
        # Plot in centimeters
        centimeters
        # ... and in hours
        hours

        # Read from two data files
        rawdata DATA.txt

        # Output filename and format
        output myplot format=png compression=zip

        # Line and point styles
        linestyle dashed
        # Point style... make them big, and green!
        pointstyle circles color=green size=8

        # Place a polygon
        polygon
            0 0
            4.5 0
            5 5.5
            0 5
        end
        '''))

#########################
# Namespace exploration #
#########################

inp = reader.read_input(user_input)

print inp.rawdata
print inp.output
for vert in inp.polygon.vertices:
    print vert.x, vert.y

We would see this for output

('.../DATA.txt',)
{'name': '.../myplot.png', 'compression': 'zip'}
0.0 0.0
4.5 0.0
5.0 5.5
0.0 5.0