Source code for ecoxipy.transformation

# -*- coding: utf-8 -*-
'''\

:mod:`ecoxipy.transformation` - Transforming XML
================================================

This module provides an XML transformation API. It contains the abstract
:class:`ecoxipy.Output` implementation :class:`MarkupTransformer`, which does
on-the-fly transformations while creating XML with the wrapped output
instance. Implementations of this class are used as output of
:class:`ecoxipy.MarkupBuilder` instances. Instances of
:class:`PyXOMTransformer` transform :mod:`ecoxipy.pyxom` data structures
in-place. The methods of implementations of these classes can be annotated
with the decorators in :attr:`MATCH` to serve as transformation callables --
if a transformation is the first whose test matches, it is applied and
transformation is finished. If no transformation callable matches, the node
is not transformed.


Examples
--------

We define a transformer which does:

*   Elements with an attribute ``href`` are converted to ``a`` links. If the
    element is a ``span``, it is directly converted to an ``a``, otherwise
    the original element is wrapped in an ``a``.

*   Processing instructions with target ``title`` are converted to an ``h1``
    element and set the instance variable ``_title``.

*   Text nodes with content ``foo bar`` are converted to upper case and
    wrapped into a ``strong`` element.

*   All comments are removed.

*   Documents with document type ``template`` are converted to a HTML5
    document using the instance variable ``_title``.


>>> class MyTransformer(MarkupTransformer):
...     @MATCH.element.test(
...             lambda name, children, attributes: 'href' in attributes)
...     def link(self, name, children, attributes):
...         if name == 'span':
...             return self.B.a(children, attributes)
...         href = attributes.pop('href')
...         return self.B.a(
...             self.B[name](children, attributes), href=href)
...
...     @MATCH.pi.title
...     def foo(self, target, content):
...         self._title = content
...         return self.B.h1(content)
...
...     @MATCH.text('foo bar')
...     def foo_bar(self, content):
...         return self.B.strong(content.upper())
...
...     @MATCH.comment()
...     def comment(self, content):
...         pass
...
...     @MATCH.document.template
...     def doc(self, *args):
...         B = self.B
...         return B[:'html':True]('\\n',
...             B.html('\\n',
...                 B.head(B.title(self._title)), '\\n',
...                 B.body('\\n', args[3], '\\n'), '\\n',
...             )
...         )


We create an example document, using the transformer:

>>> from ecoxipy import MarkupBuilder
>>> B = MyTransformer.builder()
>>> print(B[:'template':True](
...     B['title':'Test'], '\\n',
...     B | 'This comment will be removed.',
...     B.p('\\n',
...         B.span('Example Site', href='http://example.com'), '\\n',
...         'foo bar', '\\n',
...         B.em('Example Site', href='http://example.com', lang='en'), '\\n'
...     )
... ))
<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<h1>Test</h1>
<p>
<a href="http://example.com">Example Site</a>
<strong>FOO BAR</strong>
<a href="http://example.com"><em lang="en">Example Site</em></a>
</p>
</body>
</html>


API
---

.. autoclass:: MarkupTransformer
    :no-members:

.. attribute:: MATCH

    .. attribute:: .element

    .. attribute:: .text

    .. attribute:: .comment

    .. attribute:: .pi

    .. attribute:: .document

'''

import abc

from tinkerpy import metaclass

from ecoxipy import _unicode


# Names of the node kinds a transformer can handle. Each name is used both as
# the name of the output method producing that kind of node and as the suffix
# of the ``_MATCH_<node type>`` class attribute in which the :attr:`MATCH`
# decorators register transformation methods.
NODE_TYPES = ('element', 'processing_instruction', 'text', 'comment',
            'document')


class MATCH(object):
    '''\
    Namespace of the decorators used to mark methods of a transformer class
    as transformation callables. Each attribute (``element``, ``pi``,
    ``text``, ``comment`` and ``document``) registers methods for one node
    type.
    '''
    class _AddMatch(object):
        '''\
        Decorator factory for a single node type. Decorated methods are
        recorded as ``(test, method name)`` tuples in a list stored under
        ``_MATCH_<node type>`` in the namespace of the class body in which
        the decorator is applied.
        '''
        def __init__(self, node_type):
            self._attribute_name = '_MATCH_' + node_type

        def test(self, test):
            '''\
            Create a decorator registering the decorated function under the
            condition ``test``.

            :param test: A callable receiving the same arguments as the
                decorated method (without ``self``); the method is applied if
                it returns a true value.
            :returns: A decorator which returns the decorated function
                unchanged.
            '''
            def decorator(func):
                import inspect
                # The decorator is called directly from the class body, so
                # the calling frame's locals are the class namespace being
                # built. ``currentframe().f_back`` yields that frame without
                # collecting source context for the whole stack, as
                # ``inspect.stack()`` would.
                frame = inspect.currentframe().f_back
                try:
                    caller_locals = frame.f_locals
                finally:
                    # Do not keep the frame alive longer than necessary.
                    del frame
                try:
                    match_list = caller_locals[self._attribute_name]
                except KeyError:
                    match_list = []
                    caller_locals[self._attribute_name] = match_list
                # Only the name is stored, the method is resolved on the
                # instance later.
                match_list.append((test, func.__name__))
                return func
            return decorator

        def __call__(self, *infos):
            '''\
            Create a decorator matching nodes whose leading arguments equal
            ``infos``. Without arguments every node of the type matches.
            '''
            if not infos:
                test = lambda *args: True
            else:
                test = lambda *args: infos == args[:len(infos)]
            return self.test(test)

        def __getattr__(self, name):
            '''\
            Attribute access is a shortcut for calling the instance with the
            attribute name, i.e. ``MATCH.pi.title`` equals
            ``MATCH.pi('title')``.
            '''
            # Special method probes (e.g. ``__deepcopy__`` or
            # ``__wrapped__``) must fail normally instead of being answered
            # with a decorator.
            if name.startswith('__') and name.endswith('__'):
                raise AttributeError(name)
            return self(name)

    element = _AddMatch('element')
    pi = _AddMatch('processing_instruction')
    text = _AddMatch('text')
    comment = _AddMatch('comment')
    document = _AddMatch('document')
    del _AddMatch

# The class is only a container, replace it by its single instance.
MATCH = MATCH()


class _Transformer(object):
    '''\
    Internal base class for transformers. It collects the methods registered
    with the :attr:`MATCH` decorators and creates per node type callables
    which apply the first matching transformation method or fall back to a
    default.
    '''
    class MATCH_SPEC(object):
        '''\
        Read-only descriptor lazily computing the match specification, a
        :class:`dict` mapping each node type to a list of 2-tuples. Accessed
        on a class the tuples contain a test and a method name, accessed on
        an instance they contain a test and the bound method. The results are
        cached in the attribute ``_MATCH_SPEC`` of the class or the instance.
        '''
        def _init_cls(self, cls):
            match_infos = {node_type: [] for node_type in NODE_TYPES}
            for base_cls in cls.__mro__:
                # Look only at what each class defines itself: ``getattr``
                # also finds inherited lists, which would add the entries of
                # a base class once for every subclass in the MRO.
                namespace = base_cls.__dict__
                for node_type in NODE_TYPES:
                    try:
                        node_type_infos = namespace['_MATCH_' + node_type]
                    except KeyError:
                        continue
                    match_infos[node_type].extend(node_type_infos)
            cls._MATCH_SPEC = match_infos

        def _init_obj(self, obj, cls):
            # Resolve the method names on the instance, so overridden methods
            # are honored.
            obj._MATCH_SPEC = {
                node_type: [
                    (test, getattr(obj, meth_name))
                    for test, meth_name in node_type_infos
                ]
                for node_type, node_type_infos in cls._MATCH_SPEC.items()
            }

        def __get__(self, obj, cls=None):
            if cls is None:
                cls = type(obj)
            # Check ``__dict__`` instead of using ``hasattr``, each class
            # needs its own specification and must not reuse an inherited one.
            if '_MATCH_SPEC' not in cls.__dict__:
                self._init_cls(cls)
            if obj is None:
                return cls._MATCH_SPEC
            if '_MATCH_SPEC' not in obj.__dict__:
                self._init_obj(obj, cls)
            return obj._MATCH_SPEC

        def __set__(self, obj, value):
            raise AttributeError('MATCH_SPEC is read-only')

    MATCH_SPEC = MATCH_SPEC()

    def __init__(self, output=None, input_encoding='UTF-8', parser=None):
        '''\
        :param output: The output instance to create XML with. If it is
            :const:`None` the output of the builder in :attr:`B` is used.
        :param input_encoding: Handed to the builder in :attr:`B`.
        :param parser: Handed to the builder in :attr:`B`.
        '''
        self._output = output
        self._input_encoding = input_encoding
        self._parser = parser
        if output is None:
            # Accessing ``B`` creates the builder, which supplies the output
            # to use.
            self._output = self.B._output

    @property
    def output(self):
        '''The output instance used to create XML.'''
        return self._output

    @property
    def B(self):
        '''\
        A :class:`ecoxipy.MarkupBuilder` instance created on first access
        from the output, input encoding and parser of this instance.
        '''
        try:
            return self._builder
        except AttributeError:
            from ecoxipy import MarkupBuilder
            builder = MarkupBuilder(self._output, self._input_encoding,
                self._parser)
            self._builder = builder
            return builder

    def default_transformer(self, node_type):
        '''\
        Return the callable used for nodes of type ``node_type`` if no
        transformation method matches, here the method of :attr:`output`
        named ``node_type``.
        '''
        return getattr(self.output, node_type)

    def transformer(self, node_type):
        '''\
        Create the transformation callable for ``node_type``.

        :returns: A callable which applies the first method registered for
            ``node_type`` whose test accepts the arguments and otherwise
            calls the default transformer.
        '''
        match_spec = self.MATCH_SPEC[node_type]
        default_transformer = self.default_transformer(node_type)
        def transform(*args):
            for test, method in match_spec:
                if test(*args):
                    return method(*args)
            return default_transformer(*args)
        return transform

    @classmethod
    def builder(cls, *args, **kargs):
        '''\
        Create an instance of this class from the given arguments and return
        a :class:`ecoxipy.MarkupBuilder` using that instance as its output.
        '''
        from ecoxipy import MarkupBuilder
        transformer = cls(*args, **kargs)
        builder = MarkupBuilder(transformer, transformer._input_encoding,
            transformer._parser)
        return builder


@metaclass(abc.ABCMeta)
class MarkupTransformer(_Transformer):
    '''\
    Base transformer class that implements the :class:`ecoxipy.Output`
    interface.

    Extend it and annotate your methods with the decorators contained in
    :attr:`MATCH`. These methods must have a signature compatible with the
    appropriate methods of :class:`ecoxipy.Output`, i.e. a method annotated with
    `MATCH.element` must conform to :meth:`ecoxipy.Output.element`. Their return
    values replace the content otherwise created, if there were no
    transformation. The methods should use the builder contained in the
    attribute :attr:`B` to create XML structures, then they are independent of
    the :class:`ecoxipy.Output` implementation given on initialization.

    Your class should not override the methods as defined by
    :class:`ecoxipy.Output` and the following attributes:

    *   ``B``
    *   ``builder``
    *   ``default_transformer``
    *   ``MATCH_SPEC``
    *   ``transformer``
    *   ``output``
    '''
    def __init__(self, output=None, input_encoding='UTF-8', parser=None):
        _Transformer.__init__(self, output, input_encoding, parser)
        # Provide the node creating methods of the output interface as
        # instance attributes: each one applies the first matching
        # transformation method or delegates to the wrapped output.
        for node_type in NODE_TYPES:
            setattr(self, node_type, self.transformer(node_type))

    def is_native_type(self, content):
        '''Delegates to the wrapped output without transformation.'''
        return self._output.is_native_type(content)

    def fragment(self, children):
        '''Delegates to the wrapped output without transformation.'''
        return self._output.fragment(children)


# TODO: add to documentation
# TODO: add tests
@metaclass(abc.ABCMeta)
class PyXOMTransformer(_Transformer):
    '''\
    Transformer base class operating on :mod:`ecoxipy.pyxom` nodes. An
    instance is called with any number of PyXOM nodes and returns their
    transformation as XML structures in the representation of the output
    given on initialization: the single result if exactly one node was given,
    otherwise a queue of the results.

    Subclasses annotate methods with the decorators contained in
    :attr:`MATCH`. Such a method receives the matched PyXOM node, e.g. a
    method matching a document receives a :class:`ecoxipy.pyxom.Document`
    instance. Its return value, which should be created using the builder in
    the attribute :attr:`B`, is used in place of the XML structure otherwise
    created from the node.

    Your class should not override the following attributes:

    *   ``__call__``
    *   ``_apply``
    *   ``_queue``
    *   ``B``
    *   ``builder``
    *   ``comment``
    *   ``default_transformer``
    *   ``document``
    *   ``element``
    *   ``fragment``
    *   ``MATCH_SPEC``
    *   ``output``
    *   ``processing_instruction``
    *   ``text``
    *   ``transformer``
    '''
    # The container type holding transformation results.
    from collections import deque as _queue

    def __init__(self, output=None, input_encoding='UTF-8', parser=None):
        _Transformer.__init__(self, output, input_encoding, parser)
        from ecoxipy.pyxom import (Element, ProcessingInstruction, Text,
            Comment, Document)
        node_kinds = (
            (Element, 'element'),
            (ProcessingInstruction, 'processing_instruction'),
            (Text, 'text'),
            (Comment, 'comment'),
            (Document, 'document'),
        )
        # Dispatch table from PyXOM node class to transformation callable.
        self._transforms = {
            node_class: self.transformer(kind)
            for node_class, kind in node_kinds
        }

    def default_transformer(self, node_type):
        # Unmatched nodes are handled by the equally named method of this
        # instance, which recreates the node using the output.
        return getattr(self, node_type)

    def element(self, node):
        create = self.output.element
        return create(node.name, self._apply(node), node.attributes.to_dict())

    def processing_instruction(self, node):
        create = self.output.processing_instruction
        return create(node.target, node.content)

    def text(self, node):
        create = self.output.text
        return create(node.content)

    def comment(self, node):
        create = self.output.comment
        return create(node.content)

    def document(self, node):
        create = self.output.document
        doctype = node.doctype
        return create(
            doctype.name,
            doctype.publicid,
            doctype.systemid,
            self._apply(node),
            node.omit_xml_declaration,
        )

    def fragment(self, children):
        create = self.output.fragment
        return create(children)

    def _apply(self, nodes):
        # Transform each node with the callable registered for its class.
        transformed = self._queue()
        for node in nodes:
            transform = self._transforms[node.__class__]
            transformed.append(transform(node))
        return transformed

    def __call__(self, *nodes):
        transformed = self._apply(nodes)
        # A single result is returned on its own instead of in a queue.
        if len(transformed) != 1:
            return transformed
        return transformed.popleft()

# ``abc`` is only needed while the classes above are created (as argument of
# the ``metaclass`` decorator), so remove it from the module namespace.
del abc
Navigation

Source code for ecoxipy.transformation

Quick search

Navigation