# -*- coding: utf-8 -*-
'''\
:mod:`ecoxipy.transformation` - Transforming XML
================================================
This module provides an XML transformation API. It contains the abstract
:class:`ecoxipy.Output` implementation :class:`MarkupTransformer`, which does
on-the-fly transformations while creating XML with the wrapped output
instance. Implementations of this class are used as output of
:class:`ecoxipy.MarkupBuilder` instances. Instances of
:class:`PyXOMTransformer` transform :mod:`ecoxipy.pyxom` data structures
in-place. The methods of implementations of these classes can be annotated
with the decorators in :attr:`MATCH` to serve as transformation callables --
if a transformation is the first whose test matches, it is applied and
transformation is finished. If no transformation callable matches, the node
is not transformed.
Examples
--------
We define a transformer which does:
* Elements with an attribute ``href`` are converted to ``a`` links. If the
element is a ``span``, it is directly converted to an ``a``, otherwise
the original element is wrapped in an ``a``.
* Processing instructions with target ``title`` are converted to an ``h1``
element and set the instance variable ``_title``.
* Text nodes with content ``foo bar`` are converted to upper case and
wrapped into a ``strong`` element.
* All comments are removed.
* Documents with document type ``template`` are converted to a HTML5
document using the instance variable ``_title``.
>>> class MyTransformer(MarkupTransformer):
... @MATCH.element.test(
... lambda name, children, attributes: 'href' in attributes)
... def link(self, name, children, attributes):
... if name == 'span':
... return self.B.a(children, attributes)
... href = attributes.pop('href')
... return self.B.a(
... self.B[name](children, attributes), href=href)
...
... @MATCH.pi.title
... def foo(self, target, content):
... self._title = content
... return self.B.h1(content)
...
... @MATCH.text('foo bar')
... def foo_bar(self, content):
... return self.B.strong(content.upper())
...
... @MATCH.comment()
... def comment(self, content):
... pass
...
... @MATCH.document.template
... def doc(self, *args):
... B = self.B
... return B[:'html':True]('\\n',
... B.html('\\n',
... B.head(B.title(self._title)), '\\n',
... B.body('\\n', args[3], '\\n'), '\\n',
... )
... )
We create an example document, using the transformer:
>>> from ecoxipy import MarkupBuilder
>>> B = MyTransformer.builder()
>>> print(B[:'template':True](
... B['title':'Test'], '\\n',
... B | 'This comment will be removed.',
... B.p('\\n',
... B.span('Example Site', href='http://example.com'), '\\n',
... 'foo bar', '\\n',
... B.em('Example Site', href='http://example.com', lang='en'), '\\n'
... )
... ))
<!DOCTYPE html>
<html>
<head><title>Test</title></head>
<body>
<h1>Test</h1>
<p>
<a href="http://example.com">Example Site</a>
<strong>FOO BAR</strong>
<a href="http://example.com"><em lang="en">Example Site</em></a>
</p>
</body>
</html>
API
---
.. autoclass:: MarkupTransformer
:no-members:
.. attribute:: MATCH
.. attribute:: .element
.. attribute:: .text
.. attribute:: .comment
.. attribute:: .pi
.. attribute:: .document
'''
import abc
from tinkerpy import metaclass
from ecoxipy import _unicode
# Names of the node kinds transformations can be registered for. Each name is
# the suffix of the ``_MATCH_<node type>`` attribute the ``MATCH`` decorators
# fill and the key under which ``MATCH_SPEC`` stores the collected entries.
NODE_TYPES = (
    'element',
    'processing_instruction',
    'text',
    'comment',
    'document',
)
class MATCH(object):
    '''\
    Namespace of decorators which register methods as transformation
    callables for a node type.

    The attributes :attr:`element`, :attr:`pi`, :attr:`text`,
    :attr:`comment` and :attr:`document` each offer three ways to create a
    decorator:

    * ``MATCH.element.test(callable)`` registers the method with an explicit
      test callable.
    * ``MATCH.text('foo bar')`` registers the method with a test comparing
      the given values to the leading positional arguments; calling without
      arguments creates a test which always matches.
    * ``MATCH.pi.title`` is a shorthand for ``MATCH.pi('title')``.

    The decorators must be applied directly inside a class body, as they
    store their registrations in the namespace of the calling frame.
    '''

    class _AddMatch(object):
        '''Factory of registration decorators for a single node type.'''

        def __init__(self, node_type):
            # Name of the list the decorators create in the class namespace.
            self._attribute_name = '_MATCH_' + node_type

        def test(self, test):
            '''\
            Create a decorator registering a method together with ``test``.

            :param test: The callable deciding if the decorated method is
                applied.
            :returns: A decorator which appends ``(test, method name)`` to
                the ``_MATCH_<node type>`` list in the namespace it is
                applied in and returns the decorated function unchanged.
            '''
            def decorator(func):
                import inspect
                # The frame calling the decorator is the class body being
                # executed, its locals are the namespace of the new class.
                # Only that single frame is needed, so the whole stack is
                # not inspected.
                frame = inspect.currentframe().f_back
                try:
                    caller_locals = frame.f_locals
                finally:
                    # Do not keep a reference cycle through the frame alive.
                    del frame
                try:
                    match_list = caller_locals[self._attribute_name]
                except KeyError:
                    match_list = []
                    caller_locals[self._attribute_name] = match_list
                match_list.append((test, func.__name__))
                return func
            return decorator

        def __call__(self, *infos):
            '''\
            Create a decorator with a test derived from ``infos``.

            :param infos: The values the leading positional arguments must
                be equal to; if none are given the test always matches.
            :returns: The decorator created by :meth:`test`.
            '''
            if len(infos) == 0:
                test = lambda *args: True
            else:
                test = lambda *args: infos == args[:len(infos)]
            return self.test(test)

        # Attribute access is a shorthand for a call with a single value,
        # i.e. ``MATCH.pi.title`` equals ``MATCH.pi('title')``.
        __getattr__ = __call__

    element = _AddMatch('element')
    pi = _AddMatch('processing_instruction')
    text = _AddMatch('text')
    comment = _AddMatch('comment')
    document = _AddMatch('document')

    del _AddMatch

# The module exposes a single instance instead of the class itself.
MATCH = MATCH()
class _Transformer(object):
    '''\
    Internal base class for transformers.

    It collects the transformation methods registered with the ``MATCH``
    decorators, gives access to the output and a builder working on it, and
    creates the callables which dispatch to the first matching
    transformation method.

    :param output: The output to create XML with. If it is :const:`None`,
        the output of a newly created :class:`ecoxipy.MarkupBuilder` is
        used.
    :param input_encoding: Handed to the :class:`ecoxipy.MarkupBuilder`
        instances created.
    :param parser: Handed to the :class:`ecoxipy.MarkupBuilder` instances
        created.
    '''

    class MATCH_SPEC(object):
        '''\
        Descriptor lazily creating the match specification.

        Accessed on a class it yields a dictionary mapping each node type to
        a list of ``(test, method name)`` tuples; accessed on an instance the
        method names are replaced by the bound methods. Both dictionaries
        are cached in the attribute ``_MATCH_SPEC`` of the class and the
        instance respectively.
        '''

        def _init_cls(self, cls):
            match_infos = {node_type: [] for node_type in NODE_TYPES}
            for base_cls in cls.__mro__:
                # Only look at what the class itself defines: ``getattr``
                # would also find the lists inherited from base classes,
                # which are visited on their own later in the MRO, and thus
                # register the same entries more than once.
                namespace = vars(base_cls)
                for node_type in NODE_TYPES:
                    match_infos[node_type].extend(
                        namespace.get('_MATCH_' + node_type, ()))
            cls._MATCH_SPEC = match_infos

        def _init_obj(self, obj, cls):
            # Resolve the method names to methods bound to the instance.
            obj._MATCH_SPEC = {
                node_type: [
                    (test, getattr(obj, meth_name))
                    for test, meth_name in node_type_infos
                ]
                for node_type, node_type_infos in cls._MATCH_SPEC.items()
            }

        def __get__(self, obj, cls):
            # Check the class' own namespace, as each subclass needs its
            # own specification computed from its own MRO.
            if '_MATCH_SPEC' not in cls.__dict__:
                self._init_cls(cls)
            if obj is None:
                return cls._MATCH_SPEC
            if '_MATCH_SPEC' not in obj.__dict__:
                self._init_obj(obj, cls)
            return obj._MATCH_SPEC

        def __set__(self, obj, value):
            # Defining ``__set__`` makes this a data descriptor, so the
            # attribute cannot be shadowed on instances.
            raise AttributeError('MATCH_SPEC is read-only.')

    MATCH_SPEC = MATCH_SPEC()

    def __init__(self, output=None, input_encoding='UTF-8', parser=None):
        self._output = output
        self._input_encoding = input_encoding
        self._parser = parser
        if output is None:
            # Accessing ``B`` creates the builder, which is given ``None``
            # as the output here; the output that builder ends up with is
            # adopted.
            self._output = self.B._output

    @property
    def output(self):
        '''The output used to create XML.'''
        return self._output

    @property
    def B(self):
        '''\
        A :class:`ecoxipy.MarkupBuilder` working on :attr:`output`, created
        on first access.
        '''
        try:
            return self._builder
        except AttributeError:
            from ecoxipy import MarkupBuilder
            builder = MarkupBuilder(self._output, self._input_encoding,
                self._parser)
            self._builder = builder
            return builder

    def default_transformer(self, node_type):
        '''\
        Return the callable used if no transformation method matches, which
        is the attribute of :attr:`output` named ``node_type``.
        '''
        return getattr(self.output, node_type)

    def transformer(self, node_type):
        '''\
        Create the transformation callable for a node type.

        :param node_type: One of the names in ``NODE_TYPES``.
        :returns: A callable which hands its arguments to the first
            registered method whose test returns a true value for them, or
            to the default transformer if there is no such method.
        '''
        match_spec = self.MATCH_SPEC[node_type]
        default_transformer = self.default_transformer(node_type)

        def transform(*args):
            for test, method in match_spec:
                if test(*args):
                    return method(*args)
            return default_transformer(*args)

        return transform

    @classmethod
    def builder(cls, *args, **kargs):
        '''\
        Create a :class:`ecoxipy.MarkupBuilder` using a new instance of this
        class as its output.

        :param args: Positional arguments for creating the transformer.
        :param kargs: Keyword arguments for creating the transformer.
        :returns: The builder created.
        '''
        from ecoxipy import MarkupBuilder
        transformer = cls(*args, **kargs)
        builder = MarkupBuilder(transformer, transformer._input_encoding,
            transformer._parser)
        return builder
@metaclass(abc.ABCMeta)
class PyXOMTransformer(_Transformer):
    '''\
    Base transformer class that works on :mod:`ecoxipy.pyxom` nodes. Instances
    of this class are callable with an arbitrary number of PyXOM nodes as the
    arguments to be transformed. The result of such a call is an XML structure
    in the output representation configured by the initialization argument
    ``output``.

    Extend it and annotate your methods with the decorators contained in
    :attr:`MATCH`. These methods must take a PyXOM node of the class
    appropriate for the match, i.e. methods matching a document will receive
    :class:`ecoxipy.pyxom.Document` instances. They should return XML
    structures in the output representation using the builder contained in
    the attribute :attr:`B`. The transformation methods are called with the
    matching node instances and their return value is used in place of the
    XML structure otherwise created from the given node.

    Your class should not override the following attributes:

    * ``__call__``
    * ``_apply``
    * ``_queue``
    * ``B``
    * ``builder``
    * ``comment``
    * ``default_transformer``
    * ``document``
    * ``element``
    * ``fragment``
    * ``MATCH_SPEC``
    * ``output``
    * ``processing_instruction``
    * ``text``
    * ``transformer``
    '''

    def __init__(self, output=None, input_encoding='UTF-8', parser=None):
        _Transformer.__init__(self, output, input_encoding, parser)
        from ecoxipy.pyxom import (Element, ProcessingInstruction, Text,
            Comment, Document)
        # Dispatch table from the exact node class to its transformation
        # callable, see :meth:`_apply`.
        self._transforms = {
            Element: self.transformer('element'),
            ProcessingInstruction: self.transformer('processing_instruction'),
            Text: self.transformer('text'),
            Comment: self.transformer('comment'),
            Document: self.transformer('document')
        }

    # The container class used for the results of :meth:`_apply`.
    from collections import deque as _queue

    def default_transformer(self, node_type):
        '''\
        Return the method of this instance named ``node_type``, which is
        used if no transformation method matches.
        '''
        return getattr(self, node_type)

    def element(self, node):
        '''\
        Create an element in the output representation from the name, the
        transformed children and the attributes of ``node``.
        '''
        return self.output.element(node.name, self._apply(node),
            node.attributes.to_dict())

    def processing_instruction(self, node):
        '''\
        Create a processing instruction in the output representation from
        the target and content of ``node``.
        '''
        return self.output.processing_instruction(node.target, node.content)

    def text(self, node):
        '''\
        Create a text node in the output representation from the content of
        ``node``.
        '''
        return self.output.text(node.content)

    def comment(self, node):
        '''\
        Create a comment in the output representation from the content of
        ``node``.
        '''
        return self.output.comment(node.content)

    def document(self, node):
        '''\
        Create a document in the output representation from the document
        type, the transformed children and the ``omit_xml_declaration``
        value of ``node``.
        '''
        doctype = node.doctype
        return self.output.document(doctype.name, doctype.publicid,
            doctype.systemid, self._apply(node), node.omit_xml_declaration)

    def fragment(self, children):
        '''Create a fragment in the output representation.'''
        return self.output.fragment(children)

    def _apply(self, nodes):
        '''\
        Transform each of ``nodes`` with the callable registered for its
        exact class and return the results in a ``_queue`` instance.
        '''
        return self._queue(self._transforms[node.__class__](node)
            for node in nodes)

    def __call__(self, *nodes):
        '''\
        Transform the given nodes.

        :param nodes: The PyXOM nodes to transform.
        :returns: The transformation result if exactly one node was given,
            otherwise a ``_queue`` instance containing the results.
        '''
        results = self._apply(nodes)
        if len(results) == 1:
            return results.popleft()
        return results
del abc