Source code for pyquery.pyquery

#-*- coding:utf-8 -*-
#
# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
#
# Distributed under the BSD license, see LICENSE.txt
from .cssselectpatch import JQueryTranslator
from .openers import url_opener
from copy import deepcopy
from lxml import etree
import lxml.html
import inspect
import sys

PY3k = sys.version_info >= (3,)

if PY3k:
    from urllib.parse import urlencode
    from urllib.parse import urljoin
    basestring = (str, bytes)
    unicode = str
else:
    from urllib import urlencode  # NOQA
    from urlparse import urljoin  # NOQA


def func_globals(f):
    return f.__globals__ if PY3k else f.func_globals


def func_code(f):
    return f.__code__ if PY3k else f.func_code


def fromstring(context, parser=None, custom_parser=None):
    """use html parser if we don't have clean xml
    """
    if hasattr(context, 'read') and hasattr(context.read, '__call__'):
        meth = 'parse'
    else:
        meth = 'fromstring'
    if custom_parser is None:
        if parser is None:
            try:
                result = getattr(etree, meth)(context)
            except etree.XMLSyntaxError:
                result = getattr(lxml.html, meth)(context)
            if isinstance(result, etree._ElementTree):
                return [result.getroot()]
            else:
                return [result]
        elif parser == 'xml':
            custom_parser = getattr(etree, meth)
        elif parser == 'html':
            custom_parser = getattr(lxml.html, meth)
        elif parser == 'soup':
            from  lxml.html import soupparser
            custom_parser = getattr(soupparser, meth)
        elif parser == 'html_fragments':
            custom_parser = lxml.html.fragments_fromstring
        else:
            ValueError('No such parser: "%s"' % parser)

    result = custom_parser(context)
    if type(result) is list:
        return result
    elif isinstance(result, etree._ElementTree):
        return [result.getroot()]
    elif result is not None:
        return [result]
    else:
        return []


def callback(func, *args):
    return func(*args[:func_code(func).co_argcount])


class NoDefault(object):
    def __repr__(self):
        """clean representation in Sphinx"""
        return '<NoDefault>'

no_default = NoDefault()
del NoDefault


class FlexibleElement(object):
    """property to allow a flexible api"""
    def __init__(self, pget, pset=no_default, pdel=no_default):
        self.pget = pget
        self.pset = pset
        self.pdel = pdel

    def __get__(self, instance, klass):
        class _element(object):
            """real element to support set/get/del attr and item and js call
            style"""
            def __call__(prop, *args, **kwargs):
                return self.pget(instance, *args, **kwargs)
            __getattr__ = __getitem__ = __setattr__ = __setitem__ = __call__

            def __delitem__(prop, name):
                if self.pdel is not no_default:
                    return self.pdel(instance, name)
                else:
                    raise NotImplementedError()
            __delattr__ = __delitem__

            def __repr__(prop):
                return '<flexible_element %s>' % self.pget.__name__
        return _element()

    def __set__(self, instance, value):
        if self.pset is not no_default:
            self.pset(instance, value)
        else:
            raise NotImplementedError()


[docs]class PyQuery(list):
    """The main class
    """
    def __init__(self, *args, **kwargs):
        html = None
        elements = []
        self._base_url = None
        self.parser = kwargs.get('parser', None)
        if 'parser' in kwargs:
            del kwargs['parser']

        if len(args) >= 1 and \
           (not PY3k and isinstance(args[0], basestring) or \
           (PY3k and isinstance(args[0], str))) and \
           args[0].split('://', 1)[0] in ('http', 'https'):
            kwargs['url'] = args[0]
            if len(args) >= 2:
                kwargs['data'] = args[1]
            args = []

        if 'parent' in kwargs:
            self._parent = kwargs.pop('parent')
        else:
            self._parent = no_default

        if 'css_translator' in kwargs:
            self._translator = kwargs.pop('css_translator')
        elif self.parser in ('xml',):
            self._translator = JQueryTranslator(xhtml=True)
        elif self._parent is not no_default:
            self._translator = self._parent._translator
        else:
            self._translator = JQueryTranslator(xhtml=False)

        namespaces = kwargs.get('namespaces', {})
        if 'namespaces' in kwargs:
            del kwargs['namespaces']

        if kwargs:
            # specific case to get the dom
            if 'filename' in kwargs:
                html = open(kwargs['filename'])
            elif 'url' in kwargs:
                url = kwargs.pop('url')
                if 'opener' in kwargs:
                    opener = kwargs.pop('opener')
                    html = opener(url, **kwargs)
                else:
                    html = url_opener(url, kwargs)
                if not self.parser:
                    self.parser = 'html'
                self._base_url = url
            else:
                raise ValueError('Invalid keyword arguments %s' % kwargs)

            elements = fromstring(html, self.parser)
            # close open descriptor if possible
            if hasattr(html, 'close'):
                try:
                    html.close()
                except:
                    pass

        else:
            # get nodes

            # determine context and selector if any
            selector = context = no_default
            length = len(args)
            if length == 1:
                context = args[0]
            elif length == 2:
                selector, context = args
            else:
                raise ValueError("You can't do that." +\
                        " Please, provide arguments")

            # get context
            if isinstance(context, basestring):
                try:
                    elements = fromstring(context, self.parser)
                except Exception:
                    raise
            elif isinstance(context, self.__class__):
                # copy
                elements = context[:]
            elif isinstance(context, list):
                elements = context
            elif isinstance(context, etree._Element):
                elements = [context]

            # select nodes
            if elements and selector is not no_default:
                xpath = self._css_to_xpath(selector)
                results = []
                for tag in elements:
                    results.extend(tag.xpath(xpath, namespaces=namespaces))
                elements = results

        list.__init__(self, elements)

    def _css_to_xpath(self, selector, prefix='descendant-or-self::'):
        selector = selector.replace('[@', '[')
        return self._translator.css_to_xpath(selector, prefix)

    def __call__(self, *args, **kwargs):
        """return a new PyQuery instance
        """
        length = len(args)
        if length == 0:
            raise ValueError('You must provide at least a selector')
        if args[0] == '':
            return self.__class__([])
        if len(args) == 1 and \
           isinstance(args[0], str) and \
           not args[0].startswith('<'):
            args += (self,)
        result = self.__class__(*args, parent=self, **kwargs)
        return result

    # keep original list api prefixed with _
    _append = list.append
    _extend = list.extend

    # improve pythonic api
    def __add__(self, other):
        assert isinstance(other, self.__class__)
        return self.__class__(self[:] + other[:])

[docs]    def extend(self, other):
        """Extend with anoter PyQuery object"""
        assert isinstance(other, self.__class__)
        self._extend(other[:])

[docs]    def items(self, selector=None):
        """Iter over elements. Return PyQuery objects:

            >>> d = PyQuery('<div><span>foo</span><span>bar</span></div>')
            >>> [i.text() for i in d.items('span')]
            ['foo', 'bar']
            >>> [i.text() for i in d('span').items()]
            ['foo', 'bar']
        """
        elems = selector and self(selector) or self
        for elem in elems:
            yield self.__class__(elem)

[docs]    def xhtml_to_html(self):
        """Remove xhtml namespace:

            >>> doc = PyQuery(
            ...         '<html xmlns="http://www.w3.org/1999/xhtml"></html>')
            >>> doc
            [<{http://www.w3.org/1999/xhtml}html>]
            >>> doc.remove_namespaces()
            [<html>]
        """
        try:
            root = self[0].getroottree()
        except IndexError:
            pass
        else:
            lxml.html.xhtml_to_html(root)
        return self

[docs]    def remove_namespaces(self):
        """Remove all namespaces:

            >>> doc = PyQuery('<foo xmlns="http://example.com/foo"></foo>')
            >>> doc
            [<{http://example.com/foo}foo>]
            >>> doc.remove_namespaces()
            [<foo>]
        """
        try:
            root = self[0].getroottree()
        except IndexError:
            pass
        else:
            for el in root.iter('{*}*'):
                if el.tag.startswith('{'):
                    el.tag = el.tag.split('}', 1)[1]
        return self

    def __str__(self):
        """xml representation of current nodes::

            >>> xml = PyQuery(
            ...   '<script><![[CDATA[ ]></script>', parser='html_fragments')
            >>> print(str(xml))
            <script>&lt;![[CDATA[ ]&gt;</script>

        """
        if PY3k:
            return ''.join([etree.tostring(e, encoding=str) for e in self])
        else:
            return ''.join([etree.tostring(e) for e in self])

    def __unicode__(self):
        """xml representation of current nodes"""
        return unicode('').join([etree.tostring(e, encoding=unicode) \
                                                        for e in self])

    def __html__(self):
        """html representation of current nodes::

            >>> html = PyQuery(
            ...   '<script><![[CDATA[ ]></script>', parser='html_fragments')
            >>> print(html.__html__())
            <script><![[CDATA[ ]></script>

        """
        return unicode('').join([lxml.html.tostring(e, encoding=unicode) \
                                                            for e in self])

    def __repr__(self):
        r = []
        try:
            for el in self:
                c = el.get('class')
                c = c and '.' + '.'.join(c.split(' ')) or ''
                id = el.get('id')
                id = id and '#' + id or ''
                r.append('<%s%s%s>' % (el.tag, id, c))
            return '[' + (', '.join(r)) + ']'
        except AttributeError:
            if PY3k:
                return list.__repr__(self)
            else:
                for el in self:
                    if isinstance(el, unicode):
                        r.append(el.encode('utf-8'))
                    else:
                        r.append(el)
                return repr(r)

    @property
[docs]    def root(self):
        """return the xml root element
        """
        if self._parent is not no_default:
            return self._parent.getroottree()
        return self[0].getroottree()

    @property
[docs]    def encoding(self):
        """return the xml encoding of the root element
        """
        root = self.root
        if root is not None:
            return self.root.docinfo.encoding

    ##############
    # Traversing #
    ##############

    def _filter_only(self, selector, elements, reverse=False, unique=False):
        """Filters the selection set only, as opposed to also including
           descendants.
        """
        if selector is None:
            results = elements
        else:
            xpath = self._css_to_xpath(selector, 'self::')
            results = []
            for tag in elements:
                results.extend(tag.xpath(xpath))
        if reverse:
            results.reverse()
        if unique:
            result_list = results
            results = []
            for item in result_list:
                if not item in results:
                    results.append(item)
        return self.__class__(results, **dict(parent=self))

    def parent(self, selector=None):
        return self._filter_only(
                   selector,
                   [e.getparent() for e in self if e.getparent() is not None],
                   unique=True)

    def prev(self, selector=None):
        return self._filter_only(
               selector,
               [e.getprevious() for e in self if e.getprevious() is not None])

    def next(self, selector=None):
        return self._filter_only(
               selector,
               [e.getnext() for e in self if e.getnext() is not None])

    def _traverse(self, method):
        for e in self:
            current = getattr(e, method)()
            while current is not None:
                yield current
                current = getattr(current, method)()

    def _traverse_parent_topdown(self):
        for e in self:
            this_list = []
            current = e.getparent()
            while current is not None:
                this_list.append(current)
                current = current.getparent()
            this_list.reverse()
            for j in this_list:
                yield j

    def _nextAll(self):
        return [e for e in self._traverse('getnext')]

[docs]    def nextAll(self, selector=None):
        """
        >>> h = '<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>'
        >>> d = PyQuery(h)
        >>> d('p:last').nextAll()
        [<img>]
        """
        return self._filter_only(selector, self._nextAll())

    def _prevAll(self):
        return [e for e in self._traverse('getprevious')]

[docs]    def prevAll(self, selector=None):
        """
        >>> h = '<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>'
        >>> d = PyQuery(h)
        >>> d('p:last').prevAll()
        [<p.hello>]
        """
        return self._filter_only(selector, self._prevAll(), reverse=True)

[docs]    def siblings(self, selector=None):
        """
         >>> h = '<span><p class="hello">Hi</p><p>Bye</p><img scr=""/></span>'
         >>> d = PyQuery(h)
         >>> d('.hello').siblings()
         [<p>, <img>]
         >>> d('.hello').siblings('img')
         [<img>]

        """
        return self._filter_only(selector, self._prevAll() + self._nextAll())

[docs]    def parents(self, selector=None):
        """
        >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p></span>')
        >>> d('p').parents()
        [<span>]
        >>> d('.hello').parents('span')
        [<span>]
        >>> d('.hello').parents('p')
        []
        """
        return self._filter_only(
                selector,
                [e for e in self._traverse_parent_topdown()],
                unique=True
            )

[docs]    def children(self, selector=None):
        """Filter elements that are direct children of self using optional
        selector:

            >>> d = PyQuery('<span><p class="hello">Hi</p><p>Bye</p></span>')
            >>> d
            [<span>]
            >>> d.children()
            [<p.hello>, <p>]
            >>> d.children('.hello')
            [<p.hello>]
        """
        elements = [child for tag in self for child in tag.getchildren()]
        return self._filter_only(selector, elements)

[docs]    def closest(self, selector=None):
        """
        >>> d = PyQuery(
        ...  '<div class="hello"><p>This is a '
        ...  '<strong class="hello">test</strong></p></div>')
        >>> d('strong').closest('div')
        [<div.hello>]
        >>> d('strong').closest('.hello')
        [<strong.hello>]
        >>> d('strong').closest('form')
        []
        """
        result = []
        for current in self:
            while current is not None and \
                  not self.__class__(current).is_(selector):
                current = current.getparent()
            if current is not None:
                result.append(current)
        return self.__class__(result, **dict(parent=self))

[docs]    def contents(self):
        """
        Return contents (with text nodes):

            >>> d = PyQuery('hello <b>bold</b>')
            >>> d.contents()  # doctest: +ELLIPSIS
            ['hello ', <Element b at ...>]
        """
        results = []
        for elem in self:
            results.extend(elem.xpath('child::text()|child::*'))
        return self.__class__(results, **dict(parent=self))

[docs]    def filter(self, selector):
        """Filter elements in self using selector (string or function):

            >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p>')
            >>> d('p')
            [<p.hello>, <p>]
            >>> d('p').filter('.hello')
            [<p.hello>]
            >>> d('p').filter(lambda i: i == 1)
            [<p>]
            >>> d('p').filter(lambda i: PyQuery(this).text() == 'Hi')
            [<p.hello>]
            >>> d('p').filter(lambda i, this: PyQuery(this).text() == 'Hi')
            [<p.hello>]
        """
        if not hasattr(selector, '__call__'):
            return self._filter_only(selector, self)
        else:
            elements = []
            args = inspect.getargspec(callback).args
            try:
                for i, this in enumerate(self):
                    if len(args) == 1:
                        func_globals(selector)['this'] = this
                    if callback(selector, i, this):
                        elements.append(this)
            finally:
                f_globals = func_globals(selector)
                if 'this' in f_globals:
                    del f_globals['this']
            return self.__class__(elements, **dict(parent=self))

[docs]    def not_(self, selector):
        """Return elements that don't match the given selector:

            >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
            >>> d('p').not_('.hello')
            [<p>]
        """
        exclude = set(self.__class__(selector, self))
        return self.__class__([e for e in self if e not in exclude],
                              **dict(parent=self))

[docs]    def is_(self, selector):
        """Returns True if selector matches at least one current element, else
        False:

            >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
            >>> d('p').eq(0).is_('.hello')
            True

            >>> d('p').eq(1).is_('.hello')
            False

        ..
        """
        return bool(self.__class__(selector, self))

[docs]    def find(self, selector):
        """Find elements using selector traversing down from self:

            >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
            >>> d = PyQuery(m)
            >>> d('p').find('em')
            [<em>, <em>]
            >>> d('p').eq(1).find('em')
            [<em>]
        """
        xpath = self._css_to_xpath(selector)
        results = [child.xpath(xpath) for tag in self \
                        for child in tag.getchildren()]
        # Flatten the results
        elements = []
        for r in results:
            elements.extend(r)
        return self.__class__(elements, **dict(parent=self))

[docs]    def eq(self, index):
        """Return PyQuery of only the element with the provided index::

            >>> d = PyQuery('<p class="hello">Hi</p><p>Bye</p><div></div>')
            >>> d('p').eq(0)
            [<p.hello>]
            >>> d('p').eq(1)
            [<p>]
            >>> d('p').eq(2)
            []

        ..
        """
        # Use slicing to silently handle out of bounds indexes
        items = self[index:index + 1]
        return self.__class__(items, **dict(parent=self))

[docs]    def each(self, func):
        """apply func on each nodes
        """
        try:
            for i, element in enumerate(self):
                func_globals(func)['this'] = element
                if callback(func, i, element) == False:
                    break
        finally:
            f_globals = func_globals(func)
            if 'this' in f_globals:
                del f_globals['this']
        return self

[docs]    def map(self, func):
        """Returns a new PyQuery after transforming current items with func.

        func should take two arguments - 'index' and 'element'.  Elements can
        also be referred to as 'this' inside of func::

            >>> d = PyQuery('<p class="hello">Hi there</p><p>Bye</p><br />')
            >>> d('p').map(lambda i, e: PyQuery(e).text())
            ['Hi there', 'Bye']

            >>> d('p').map(lambda i, e: len(PyQuery(this).text()))
            [8, 3]

            >>> d('p').map(lambda i, e: PyQuery(this).text().split())
            ['Hi', 'there', 'Bye']

        """
        items = []
        try:
            for i, element in enumerate(self):
                func_globals(func)['this'] = element
                result = callback(func, i, element)
                if result is not None:
                    if not isinstance(result, list):
                        items.append(result)
                    else:
                        items.extend(result)
        finally:
            f_globals = func_globals(func)
            if 'this' in f_globals:
                del f_globals['this']
        return self.__class__(items, **dict(parent=self))

    @property
    def length(self):
        return len(self)

    def size(self):
        return len(self)

[docs]    def end(self):
        """Break out of a level of traversal and return to the parent level.

            >>> m = '<p><span><em>Whoah!</em></span></p><p><em> there</em></p>'
            >>> d = PyQuery(m)
            >>> d('p').eq(1).find('em').end().end()
            [<p>, <p>]
        """
        return self._parent

    ##############
    # Attributes #
    ##############
    def attr(self, *args, **kwargs):
        """Attributes manipulation
        """

        mapping = {'class_': 'class', 'for_': 'for'}

        attr = value = no_default
        length = len(args)
        if length == 1:
            attr = args[0]
            attr = mapping.get(attr, attr)
        elif length == 2:
            attr, value = args
            attr = mapping.get(attr, attr)
        elif kwargs:
            attr = {}
            for k, v in kwargs.items():
                attr[mapping.get(k, k)] = v
        else:
            raise ValueError('Invalid arguments %s %s' % (args, kwargs))

        if not self:
            return None
        elif isinstance(attr, dict):
            for tag in self:
                for key, value in attr.items():
                    tag.set(key, value)
        elif value is no_default:
            return self[0].get(attr)
        elif value is None or value == '':
            return self.removeAttr(attr)
        else:
            for tag in self:
                tag.set(attr, value)
        return self

[docs]    def removeAttr(self, name):
        """Remove an attribute::

            >>> d = PyQuery('<div id="myid"></div>')
            >>> d.removeAttr('id')
            [<div>]

        ..
        """
        for tag in self:
            del tag.attrib[name]
        return self

    attr = FlexibleElement(pget=attr, pdel=removeAttr)

    #######
    # CSS #
    #######
[docs]    def height(self, value=no_default):
        """set/get height of element
        """
        return self.attr('height', value)

[docs]    def width(self, value=no_default):
        """set/get width of element
        """
        return self.attr('width', value)

[docs]    def hasClass(self, name):
        """Return True if element has class::

            >>> d = PyQuery('<div class="myclass"></div>')
            >>> d.hasClass('myclass')
            True

        ..
        """
        return self.is_('.%s' % name)

[docs]    def addClass(self, value):
        """Add a css class to elements::

            >>> d = PyQuery('<div></div>')
            >>> d.addClass('myclass')
            [<div.myclass>]

        ..
        """
        for tag in self:
            values = value.split(' ')
            classes = (tag.get('class') or '').split()
            classes += [v for v in values if v not in classes]
            tag.set('class', ' '.join(classes))
        return self

[docs]    def removeClass(self, value):
        """Remove a css class to elements::

            >>> d = PyQuery('<div class="myclass"></div>')
            >>> d.removeClass('myclass')
            [<div>]

        ..
        """
        for tag in self:
            values = value.split(' ')
            classes = set((tag.get('class') or '').split())
            classes.difference_update(values)
            classes.difference_update([''])
            tag.set('class', ' '.join(classes))
        return self

[docs]    def toggleClass(self, value):
        """Toggle a css class to elements

            >>> d = PyQuery('<div></div>')
            >>> d.toggleClass('myclass')
            [<div.myclass>]

        """
        for tag in self:
            values = value.split(' ')
            classes = (tag.get('class') or '').split()
            values_to_add = [v for v in values if v not in classes]
            values_to_del = [v for v in values if v in classes]
            classes = [v for v in classes if v not in values_to_del]
            classes += values_to_add
            tag.set('class', ' '.join(classes))
        return self

    def css(self, *args, **kwargs):
        """css attributes manipulation
        """

        attr = value = no_default
        length = len(args)
        if length == 1:
            attr = args[0]
        elif length == 2:
            attr, value = args
        elif kwargs:
            attr = kwargs
        else:
            raise ValueError('Invalid arguments %s %s' % (args, kwargs))

        if isinstance(attr, dict):
            for tag in self:
                stripped_keys = [key.strip().replace('_', '-')
                                 for key in attr.keys()]
                current = [el.strip()
                           for el in (tag.get('style') or '').split(';')
                           if el.strip()
                           and not el.split(':')[0].strip() in stripped_keys]
                for key, value in attr.items():
                    key = key.replace('_', '-')
                    current.append('%s: %s' % (key, value))
                tag.set('style', '; '.join(current))
        elif isinstance(value, basestring):
            attr = attr.replace('_', '-')
            for tag in self:
                current = [el.strip()
                           for el in (tag.get('style') or '').split(';')
                           if el.strip()
                              and not el.split(':')[0].strip() == attr.strip()]
                current.append('%s: %s' % (attr, value))
                tag.set('style', '; '.join(current))
        return self

    css = FlexibleElement(pget=css, pset=css)

    ###################
    # CORE UI EFFECTS #
    ###################
[docs]    def hide(self):
        """remove display:none to elements style

            >>> print(PyQuery('<div style="display:none;"/>').hide())
            <div style="display: none"/>

        """
        return self.css('display', 'none')

[docs]    def show(self):
        """add display:block to elements style

            >>> print(PyQuery('<div />').show())
            <div style="display: block"/>

        """
        return self.css('display', 'block')

    ########
    # HTML #
    ########
[docs]    def val(self, value=no_default):
        """Set the attribute value::

            >>> d = PyQuery('<input />')
            >>> d.val('Youhou')
            [<input>]

        Get the attribute value::

            >>> d.val()
            'Youhou'

        """
        return self.attr('value', value) or None

[docs]    def html(self, value=no_default, **kwargs):
        """Get or set the html representation of sub nodes.

        Get the text value::

            >>> d = PyQuery('<div><span>toto</span></div>')
            >>> print(d.html())
            <span>toto</span>

        Extra args are passed to ``lxml.etree.tostring``::

            >>> d = PyQuery('<div><span></span></div>')
            >>> print(d.html())
            <span/>
            >>> print(d.html(method='html'))
            <span></span>

        Set the text value::

            >>> d.html('<span>Youhou !</span>')
            [<div>]
            >>> print(d)
            <div><span>Youhou !</span></div>
        """
        if value is no_default:
            if not self:
                return None
            tag = self[0]
            children = tag.getchildren()
            if not children:
                return tag.text
            html = tag.text or ''
            if 'encoding' not in kwargs:
                kwargs['encoding'] = unicode
            html += unicode('').join([etree.tostring(e, **kwargs) \
                                                  for e in children])
            return html
        else:
            if isinstance(value, self.__class__):
                new_html = unicode(value)
            elif isinstance(value, basestring):
                new_html = value
            elif not value:
                new_html = ''
            else:
                raise ValueError(type(value))

            for tag in self:
                for child in tag.getchildren():
                    tag.remove(child)
                root = fromstring(
                            unicode('<root>') + new_html + unicode('</root>'),
                            self.parser)[0]
                children = root.getchildren()
                if children:
                    tag.extend(children)
                tag.text = root.text
                tag.tail = root.tail
        return self

[docs]    def outerHtml(self):
        """Get the html representation of the first selected element::

            >>> d = PyQuery('<div><span class="red">toto</span> rocks</div>')
            >>> print(d('span'))
            <span class="red">toto</span> rocks
            >>> print(d('span').outerHtml())
            <span class="red">toto</span>

            >>> S = PyQuery('<p>Only <b>me</b> & myself</p>')
            >>> print(S('b').outerHtml())
            <b>me</b>

        ..
        """

        if not self:
            return None
        e0 = self[0]
        if e0.tail:
            e0 = deepcopy(e0)
            e0.tail = ''
        return lxml.html.tostring(e0, encoding=unicode)

[docs]    def text(self, value=no_default):
        """Get or set the text representation of sub nodes.

        Get the text value::

            >>> doc = PyQuery('<div><span>toto</span><span>tata</span></div>')
            >>> print(doc.text())
            toto tata

        Set the text value::

            >>> doc.text('Youhou !')
            [<div>]
            >>> print(doc)
            <div>Youhou !</div>

        """

        if value is no_default:
            if not self:
                return None

            text = []

            def add_text(tag, no_tail=False):
                if tag.text and not isinstance(tag, lxml.etree._Comment):
                    text.append(tag.text)
                for child in tag.getchildren():
                    add_text(child)
                if not no_tail and tag.tail:
                    text.append(tag.tail)

            for tag in self:
                add_text(tag, no_tail=True)
            return ' '.join([t.strip() for t in text if t.strip()])

        for tag in self:
            for child in tag.getchildren():
                tag.remove(child)
            tag.text = value
        return self

    ################
    # Manipulating #
    ################

    def _get_root(self, value):
        if  isinstance(value, basestring):
            root = fromstring(unicode('<root>') + value + unicode('</root>'),
                              self.parser)[0]
        elif isinstance(value, etree._Element):
            root = self.__class__(value)
        elif isinstance(value, PyQuery):
            root = value
        else:
            raise TypeError(
            'Value must be string, PyQuery or Element. Got %r' % value)
        if hasattr(root, 'text') and isinstance(root.text, basestring):
            root_text = root.text
        else:
            root_text = ''
        return root, root_text

[docs]    def append(self, value):
        """append value to each nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            if len(tag) > 0:  # if the tag has children
                last_child = tag[-1]
                if not last_child.tail:
                    last_child.tail = ''
                last_child.tail += root_text
            else:
                if not tag.text:
                    tag.text = ''
                tag.text += root_text
            if i > 0:
                root = deepcopy(list(root))
            tag.extend(root)
            root = tag[-len(root):]
        return self

[docs]    def appendTo(self, value):
        """append nodes to value
        """
        value.append(self)
        return self

[docs]    def prepend(self, value):
        """prepend value to nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            if not tag.text:
                tag.text = ''
            if len(root) > 0:
                root[-1].tail = tag.text
                tag.text = root_text
            else:
                tag.text = root_text + tag.text
            if i > 0:
                root = deepcopy(list(root))
            tag[:0] = root
            root = tag[:len(root)]
        return self

[docs]    def prependTo(self, value):
        """prepend nodes to value
        """
        value.prepend(self)
        return self

[docs]    def after(self, value):
        """add value after nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            if not tag.tail:
                tag.tail = ''
            tag.tail += root_text
            if i > 0:
                root = deepcopy(list(root))
            parent = tag.getparent()
            index = parent.index(tag) + 1
            parent[index:index] = root
            root = parent[index:len(root)]
        return self

[docs]    def insertAfter(self, value):
        """insert nodes after value
        """
        value.after(self)
        return self

[docs]    def before(self, value):
        """insert value before nodes
        """
        root, root_text = self._get_root(value)
        for i, tag in enumerate(self):
            previous = tag.getprevious()
            if previous != None:
                if not previous.tail:
                    previous.tail = ''
                previous.tail += root_text
            else:
                parent = tag.getparent()
                if not parent.text:
                    parent.text = ''
                parent.text += root_text
            if i > 0:
                root = deepcopy(list(root))
            parent = tag.getparent()
            index = parent.index(tag)
            parent[index:index] = root
            root = parent[index:len(root)]
        return self

[docs]    def insertBefore(self, value):
        """insert nodes before value
        """
        value.before(self)
        return self

[docs]    def wrap(self, value):
        """A string of HTML that will be created on the fly and wrapped around
        each target:

            >>> d = PyQuery('<span>youhou</span>')
            >>> d.wrap('<div></div>')
            [<div>]
            >>> print(d)
            <div><span>youhou</span></div>

        """
        assert isinstance(value, basestring)
        value = fromstring(value)[0]
        nodes = []
        for tag in self:
            wrapper = deepcopy(value)
            # FIXME: using iterchildren is probably not optimal
            if not wrapper.getchildren():
                wrapper.append(deepcopy(tag))
            else:
                childs = [c for c in wrapper.iterchildren()]
                child = childs[-1]
                child.append(deepcopy(tag))
            nodes.append(wrapper)

            parent = tag.getparent()
            if parent is not None:
                for t in parent.iterchildren():
                    if t is tag:
                        t.addnext(wrapper)
                        parent.remove(t)
                        break
        self[:] = nodes
        return self

[docs]    def wrapAll(self, value):
        """Wrap all the elements in the matched set into a single wrapper
        element::

            >>> d = PyQuery('<div><span>Hey</span><span>you !</span></div>')
            >>> print(d('span').wrapAll('<div id="wrapper"></div>'))
            <div id="wrapper"><span>Hey</span><span>you !</span></div>

        ..
        """
        if not self:
            return self

        assert isinstance(value, basestring)
        value = fromstring(value)[0]
        wrapper = deepcopy(value)
        if not wrapper.getchildren():
            child = wrapper
        else:
            childs = [c for c in wrapper.iterchildren()]
            child = childs[-1]

        replace_childs = True
        parent = self[0].getparent()
        if parent is None:
            parent = no_default

        # add nodes to wrapper and check parent
        for tag in self:
            child.append(deepcopy(tag))
            if tag.getparent() is not parent:
                replace_childs = False

        # replace nodes i parent if possible
        if parent is not no_default and replace_childs:
            childs = [c for c in parent.iterchildren()]
            if len(childs) == len(self):
                for tag in self:
                    parent.remove(tag)
                parent.append(wrapper)

        self[:] = [wrapper]
        return self

[docs]    def replaceWith(self, value):
        """replace nodes by value
        """
        if hasattr(value, '__call__'):
            for i, element in enumerate(self):
                self.__class__(element).before(
                        value(i, element) + (element.tail or ''))
                parent = element.getparent()
                parent.remove(element)
        else:
            for tag in self:
                self.__class__(tag).before(value + (tag.tail or ''))
                parent = tag.getparent()
                parent.remove(tag)
        return self

[docs]    def replaceAll(self, expr):
        """replace nodes by expr
        """
        if self._parent is no_default:
            raise ValueError(
                    'replaceAll can only be used with an object with parent')
        self._parent(expr).replaceWith(self)
        return self

[docs]    def clone(self):
        """return a copy of nodes
        """
        return PyQuery([deepcopy(tag) for tag in self])

[docs]    def empty(self):
        """remove nodes content
        """
        for tag in self:
            tag.text = None
            tag[:] = []
        return self

[docs]    def remove(self, expr=no_default):
        """Remove nodes:

         >>> h = '<div>Maybe <em>she</em> does <strong>NOT</strong> know</div>'
         >>> d = PyQuery(h)
         >>> d('strong').remove()
         [<strong>]
         >>> print(d)
         <div>Maybe <em>she</em> does   know</div>
        """
        if expr is no_default:
            for tag in self:
                parent = tag.getparent()
                if parent is not None:
                    if tag.tail:
                        prev = tag.getprevious()
                        if prev is None:
                            if not parent.text:
                                parent.text = ''
                            parent.text += ' ' + tag.tail
                        else:
                            if not prev.tail:
                                prev.tail = ''
                            prev.tail += ' ' + tag.tail
                    parent.remove(tag)
        else:
            results = self.__class__(expr, self)
            results.remove()
        return self

[docs]    class Fn(object):
        """Hook for defining custom function (like the jQuery.fn):

        .. sourcecode:: python

         >>> fn = lambda: this.map(lambda i, el: PyQuery(this).outerHtml())
         >>> PyQuery.fn.listOuterHtml = fn
         >>> S = PyQuery(
         ...   '<ol>   <li>Coffee</li>   <li>Tea</li>   <li>Milk</li>   </ol>')
         >>> S('li').listOuterHtml()
         ['<li>Coffee</li>', '<li>Tea</li>', '<li>Milk</li>']

        """
        def __setattr__(self, name, func):
            def fn(self, *args):
                func_globals(func)['this'] = self
                return func(*args)
            fn.__name__ = name
            setattr(PyQuery, name, fn)
    fn = Fn()

    #####################################################
    # Additional methods that are not in the jQuery API #
    #####################################################

    @property
[docs]    def base_url(self):
        """Return the url of current html document or None if not available.
        """
        if self._base_url is not None:
            return self._base_url
        if self._parent is not no_default:
            return self._parent.base_url

[docs]    def make_links_absolute(self, base_url=None):
        """Make all links absolute.
        """
        if base_url is None:
            base_url = self.base_url
            if base_url is None:
                raise ValueError('You need a base URL to make your links'
                 'absolute. It can be provided by the base_url parameter.')

        self('a').each(lambda: self(this).attr('href', urljoin(base_url, self(this).attr('href')))) # NOQA
        return self
Navigation

Source code for pyquery.pyquery

Quick search

Navigation