# Source code for pyson.iface.getters

r'''
JSON Paths
==========

JSON-like structures can be represented by combinations of dictionaries
and lists, with simple immutable types in their nodes. Given the JSON-like
structure

>>> obj = { "store": {\
...     "book": [ 
...       { "category": "reference",
...         "author": "Nigel Rees",
...         "title": "Sayings of the Century",
...         "discount price": 8.95
...       },
...       { "category": "fiction",
...         "author.name": "Evelyn",
...         "title": "Sword of Honour",
...         "price": 12.99
...       }
...     ]
...   }
... }

Each datum can be located by its corresponding address. For example, the
author "Nigel Rees" can be found under "store", and is the "author" key in the
first "book". This address can be represented by a list of the keys and indices
used to access this information in the data structure, i.e.,
``['store', 'book', 0, 'author']``. Additionally, these paths can be
represented by strings, using the notation ``'$.store.book.0.author'``, in
which ``$`` represents the root node.

The `as_path` and `as_str_path` functions can convert between the string and 
list representations of these addresses

>>> as_path('$.store.book.0.author')
['store', 'book', 0, 'author']

The items can be accessed uniformly using the `getitem` function using the
string or list notations to represent paths.

>>> getitem(obj, '$.store.book.0.author')
'Nigel Rees'

There is no requirement that the items in a path list should be strings or
integers. In fact, they can be anything, and it is up to the container
structures to support them or not. String paths are more restrictive in that
only strings and integers are allowed. This conforms more closely to the JSON
spec.

String keys can be enclosed by quotation marks in order to represent more
complicated values, as in
  
>>> getitem(obj, '$.store.book.0."discount price"')
8.95

One can also test if a given node exists

>>> haspath(obj, '$.store.10."number of viewers"')
False

Notes
-----

This standard is vaguely based on the notation defined at
http://goessner.net/articles/JsonPath/.

Doctests
--------

(Probably this should go to a test module...)
Converting complicated list paths to string paths

>>> as_str_path(['foo', 1, 'bar'])
u'$.foo.1.bar'

>>> as_str_path(['my root', 0, 'she said: "yeah!"'])
u'$."my root".0."she said: \\"yeah!\\""'

API Documentation
-----------------
'''
from pyson.iface.exceptions import *
import re

# Names exported by ``from pyson.iface.getters import *``.
__all__ = [ 'getitem', 'haspath', 'as_path', 'as_str_path' ]

# Matches a single path node at the start of a string: an alphanumeric name,
# an integer, or a quoted string similar to those defined in tokenize.String.
#
# The four alternatives are wrapped in a non-capturing group so that the
# optional leading blanks and the mandatory terminator (a dot or the end of
# the string) apply to EVERY kind of node.  Without the group, ``|`` binds
# more loosely than the surrounding pieces, so the prefix would belong only
# to the first alternative and the terminator only to the last one, letting
# inputs such as 'foo bar' be read as two separate nodes.
RE_PATH_NODE = re.compile(
    r'''^[ ]*(?:'''
    r'''(?P<name>[a-zA-Z]\w*)'''
    r'''|(?P<int>-?\d+)'''
    r"""|(?P<str1>'[^\n'\\]*(?:\\.[^\n'\\]*)*')"""
    r'''|(?P<str2>"[^\n"\\]*(?:\\.[^\n"\\]*)*")'''
    r''')[ ]*([.]|$)''')

# Matches valid variable names
RE_NAME = re.compile(r'^[a-zA-Z]\w*$')

def as_path(path):
    '''
    Convert a JSON path into its list representation.

    Parameters
    ----------
    path : str, list or tuple
        Either a string path such as ``'$.store.book.0.author'`` or a
        sequence of nodes.  Lists and tuples are copied into a new list
        without any validation of their elements.

    Returns
    -------
    list
        The nodes of the path.  Names and quoted strings become strings,
        integers become ``int``.  The root path (``'$'``) yields ``[]``.

    Raises
    ------
    ValueError
        If a string path does not start at the root node ``$`` or contains
        a node that cannot be parsed.
    RuntimeError
        If the path holds 1000 nodes or more.
    '''

    if isinstance(path, (tuple, list)):
        return list(path)

    # Remove dollar sign
    if path.startswith('$.'):
        pattern = path[2:]
    else:
        pre, _, new_path = path.partition('.')
        if pre.strip() != '$':
            raise ValueError("invalid path '%s', must start with '$.'" % path)
        pattern = new_path.lstrip()

    # List that accumulates path nodes
    path_nodes = []
    # ``range`` (rather than ``xrange``) keeps the loop working on both
    # Python 2 and Python 3; 1000 items is a negligible allocation.
    for _ in range(1000):
        if not pattern:
            # Every node has been consumed
            break

        m = RE_PATH_NODE.match(pattern)
        if m is None:
            # Nodes may be integers, so they are formatted with repr()
            # before joining; joining them directly raises TypeError.
            parsed = [repr(node) for node in path_nodes]
            converted = ', '.join(parsed + ['???'])
            raise ValueError("Invalid path: (%s) from '%s'" % (converted, path))

        # Choose action depending on the type of match
        groups = m.groupdict()
        if groups['name'] is not None:
            path_nodes.append(groups['name'])
        elif groups['int'] is not None:
            path_nodes.append(int(groups['int']))
        elif groups['str1'] is not None:
            # Strip the enclosing quotes and unescape the inner ones
            aux = groups['str1'][1:-1]
            path_nodes.append(aux.replace("\\'", "'"))
        elif groups['str2'] is not None:
            aux = groups['str2'][1:-1]
            path_nodes.append(aux.replace('\\"', '"'))
        else:
            raise RuntimeError('path node matched no known node type')

        # Drop the consumed node together with the separator that follows it
        pattern = pattern[m.end():].lstrip().lstrip('.')
    else:
        raise RuntimeError('Maximum number of nodes reached: 1000')

    return path_nodes
def getitem(obj, path, default=DEFAULT):
    """
    Return the value in a given JSON path of 'obj'.

    Parameters
    ----------
    obj : JSON-like object
        JSON structure (list-like or dictionary-like)
    path : str, list or tuple
        The path can be a string of the form ``"$.child.toys.0"`` or a
        sequence such as ``['child', 'toys', 0]``
    default
        Return this value if `obj` does not possess the desired path node.

    Returns
    -------
    The object found by indexing `obj` successively with each node of
    `path`; `obj` itself when the path is empty.

    Raises
    ------
    IndexKeyError
        If path is not present in `obj` and no `default` was given.
    """

    nodes = as_path(path)
    current = obj
    for depth, key in enumerate(nodes):
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # TypeError is raised when the path runs through a leaf value
            # (e.g. a number) or indexes a list with a string key: in both
            # cases the requested node simply does not exist.
            if default is not DEFAULT:
                return default

            parent = nodes[:depth]
            if parent:
                try:
                    full_path = as_str_path(parent)
                except TypeError:
                    # List paths may hold keys that have no string-path
                    # form; do not let that hide the real error.
                    full_path = repr(parent)
            else:
                full_path = 'root node'
            raise IndexKeyError("key does not exist at %s: '%s'" % (full_path, key))
    return current
def as_str_path(path):
    '''
    Build the string form (``$.node.node...``) of a path.

    A `path` that is already a string is handed back untouched (it is not
    validated).  Otherwise each element of `path` is rendered in turn:
    strings matching RE_NAME are written as they are, any other string is
    wrapped in double quotes with its inner double quotes backslash-escaped,
    and integers are written in decimal.  Any other element type raises
    TypeError.

    NOTE(review): only double quotes are escaped; backslashes inside a key
    are written through unchanged.
    '''

    if isinstance(path, basestring):
        #TODO: validate string
        return path

    pieces = [u'$']
    for element in path:
        if isinstance(element, basestring):
            text = unicode(element)
            if RE_NAME.match(text) is None:
                # Not a plain identifier: quote it
                text = u'"%s"' % text.replace('"', '\\"')
            pieces.append(text)
            continue

        if not isinstance(element, int):
            raise TypeError("invalid path element of type %s; only int's and str's are accepted" % type(element))
        pieces.append(str(element))

    return u'.'.join(pieces)
def haspath(obj, path):
    '''
    Tell whether ``path`` leads to a value inside ``obj``.

    The lookup is delegated to `getitem`: the result is True when it
    succeeds and False when it raises IndexKeyError.
    '''

    try:
        getitem(obj, path)
    except IndexKeyError:
        return False
    return True
if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module,
    # quietly, reporting only the first failure of each example block.
    import doctest

    doctest.testmod(verbose=0,
                    optionflags=doctest.REPORT_ONLY_FIRST_FAILURE)