# Copyright (c) 2009, Digital Enterprise Research Institute (DERI),
# NUI Galway
# All rights reserved.
# author: Cosmin Basca
# email: cosmin.basca@gmail.com
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with
# the distribution.
# * Neither the name of DERI nor the
# names of its contributors may be used to endorse or promote
# products derived from this software without specific prior
# written permission.
# THIS SOFTWARE IS PROVIDED BY DERI ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DERI BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# -*- coding: utf-8 -*-
__author__ = 'Cosmin Basca'
import logging
import re
from surf.rdf import BNode, Graph, ConjunctiveGraph, Literal, Namespace
from surf.rdf import RDF, URIRef
a = RDF['type']
SELECT = 'select'
ASK = 'ask'
CONSTRUCT = 'construct'
DESCRIBE = 'describe'
DISTINCT = 'distinct'
REDUCED = 'reduced'
UNION = 'union'
#the classes
[docs]class Group(list):
'''A **SPARQL** triple pattern group
'''
pass
[docs]class NamedGroup(Group):
'''A **SPARQL** triple pattern named group
'''
def __init__(self, name = None):
Group.__init__(self)
if isinstance(name, URIRef) or (type(name) in [str, unicode] and name.startswith('?')):
self.name = name
else:
raise ValueError('The names')
[docs]class OptionalGroup(Group):
'''A **SPARQL** triple pattern optional group
'''
pass
[docs]class Union(Group):
'''A **SPARQL** union
'''
pass
[docs]class Filter(unicode):
'''A **SPARQL** triple pattern filter
'''
@classmethod
def regex(cls, var, pattern, flag = None):
if type(var) in [str, unicode] and var.startswith('?'): pass
else: raise ValueError('not a filter variable')
if type(pattern) in [str, unicode]: pass
elif type(pattern) is Literal: pattern = '"%s"@%s' % (pattern, pattern.language)
elif type(pattern) in [list, tuple]: pattern = '"%s"@%s' % (pattern[0], pattern[1])
else: raise ValueError('regular expression')
if flag is None:
flag = ""
else:
if not type(flag) in [str, unicode]:
raise ValueError('not a filter flag')
return Filter('regex(%s,"%s"%s)' % (var, pattern, ',"%s"' % flag))
[docs]class Query(object):
"""
The `Query` object is used by SuRF to construct queries in a programatic
manner. The class supports the major SPARQL query types: *select*, *ask*,
*describe*, *construct*. Although it follows the SPARQL format the query
can be translated to other Query formats such as PROLOG, for now
though only SPARQL is supported.
Query objects should not be instatiated directly, instead use module-level
:func:`ask`, :func:`construct`, :func:`describe`, :func:`select` functions.
Query methods can be chained.
"""
STATEMENT_TYPES = [list, tuple, Group, NamedGroup, OptionalGroup,
Union, Filter] # + Query, but cannot reference it here.
AGGREGATE_FUCTIONS = ["count", "min", "max", "avg"]
TYPES = [SELECT, ASK, CONSTRUCT, DESCRIBE]
def __init__(self, type, *vars):
if type not in self.TYPES:
raise ValueError('''The query is not of a supported type [%s], supported
types are %s''' % (type, str(Query.TYPES)))
self._type = type
self._modifier = None
self._vars = [var for var in vars if self._validate_variable(var)]
self._from = []
self._from_named = []
self._data = []
self._limit = None
self._offset = None
self._order_by = []
query_type = property(fget = lambda self: self._type)
'''the query `type` can be: *SELECT*, *ASK*, *DESCRIBE*or *CONSTRUCT*'''
query_modifier = property(fget = lambda self: self._modifier)
'''the query `modifier` can be: *DISTINCT*, *REDUCED*, or `None`'''
query_vars = property(fget = lambda self: self._vars)
'''the query `variables` to return as the resultset'''
query_from = property(fget = lambda self: self._from)
'''list of URIs that will go into query FROM clauses'''
query_from_named = property(fget = lambda self: self._from_named)
'''list of URIs that will go into query FROM NAMED clauses'''
query_data = property(fget = lambda self: self._data)
'''the query `data`, internal structure representing the contents of the *WHERE* clause'''
query_limit = property(fget = lambda self: self._limit)
'''the query `limit`, can be a number or None'''
query_offset = property(fget = lambda self: self._offset)
'''the query `offset`, can be a number or None'''
query_order_by = property(fget = lambda self: self._order_by)
'''the query `order by` variables'''
def _validate_variable(self, var):
if type(var) in [str, unicode]:
if var.startswith('?'):
return True
elif re.match('\s*\(\s*.+\s+AS\s+\?.+\)\s*$', var):
# SPARQL 1.1 expressions http://www.w3.org/TR/sparql11-query/#rSelectClause
return True
else:
for aggregate in Query.AGGREGATE_FUCTIONS:
if var.lower().startswith(aggregate):
return True
raise ValueError('''Not a variable : <%s>, check correct syntax ("?",
expression, or supported aggregate %s)'''
% (var, str(Query.AGGREGATE_FUCTIONS)))
else:
raise ValueError('''Unknown variable type, all variables must either
start with a "?" or be among the recognized aggregates :
%s''' % Query.AGGREGATE_FUCTIONS)
[docs] def distinct(self):
""" Add *DISTINCT* modifier. """
self._modifier = DISTINCT
return self
[docs] def reduced(self):
""" Add *REDUCED* modifier. """
self._modifier = REDUCED
return self
[docs] def from_(self, *uris):
""" Add graph URI(s) that will go in separate *FROM* clause.
Each argument can be either `string` or :class:`surf.rdf.URIRef`.
"""
for uri in uris:
if uri is None:
raise ValueError("Invalid graph URI")
self._from += uris
return self
[docs] def from_named(self, *uris):
""" Add graph URI(s) that will go in separate *FROM NAMED* clause.
Each argument can be either `string` or :class:`surf.rdf.URIRef`.
"""
for uri in uris:
if uri is None:
raise ValueError("Invalid graph URI")
self._from_named += uris
return self
[docs] def where(self, *statements):
""" Add graph pattern(s) to *WHERE* clause.
`where()` accepts multiple arguments. Each argument represents a
a graph pattern and will be added to default group graph pattern.
Each argument can be `tuple`, `list`, :class:`surf.query.Query`,
:class:`surf.query.NamedGroup`, :class:`surf.query.OptionalGroup`.
Example:
>>> query = select("?s").where(("?s", a, surf.ns.FOAF["person"]))
"""
self._data.extend([stmt for stmt in statements if validate_statement(stmt)])
return self
[docs] def optional_group(self, *statements):
""" Add optional group graph pattern to *WHERE* clause.
`optional_group()` accepts multiple arguments, similarly
to :meth:`where()`.
"""
g = OptionalGroup()
g.extend([stmt for stmt in statements if validate_statement(stmt)])
self._data.append(g)
return self
def group(self, *statements):
g = Group()
g.extend([stmt for stmt in statements if validate_statement(stmt)])
self._data.append(g)
return self
def union(self, *statements):
g = Union()
g.extend([stmt for stmt in statements if validate_statement(stmt)])
self._data.append(g)
return self
[docs] def named_group(self, name, *statements):
""" Add ``GROUP ?name { ... }`` construct to *WHERE* clause.
``name`` is the variable name that will be bound to graph IRI.
``*statements`` is one or more graph patterns.
Example:
>>> import surf
>>> from surf.query import a, select
>>> query = select("?s", "?src").named_group("?src", ("?s", a, surf.ns.FOAF['Person']))
>>> print unicode(query)
SELECT ?s ?src WHERE { GRAPH ?src { ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> } }
"""
g = NamedGroup(name)
g.extend([stmt for stmt in statements if validate_statement(stmt)])
self._data.append(g)
return self
[docs] def filter(self, filter):
""" Add *FILTER* construct to query *WHERE* clause.
``filter`` must be either `string`/`unicode` or
:class:`surf.query.Filter` object, if it is `None` then no filter
is appended.
"""
if not filter:
return self
elif type(filter) in [str, unicode]:
filter = Filter(filter)
elif type(filter) is not Filter:
raise ValueError('the filter must be of type Filter, str or unicode following the syntax of the query language')
self._data.append(filter)
return self
[docs] def limit(self, limit):
""" Add *LIMIT* modifier to query. """
if limit:
self._limit = limit
return self
[docs] def offset(self, offset):
""" Add *OFFSET* modifier to query. """
if offset:
self._offset = offset
return self
[docs] def order_by(self, *vars):
""" Add *ORDER_BY* modifier to query. """
pattern = re.compile("(asc|desc)\(\?\w+\)|\?\w+", re.I)
for var in vars:
if re.match(pattern, var):
self._order_by.append(var)
return self
def __unicode__(self):
# Importing here to avoid circular imports.
from surf.query.translator.sparql import SparqlTranslator
return SparqlTranslator(self).translate()
def __str__(self):
return unicode(self).encode("utf-8")
def validate_statement(statement):
if type(statement) in Query.STATEMENT_TYPES or isinstance(statement, Query):
if type(statement) in [list, tuple]:
try:
s, p, o = statement
except:
raise ValueError('''Statement of type [list, tuple] does not
have all the (s,p,o) members (the length of the
supplied arguemnt must be at least 3)''')
if type(s) in [URIRef, BNode] or \
(type(s) in [str, unicode] and s.startswith('?')): pass
else: raise ValueError('The subject is not a valid variable type')
if type(p) in [URIRef] or \
(type(p) in [str, unicode] and p.startswith('?')): pass
else: raise ValueError('The predicate is not a valid variable type')
if type(o) in [URIRef, BNode, Literal] or \
(type(o) in [str, unicode] and o.startswith('?')): pass
else:
raise ValueError('The object is not a valid variable type: %s' % o)
return True
else:
raise ValueError('Statement type not in %s' % str(Query.STATEMENT_TYPES))
[docs]def optional_group(*statements):
""" Return optional group graph pattern.
Returned object can be used as argument in :meth:`Query.where` method.
`optional_group()` accepts multiple arguments, similarly
to :meth:`Query.where()`.
"""
g = OptionalGroup()
g.extend([stmt for stmt in statements if validate_statement(stmt)])
return g
[docs]def group(*statements):
""" Return group graph pattern.
Returned object can be used as argument in :meth:`Query.where` method.
group()` accepts multiple arguments, similarly
to :meth:`Query.where()`.
"""
g = Group()
g.extend([stmt for stmt in statements if validate_statement(stmt)])
return g
[docs]def union(*statements):
""" Return union graph pattern.
Returned object can be used as argument in :meth:`Query.where` method.
union()` accepts multiple arguments, similarly
to :meth:`Query.where()`.
"""
g = Union()
g.extend([stmt for stmt in statements if validate_statement(stmt)])
return g
[docs]def named_group(name, *statements):
""" Return named group graph pattern.
Returned object can be used as argument in :meth:`Query.where` method.
``*statements`` is one or more graph patterns.
Example:
>>> import surf
>>> from surf.query import a, select, named_group
>>> query = select("?s", "?src").where(named_group("?src", ("?s", a, surf.ns.FOAF['Person'])))
>>> print unicode(query)
SELECT ?s ?src WHERE { GRAPH ?src { ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> } }
"""
g = NamedGroup(name)
g.extend([stmt for stmt in statements if validate_statement(stmt)])
return g
# the query creators
[docs]def select(*vars):
""" Construct and return :class:`surf.query.Query` object of type **SELECT**
``*vars`` are variables to be selected.
Example:
>>> query = select("?s", "?p", "?o")
"""
return Query(SELECT, *vars)
[docs]def ask():
""" Construct and return :class:`surf.query.Query` object of type **ASK** """
return Query(ASK)
[docs]def construct(*vars):
""" Construct and return :class:`surf.query.Query` object of type **CONSTRUCT** """
return Query(CONSTRUCT, *vars)
[docs]def describe(*vars):
""" Construct and return :class:`surf.query.Query` object of type **DESCRIBE** """
return Query(DESCRIBE, *vars)