import sys
import urllib2
from urlparse import urlparse
from datetime import datetime
from logging import getLogger
from rdflib.Graph import ConjunctiveGraph, Graph
from rdflib import URIRef, Literal, Variable, BNode
from rdflib.Namespace import Namespace
from rdflib.store.SPARQL import SPARQLStore
from FuXi.Rete.Util import generateTokenSet
from curate.work import queue
# Namespaces for the vocabularies used when describing fetches as RDF.
# Standard RDF / RDF Schema terms (RDF["type"], RDFS["comment"]).
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
# Dublin Core terms; DCT["date"] timestamps each connection node.
DCT = Namespace("http://purl.org/dc/terms/")
# Terms for connections, requests, responses and message headers.
HTTP = Namespace("http://www.w3.org/2006/http#")
# One resource per HTTP method name, e.g. METH["HEAD"].
METH = Namespace("http://www.w3.org/2008/http-methods#")
# Terms used by curlReq for non-HTTP fetches and for success/failure status.
CURL = Namespace("http://eris.okfn.org/ww/2010/12/curl#")
class Action(object):
    """
    Base class for actions parameterised by a subject and an object term.

    Either term may be a constant or a ``Variable``; subclasses resolve
    them against a token with :meth:`get` when they are called.
    """

    def __init__(self, sbind, obind):
        """Store both terms and create a logger named after them."""
        self.sbind, self.obind = sbind, obind
        logger_name = "%s(%s, %s)" % (
            self.__class__.__name__, sbind.n3(), obind.n3())
        self.log = getLogger(logger_name)
        self.log.debug("init")

    def get(self, term, token):
        """
        Resolve ``term`` against the bindings carried by ``token``.

        A ``Variable`` is replaced by its value from the first binding
        that contains it.  Any other term, and a ``Variable`` that no
        binding contains, is returned unchanged.
        """
        if not isinstance(term, Variable):
            return term
        for candidate in token.bindings:
            if term in candidate:
                return candidate[term]
        return term
class httpReq(Action):
    """
    Action that performs an HTTP request with urllib2 and records the
    connection, request and response as RDF.
    """

    def __call__(self, tNode, inferredTriple, token, _binding, debug = False):
        """
        Check if the given resource responds with data to an HTTP
        request using the bound method.

        The subject term resolves to the URL and the object term to the
        HTTP method name.  The resulting description is fed back into
        ``tNode.network`` and added to its inferred facts.  Nothing is
        returned.
        """
        resource = self.get(self.sbind, token)
        method = self.get(self.obind, token)
        self.log.info("%s" % resource)

        class _Request(urllib2.Request):
            # Send the bound method instead of urllib2's own choice.
            def get_method(self):
                return method
        request = _Request(resource)

        #g = tNode.network.inferredFacts
        g = Graph()
        conn = BNode()
        g.add((conn, RDF["type"], HTTP["Connection"]))
        g.add((conn, DCT["date"], Literal(datetime.utcnow())))

        parsed = urlparse(resource)
        host = parsed.hostname
        try:
            port = parsed.port
        except ValueError:
            # A non-numeric port in the URL; fall back to the default.
            port = None
        if port is None:
            # urlparse reports the scheme without the trailing colon.
            if parsed.scheme == "https":
                port = 443
            else:
                port = 80
        g.add((conn, HTTP["connectionAuthority"], Literal("%s:%s" % (host, port))))

        req = BNode()
        g.add((conn, HTTP["requests"], req))
        g.add((req, RDF["type"], HTTP["Request"]))
        # Record the method that is actually sent, not a fixed "HEAD".
        g.add((req, HTTP["methodName"], Literal(method)))
        g.add((req, HTTP["mthd"], METH[method]))
        g.add((req, HTTP["requestURI"], resource))
        if request.headers:
            self.record_headers(g, req, request.headers)

        resp = BNode()
        g.add((req, HTTP["resp"], resp))
        g.add((resp, RDF["type"], HTTP["Response"]))

        # HTTPError must be caught before URLError: it is a subclass of it
        # and doubles as a response object carrying status and headers.
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as response:
            self._record_response(g, resp, response)
            self.log.error("%s %s" % (resource, response))
        except urllib2.URLError as e:
            g.add((resp, HTTP["statusCodeNumber"], Literal("-1")))
            g.add((conn, RDFS["comment"], Literal(e.reason)))
            self.log.error("%s %s" % (resource, e))
        else:
            self._record_response(g, resp, response)

        tNode.network.feedFactsToAdd(generateTokenSet(g))
        tNode.network.inferredFacts += g

    def _record_response(self, g, resp, response):
        """Record status code and headers of ``response``, then close it."""
        try:
            g.add((resp, HTTP["statusCodeNumber"], Literal("%s" % response.getcode())))
            if response.headers:
                self.record_headers(g, resp, response.headers)
            # Read (and discard) at most 4096 bytes of the body; nothing
            # from the body is recorded in the graph.
            response.read(4096)
        finally:
            response.close()

    def record_headers(self, g, msg, hdict):
        """
        Attach one ``HTTP["MessageHeader"]`` node to ``msg`` in ``g`` for
        every field name found when iterating ``hdict``.
        """
        for h in hdict:
            head = BNode()
            g.add((msg, HTTP["headers"], head))
            g.add((head, RDF["type"], HTTP["MessageHeader"]))
            g.add((head, HTTP["fieldName"], Literal(h)))
            g.add((head, HTTP["fieldValue"], Literal(hdict[h])))
class curlReq(Action):
    """
    Action that fetches a resource with pycurl and records the outcome
    as RDF.

    http/https URLs are described with the HTTP vocabulary (connection,
    request, response, headers, status code); any other scheme gets a
    single ``CURL["Curl"]`` node.  In both cases a ``CURL["status"]`` of
    ``CURL["Success"]`` or ``CURL["Failure"]`` is recorded.
    """

    def http_conn(self, g, resource, method):
        """
        Add connection, request and response nodes for ``resource`` to
        ``g`` and return the response node.
        """
        conn = BNode()
        g.add((conn, RDF["type"], HTTP["Connection"]))
        g.add((conn, DCT["date"], Literal(datetime.utcnow())))
        parsed = urlparse(resource)
        host = parsed.hostname
        try:
            port = parsed.port
        except ValueError:
            # A non-numeric port in the URL; fall back to the default.
            port = None
        if port is None:
            # urlparse reports the scheme without the trailing colon.
            if parsed.scheme == "https":
                port = 443
            else:
                port = 80
        g.add((conn, HTTP["connectionAuthority"], Literal("%s:%s" % (host, port))))
        req = BNode()
        g.add((conn, HTTP["requests"], req))
        g.add((req, RDF["type"], HTTP["Request"]))
        g.add((req, HTTP["methodName"], Literal(method)))
        g.add((req, HTTP["mthd"], METH[method]))
        g.add((req, HTTP["requestURI"], resource))
        resp = BNode()
        g.add((req, HTTP["resp"], resp))
        g.add((resp, RDF["type"], HTTP["Response"]))
        return resp

    def generic_conn(self, g, resource, method):
        """
        Add a ``CURL["Curl"]`` node for ``resource`` to ``g`` and return
        it.  ``method`` is accepted for symmetry with :meth:`http_conn`
        but is not used.
        """
        conn = BNode()
        g.add((conn, RDF["type"], CURL["Curl"]))
        g.add((conn, DCT["date"], Literal(datetime.utcnow())))
        g.add((conn, CURL["uri"], resource))
        return conn

    def recode(self, s):
        """
        Decode ``s`` using the first of utf-8, latin1 and koi8-r that
        succeeds.

        Raises ``UnicodeError`` if none of them does.  NOTE(review):
        latin1 maps every byte, so for byte strings the koi8-r attempt
        looks unreachable.
        """
        for encoding in ("utf-8", "latin1", "koi8-r"):
            try:
                return s.decode(encoding)
            except UnicodeError:
                continue
        raise UnicodeError(s)

    def __call__(self, tNode, inferredTriple, token, _binding, debug = False):
        """
        Fetch the bound resource and feed a description of the outcome
        into ``tNode.network``.

        The subject term resolves to the URL and the object term to the
        method name.  Only the first chunk of the body is kept: the write
        callback returns 0, which aborts the transfer on purpose.
        Nothing is returned.
        """
        import pycurl
        resource = self.get(self.sbind, token)
        method = self.get(self.obind, token)
        self.log.info("Fetching %s" % resource)
        parsed_resource = urlparse(resource)
        is_http = parsed_resource.scheme in ("http", "https")
        g = Graph()
        c = pycurl.Curl()
        try:
            c.setopt(c.URL, str(resource))
            c.setopt(c.FAILONERROR, 0)
            c.setopt(c.FOLLOWLOCATION, 1)
            c.setopt(c.MAXREDIRS, 5)
            c.setopt(c.CONNECTTIMEOUT, 10)

            if is_http:
                resp = self.http_conn(g, resource, method)
                body_predicate = HTTP["body"]

                def handle_header(h):
                    h = h.strip()
                    if ":" in h:
                        k, v = [x.strip() for x in h.split(":", 1)]
                        head = BNode()
                        g.add((resp, HTTP["headers"], head))
                        g.add((head, RDF["type"], HTTP["MessageHeader"]))
                        g.add((head, HTTP["fieldName"], Literal(k)))
                        g.add((head, HTTP["fieldValue"], Literal(v)))
                c.setopt(c.HEADERFUNCTION, handle_header)
                if method == u"HEAD":
                    c.setopt(c.NOBODY, 1)
            else:
                resp = self.generic_conn(g, resource, method)
                body_predicate = CURL["data"]

            def handle_data(s):
                s = self.recode(s)
                g.add((resp, body_predicate, Literal(s)))
                # Returning 0 makes the write fail, so the transfer stops
                # after the first chunk.
                return 0
            c.setopt(c.WRITEFUNCTION, handle_data)

            # Compare for equality: `in ("https")` is a substring test on
            # a plain string and also matches "http" and "".
            if parsed_resource.scheme == "https":
                # NOTE(review): peer certificate verification is switched
                # off here; confirm this is intended.
                c.setopt(c.SSL_VERIFYPEER, 0)

            success = False
            try:
                c.perform()
                success = True
            except pycurl.error as e:
                ## why do we have to do this manually here?
                sys.exc_clear()
                if e.args[0] == pycurl.E_WRITE_ERROR:
                    ## we purposely fail the write in order not to
                    ## download the whole thing
                    success = True
                else:
                    g.add((resp, RDFS["comment"], Literal(e.args[1])))

            if is_http:
                code = c.getinfo(c.HTTP_CODE)
                g.add((resp, HTTP["statusCodeNumber"], Literal("%s" % code)))
                if code > 399:
                    success = False

            if success:
                g.add((resp, CURL["status"], CURL["Success"]))
            else:
                g.add((resp, CURL["status"], CURL["Failure"]))
        finally:
            # Release the curl handle even if something above raised.
            c.close()

        tNode.network.feedFactsToAdd(generateTokenSet(g))
        tNode.network.inferredFacts += g
class addTag(Action):
    """Action that queues the addition of a tag to a dataset."""

    def __call__(self, tNode, inferredTriple, token, _binding, debug = False):
        """
        Resolve the subject term to the dataset and the object term to
        the tag, then queue the tag (as unicode) under "tags".
        """
        resolve = self.get
        target = resolve(self.sbind, token)
        label = resolve(self.obind, token)
        queue.add(target, "tags", [unicode(label)])
class delTag(Action):
    """Action that queues the removal of a tag from a dataset."""

    def __call__(self, tNode, inferredTriple, token, _binding, debug = False):
        """
        Resolve the subject term to the dataset and the object term to
        the tag, then queue removal of the tag (as unicode) from "tags".
        """
        resolve = self.get
        target = resolve(self.sbind, token)
        label = resolve(self.obind, token)
        queue.remove(target, "tags", [unicode(label)])
class addGroup(Action):
    """Action that queues the addition of a dataset to a group."""

    def __call__(self, tNode, inferredTriple, token, _binding, debug = False):
        """
        Resolve the subject term to the dataset and the object term to
        the group, then queue the group name under "groups".

        A ``URIRef`` group contributes the part after its last "/"; a
        ``Literal`` group contributes its text.  Any other kind of term
        is ignored and nothing is queued.
        """
        dataset = self.get(self.sbind, token)
        group = self.get(self.obind, token)
        if isinstance(group, URIRef):
            # [-1] also copes with a URI that contains no "/" at all.
            group = group.rsplit("/", 1)[-1]
        elif isinstance(group, Literal):
            # Use the literal's text as is; unpacking it into two names
            # only works for two-character strings.
            group = unicode(group)
        else:
            return
        queue.add(dataset, "groups", [unicode(group)])