Source code for ordf.handler.pt

"""
This is the core read/write storage module in ORDF. Its purpose is
to store RDF graphs in the filesystem in a specialised directory
hierarchy known as a Pairtree_. The reasoning behind this is explained
in the :ref:`rdf-back-end-design` section of this documentation.

.. autoclass:: PairTree
   :show-inheritance:

.. _Pairtree: https://confluence.ucop.edu/display/Curation/PairTree
"""

from pairtree import PairtreeStorageFactory, PartNotFoundException
from pairtree.pairtree_path import id_to_dirpath
from ordf.graph import Graph, _Graph
from ordf.term import URIRef
from ordf.utils import uuid, get_identifier
from ordf.handler import HandlerPlugin
from traceback import format_exc
from logging import getLogger

# Module-level logger named after this module; PairTree.__iter__ uses it to
# report stored graphs that could not be read.
log = getLogger(__name__)

class PairTree(HandlerPlugin):
    """
    Handler that keeps each RDF graph in its own Pairtree object.

    When adding a *Graph* to this store the first thing that is done is
    its *uri* is normalised to the *urn:uuid:* namespace. If its
    identifier is already in that namespace, nothing is changed. If it
    is not, it is converted. The original identifier is stored in the
    filesystem for later retrieval so that when a read operation is
    performed, the result is a *Graph* with the correct *uri*
    identifier. This is done with the utility function
    :func:`ordf.utils.uuid`.

    Each stored object holds two bytestreams: ``identifier.txt`` (the
    graph's original identifier) and ``graph.rdf`` (the graph serialised
    as RDF/XML).

    :param store_dir: directory passed to the pairtree storage factory.
    :param uri_base: URI base passed to the pairtree store; it is also
        what gets stripped from normalised keys to form object ids.
    """

    def __init__(self, store_dir, uri_base="urn:uuid:"):
        factory = PairtreeStorageFactory()
        self.store = factory.get_store(store_dir=store_dir, uri_base=uri_base)

    def _object_id(self, key):
        """
        Map a graph key to the id used for its object in the store.

        The key is normalised with :func:`ordf.utils.uuid` and the
        store's ``uri_base`` is then stripped from the left.
        """
        # NOTE(review): str.lstrip() removes any leading run of the
        # *characters* in uri_base, it does not remove uri_base as a prefix.
        # With the default "urn:uuid:" that character set contains "d", so a
        # UUID beginning with "d" ends up stored under a shortened id.
        # Switching to real prefix removal would change where already-stored
        # objects are looked up, so the original behaviour is kept here on
        # purpose; fix it together with a migration of existing stores.
        return uuid(key).lstrip(self.store.uri_base)

    def __getitem__(self, key):
        """
        Return the *Graph* stored under *key*.

        The graph's identifier is read from ``identifier.txt``; when that
        part is missing it is derived from *key* with
        :func:`ordf.utils.get_identifier`. When ``graph.rdf`` is missing
        the returned graph is left empty.
        """
        fsobj = self.store.get_object(self._object_id(key))
        try:
            identifier = fsobj.get_bytestream("identifier.txt")
        except PartNotFoundException:
            identifier = get_identifier(key)
        g = Graph(identifier=URIRef(identifier))
        try:
            g.parse(fsobj.get_bytestream("graph.rdf", streamable=True),
                    format="xml")
        except PartNotFoundException:
            # No serialised graph in this object: hand back the empty graph.
            pass
        return g

    def __setitem__(self, key, g):
        """
        Store graph *g* in the object addressed by *key*.

        *key* only decides where the object lives; the identifier written
        to ``identifier.txt`` is ``g.identifier``.
        """
        assert isinstance(g, _Graph)
        fsobj = self.store.get_object(self._object_id(key))
        fsobj.add_bytestream("identifier.txt", g.identifier)
        fsobj.add_bytestream("graph.rdf", g.serialize(format="xml"))

    def __delitem__(self, key):
        """Delete the stored object addressed by *key*."""
        self.store.delete_object(self._object_id(key))

    def __iter__(self):
        """
        Yield every graph in the store.

        A graph that cannot be read is logged at error level, with its
        traceback, and skipped, so one bad object does not end the
        iteration.
        """
        for object_id in self.store.list_ids():
            identifier = "urn:uuid:" + object_id
            try:
                graph = self[identifier]
            except Exception:
                # Only the read is guarded. KeyboardInterrupt / SystemExit
                # still propagate, and errors raised by the consumer at the
                # yield below are no longer mistaken for read errors.
                log.error("error reading %s:\n%s", identifier, format_exc())
            else:
                yield graph