Source code for ordf.handler.pt

"""
This is the core read/write storage module in ORDF. Its purpose is
to store RDF graphs in the filesystem in a specialised directory
hierarchy known as a Pairtree_. The reasoning behind this is explained
in the :ref:`rdf-back-end-design` section of this documentation.

.. autoclass:: PairTree
   :show-inheritance:

.. _Pairtree: https://confluence.ucop.edu/display/Curation/PairTree
"""

from pairtree import PairtreeStorageFactory, PartNotFoundException
from pairtree.pairtree_path import id_to_dirpath
from ordf.graph import Graph, _Graph
from ordf.term import URIRef
from ordf.utils import uuid, get_identifier
from ordf.handler import HandlerPlugin
from traceback import format_exc
from logging import getLogger

# Module-level logger, named after this module; used by PairTree.__iter__
# to report graphs that could not be read.
log = getLogger(__name__)

class PairTree(HandlerPlugin):
    """
    Read/write handler that keeps RDF graphs in a Pairtree on the filesystem.

    When adding a *Graph* to this store the first thing that is done is its
    *uri* is normalised to the *urn:uuid:* namespace. If its identifier is
    already in that namespace, nothing is changed. If it is not, it is
    converted. The original identifier is stored in the filesystem for later
    retrieval so that when a read operation is performed, the result is a
    *Graph* with the correct *uri* identifier. This is done with the utility
    function :func:`ordf.utils.uuid`.

    Every stored object carries two bytestreams: ``identifier.txt`` (the
    graph's own identifier) and ``graph.rdf`` (its RDF/XML serialisation).

    :param store_dir: directory passed to the pairtree storage factory.
    :param uri_base: prefix removed from the normalised key to obtain the
        pairtree object id (defaults to ``"urn:uuid:"``).
    """
    def __init__(self, store_dir, uri_base="urn:uuid:"):
        f = PairtreeStorageFactory()
        self.store = f.get_store(store_dir=store_dir, uri_base=uri_base)

    def _object_id(self, key):
        """
        Return the pairtree object id for *key*.

        The key is normalised with :func:`ordf.utils.uuid` and the store's
        ``uri_base`` is removed from the front, exactly once and only when it
        is really a prefix.

        ``str.lstrip`` must not be used here: it strips any leading
        characters that occur in its argument, so with ``"urn:uuid:"`` a
        UUID starting with the hex digit ``d`` would lose its first
        character(s) and be mapped to the wrong object.

        NOTE(review): objects written by earlier versions for such UUIDs
        live under the truncated id and will need migrating.
        """
        urn = uuid(key)
        base = self.store.uri_base
        if base and urn.startswith(base):
            return urn[len(base):]
        return urn

    def __getitem__(self, key):
        """
        Return the *Graph* stored under *key*.

        The graph's identifier is taken from ``identifier.txt`` when that
        part exists, otherwise from ``get_identifier(key)``. A missing
        ``graph.rdf`` part yields an empty graph rather than an error.
        """
        fsobj = self.store.get_object(self._object_id(key))
        try:
            identifier = fsobj.get_bytestream("identifier.txt")
        except PartNotFoundException:
            identifier = get_identifier(key)
        g = Graph(identifier=URIRef(identifier))
        try:
            stream = fsobj.get_bytestream("graph.rdf", streamable=True)
        except PartNotFoundException:
            # Nothing has been serialised for this object yet.
            return g
        try:
            g.parse(stream, format="xml")
        finally:
            # The streamable bytestream is an open file handle (per the
            # pairtree client); release it even when parsing fails.
            stream.close()
        return g

    def __setitem__(self, key, g):
        """
        Store graph *g* under *key*.

        Writes the graph's identifier to ``identifier.txt`` and its RDF/XML
        serialisation to ``graph.rdf``.
        """
        assert isinstance(g, _Graph)
        fsobj = self.store.get_object(self._object_id(key))
        fsobj.add_bytestream("identifier.txt", g.identifier)
        fsobj.add_bytestream("graph.rdf", g.serialize(format="xml"))

    def __delitem__(self, key):
        """Delete the object stored under *key* from the pairtree."""
        self.store.delete_object(self._object_id(key))

    def __iter__(self):
        """
        Yield every graph in the store.

        Reading is best-effort: an object that cannot be read is logged and
        skipped so that one bad entry does not abort the whole iteration.
        """
        # The loop variable is deliberately not called ``uuid`` so that it
        # does not shadow the imported ordf.utils.uuid function.
        for object_id in self.store.list_ids():
            identifier = "urn:uuid:" + object_id
            try:
                graph = self[identifier]
            except Exception:
                # Only ordinary errors are swallowed; KeyboardInterrupt and
                # SystemExit propagate. The yield sits outside the try, so
                # closing the generator or a consumer-side error is never
                # mistaken for a read failure.
                log.error("error reading %s:\n%s", identifier, format_exc())
                continue
            yield graph
Navigation

Source code for ordf.handler.pt

Quick search

Navigation