Source code for invenio_pidstore.models

# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""Persistent identifier store and registration."""

from __future__ import absolute_import, print_function

import logging
import uuid
from enum import Enum

import six
from flask_babelex import gettext
from invenio_db import db
from speaklater import make_lazy_gettext
from sqlalchemy import func
from sqlalchemy.exc import IntegrityError, SQLAlchemyError
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy_utils.models import Timestamp
from sqlalchemy_utils.types import ChoiceType, UUIDType

from .errors import PIDAlreadyExists, PIDDoesNotExistError, PIDInvalidAction, \
    PIDObjectAlreadyAssigned

_ = make_lazy_gettext(lambda: gettext)

logger = logging.getLogger('invenio-pidstore')


PID_STATUS_TITLES = {
    'NEW': _('New'),
    'RESERVED': _('Reserved'),
    'REGISTERED': _('Registered'),
    'REDIRECTED': _('Redirected'),
    'DELETED': _('Deleted'),
}


[docs]class PIDStatus(Enum): """Constants for possible status of any given PID.""" __order__ = 'NEW RESERVED REGISTERED REDIRECTED DELETED' NEW = 'N' """PID has *not* yet been registered with the service provider.""" RESERVED = 'K' """PID reserved in the service provider but not yet fully registered.""" REGISTERED = 'R' """PID has been registered with the service provider.""" REDIRECTED = 'M' """PID has been redirected to another persistent identifier.""" DELETED = 'D' """PID has been deleted/inactivated with the service provider. This should happen very rarely, and must be kept track of, as the PID should not be reused for something else. """ def __init__(self, value): """Hack.""" def __eq__(self, other): """Equality test.""" return self.value == other def __str__(self): """Return its value.""" return self.value @property def title(self): """Return human readable title.""" return PID_STATUS_TITLES[self.name]
[docs]class PersistentIdentifier(db.Model, Timestamp): """Store and register persistent identifiers. Assumptions: * Persistent identifiers can be represented as a string of max 255 chars. * An object has many persistent identifiers. * A persistent identifier has one and only one object. """ __tablename__ = 'pidstore_pid' __table_args__ = ( db.Index('uidx_type_pid', 'pid_type', 'pid_value', unique=True), db.Index('idx_status', 'status'), db.Index('idx_object', 'object_type', 'object_uuid'), ) id = db.Column(db.Integer, primary_key=True) """Id of persistent identifier entry.""" pid_type = db.Column(db.String(6), nullable=False) """Persistent Identifier Schema.""" pid_value = db.Column(db.String(255), nullable=False) """Persistent Identifier.""" pid_provider = db.Column(db.String(8), nullable=True) """Persistent Identifier Provider""" status = db.Column(ChoiceType(PIDStatus, impl=db.CHAR(1)), nullable=False) """Status of persistent identifier, e.g. registered, reserved, deleted.""" object_type = db.Column(db.String(3), nullable=True) """Object Type - e.g. rec for record.""" object_uuid = db.Column(UUIDType, nullable=True) """Object ID - e.g. a record id.""" # # Class methods # @classmethod
[docs] def create(cls, pid_type, pid_value, pid_provider=None, status=PIDStatus.NEW, object_type=None, object_uuid=None,): """Create a new persistent identifier with specific type and value. :param pid_type: Persistent identifier type. :param pid_value: Persistent identifier value. :param pid_provider: Persistent identifier provider. (default: None). :param status: Current PID status. (Default: :attr:`invenio_pidstore.models.PIDStatus.NEW`) :param object_type: The object type is a string that identify its type. (default: None). :param object_uuid: The object UUID. (default: None). :returns: A :class:`invenio_pidstore.models.PersistentIdentifier` instance. """ try: with db.session.begin_nested(): obj = cls(pid_type=pid_type, pid_value=pid_value, pid_provider=pid_provider, status=status) if object_type and object_uuid: obj.assign(object_type, object_uuid) db.session.add(obj) logger.info("Created PID {0}:{1}".format(pid_type, pid_value), extra={'pid': obj}) except IntegrityError: logger.exception( "PID already exists: {0}:{1}".format(pid_type, pid_value), extra=dict( pid_type=pid_type, pid_value=pid_value, pid_provider=pid_provider, status=status, object_type=object_type, object_uuid=object_uuid, )) raise PIDAlreadyExists(pid_type=pid_type, pid_value=pid_value) except SQLAlchemyError: logger.exception( "Failed to create PID: {0}:{1}".format(pid_type, pid_value), extra=dict( pid_type=pid_type, pid_value=pid_value, pid_provider=pid_provider, status=status, object_type=object_type, object_uuid=object_uuid, )) raise return obj
@classmethod
[docs] def get(cls, pid_type, pid_value, pid_provider=None): """Get persistent identifier. :param pid_type: Persistent identifier type. :param pid_value: Persistent identifier value. :param pid_provider: Persistent identifier provider. (default: None). :raises: :exc:`invenio_pidstore.errors.PIDDoesNotExistError` if no PID is found. :returns: A :class:`invenio_pidstore.models.PersistentIdentifier` instance. """ try: args = dict(pid_type=pid_type, pid_value=six.text_type(pid_value)) if pid_provider: args['pid_provider'] = pid_provider return cls.query.filter_by(**args).one() except NoResultFound: raise PIDDoesNotExistError(pid_type, pid_value)
@classmethod
[docs] def get_by_object(cls, pid_type, object_type, object_uuid): """Get a persistent identifier for a given object. :param pid_type: Persistent identifier type. :param object_type: The object type is a string that identify its type. :param object_uuid: The object UUID. :raises invenio_pidstore.errors.PIDDoesNotExistError: If no PID is found. :returns: A :class:`invenio_pidstore.models.PersistentIdentifier` instance. """ try: return cls.query.filter_by( pid_type=pid_type, object_type=object_type, object_uuid=object_uuid ).one() except NoResultFound: raise PIDDoesNotExistError(pid_type, None)
# # Assigned object methods #
[docs] def has_object(self): """Determine if this PID has an assigned object. :returns: `True` if the PID has a object assigned. """ return bool(self.object_type and self.object_uuid)
[docs] def get_assigned_object(self, object_type=None): """Return the current assigned object UUID. :param object_type: If it's specified, returns only if the PID object_type is the same, otherwise returns None. (default: None). :returns: The object UUID. """ if object_type is not None: if self.object_type == object_type: return self.object_uuid else: return None return self.object_uuid
[docs] def assign(self, object_type, object_uuid, overwrite=False): """Assign this persistent identifier to a given object. Note, the persistent identifier must first have been reserved. Also, if an existing object is already assigned to the pid, it will raise an exception unless overwrite=True. :param object_type: The object type is a string that identify its type. :param object_uuid: The object UUID. :param overwrite: Force PID overwrites in case was previously assigned. :raises invenio_pidstore.errors.PIDInvalidAction: If the PID was previously deleted. :raises invenio_pidstore.errors.PIDObjectAlreadyAssigned: If the PID was previously assigned with a different type/uuid. :returns: `True` if the PID is successfully assigned. """ if self.is_deleted(): raise PIDInvalidAction( "You cannot assign objects to a deleted/redirected persistent" " identifier." ) if not isinstance(object_uuid, uuid.UUID): object_uuid = uuid.UUID(object_uuid) if self.object_type or self.object_uuid: # The object is already assigned to this pid. if object_type == self.object_type and \ object_uuid == self.object_uuid: return True if not overwrite: raise PIDObjectAlreadyAssigned(object_type, object_uuid) self.unassign() try: with db.session.begin_nested(): self.object_type = object_type self.object_uuid = object_uuid db.session.add(self) except SQLAlchemyError: logger.exception("Failed to assign {0}:{1}".format( object_type, object_uuid), extra=dict(pid=self)) raise logger.info("Assigned object {0}:{1}".format( object_type, object_uuid), extra=dict(pid=self)) return True
[docs] def unassign(self): """Unassign the registered object. Note: Only registered PIDs can be redirected so we set it back to registered. :returns: `True` if the PID is successfully unassigned. """ if self.object_uuid is None and self.object_type is None: return True try: with db.session.begin_nested(): if self.is_redirected(): db.session.delete(Redirect.query.get(self.object_uuid)) # Only registered PIDs can be redirected so we set it back # to registered self.status = PIDStatus.REGISTERED self.object_type = None self.object_uuid = None db.session.add(self) except SQLAlchemyError: logger.exception("Failed to unassign object.".format(self), extra=dict(pid=self)) raise logger.info("Unassigned object from {0}.".format(self), extra=dict(pid=self)) return True
[docs] def get_redirect(self): """Get redirected persistent identifier. :returns: The :class:`invenio_pidstore.models.PersistentIdentifier` instance. """ return Redirect.query.get(self.object_uuid).pid
# # Status methods. #
[docs] def redirect(self, pid): """Redirect persistent identifier to another persistent identifier. :param pid: The :class:`invenio_pidstore.models.PersistentIdentifier` where redirect the PID. :raises invenio_pidstore.errors.PIDInvalidAction: If the PID is not registered or is not already redirecting to another PID. :raises invenio_pidstore.errors.PIDDoesNotExistError: If PID is not found. :returns: `True` if the PID is successfully redirect. """ if not (self.is_registered() or self.is_redirected()): raise PIDInvalidAction("Persistent identifier is not registered.") try: with db.session.begin_nested(): if self.is_redirected(): r = Redirect.query.get(self.object_uuid) r.pid = pid else: with db.session.begin_nested(): r = Redirect(pid=pid) db.session.add(r) self.status = PIDStatus.REDIRECTED self.object_type = None self.object_uuid = r.id db.session.add(self) except IntegrityError: raise PIDDoesNotExistError(pid.pid_type, pid.pid_value) except SQLAlchemyError: logger.exception("Failed to redirect to {0}".format( pid), extra=dict(pid=self)) raise logger.info("Redirected PID to {0}".format(pid), extra=dict(pid=self)) return True
[docs] def reserve(self): """Reserve the persistent identifier. Note, the reserve method may be called multiple times, even if it was already reserved. :raises: :exc:`invenio_pidstore.errors.PIDInvalidAction` if the PID is not new or is not already reserved a PID. :returns: `True` if the PID is successfully reserved. """ if not (self.is_new() or self.is_reserved()): raise PIDInvalidAction( "Persistent identifier is not new or reserved.") try: with db.session.begin_nested(): self.status = PIDStatus.RESERVED db.session.add(self) except SQLAlchemyError: logger.exception("Failed to reserve PID.", extra=dict(pid=self)) raise logger.info("Reserved PID.", extra=dict(pid=self)) return True
[docs] def register(self): """Register the persistent identifier with the provider. :raises invenio_pidstore.errors.PIDInvalidAction: If the PID is not already registered or is deleted or is a redirection to another PID. :returns: `True` if the PID is successfully register. """ if self.is_registered() or self.is_deleted() or self.is_redirected(): raise PIDInvalidAction( "Persistent identifier has already been registered" " or is deleted.") try: with db.session.begin_nested(): self.status = PIDStatus.REGISTERED db.session.add(self) except SQLAlchemyError: logger.exception("Failed to register PID.", extra=dict(pid=self)) raise logger.info("Registered PID.", extra=dict(pid=self)) return True
[docs] def delete(self): """Delete the persistent identifier. If the persistent identifier haven't been registered yet, it is removed from the database. Otherwise, it's marked as :data:`invenio_pidstore.models.PIDStatus.DELETED`. :returns: `True` if the PID is successfully removed. """ removed = False try: with db.session.begin_nested(): if self.is_new(): # New persistent identifier which haven't been registered # yet. db.session.delete(self) removed = True else: self.status = PIDStatus.DELETED db.session.add(self) except SQLAlchemyError: logger.exception("Failed to delete PID.", extra=dict(pid=self)) raise if removed: logger.info("Deleted PID (removed).", extra=dict(pid=self)) else: logger.info("Deleted PID.", extra=dict(pid=self)) return True
[docs] def sync_status(self, status): """Synchronize persistent identifier status. Used when the provider uses an external service, which might have been modified outside of our system. :param status: The new status to set. :returns: `True` if the PID is successfully sync. """ if self.status == status: return True try: with db.session.begin_nested(): self.status = status db.session.add(self) except SQLAlchemyError: logger.exception("Failed to sync status {0}.".format(status), extra=dict(pid=self)) raise logger.info("Synced PID status to {0}.".format(status), extra=dict(pid=self)) return True
[docs] def is_redirected(self): """Return true if the persistent identifier has been registered.""" return self.status == PIDStatus.REDIRECTED
[docs] def is_registered(self): """Return true if the persistent identifier has been registered. :returns: A :class:`invenio_pidstore.models.PIDStatus` status. """ return self.status == PIDStatus.REGISTERED
[docs] def is_deleted(self): """Return true if the persistent identifier has been deleted. :returns: A boolean value. """ return self.status == PIDStatus.DELETED
[docs] def is_new(self): """Return true if the PIDhas not yet been registered or reserved. :returns: A boolean value. """ return self.status == PIDStatus.NEW
[docs] def is_reserved(self): """Return true if the PID has not yet been reserved. :returns: A boolean value. """ return self.status == PIDStatus.RESERVED
def __repr__(self): """Get representation of object.""" return "<PersistentIdentifier {0}:{1}{3} ({2})>".format( self.pid_type, self.pid_value, self.status, " / {0}:{1}".format(self.object_type, self.object_uuid) if self.object_type else "" )
[docs]class Redirect(db.Model, Timestamp): """Redirect for a persistent identifier. You can redirect a PID to another one. E.g. .. code-block:: python pid1 = PersistentIdentifier.get(pid_type="recid", pid_value="1") pid2 = PersistentIdentifier.get(pid_type="recid", pid_value="2") pid1.redirect(pid=pid2) assert pid2.pid_value == pid.get_redirect().pid_value """ __tablename__ = 'pidstore_redirect' id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True) """Id of redirect entry.""" pid_id = db.Column( db.Integer, db.ForeignKey(PersistentIdentifier.id, onupdate="CASCADE", ondelete="RESTRICT"), nullable=False) """Persistent identifier.""" pid = db.relationship(PersistentIdentifier, backref='redirects') """Relationship to persistent identifier."""
[docs]class RecordIdentifier(db.Model): """Sequence generator for integer record identifiers. The sole purpose of this model is to generate integer record identifiers in sequence using the underlying database's auto increment features in a transaction friendly manner. The feature is primarily provided to support legacy Invenio instances to continue their current record identifier scheme. For new instances we strong encourage to not use auto incrementing record identifiers, but instead use e.g. UUIDs as record identifiers. """ __tablename__ = 'pidstore_recid' recid = db.Column( db.BigInteger().with_variant(db.Integer, "sqlite"), primary_key=True, autoincrement=True) @classmethod
[docs] def next(cls): """Return next available record identifier.""" try: with db.session.begin_nested(): obj = cls() db.session.add(obj) except IntegrityError: # pragma: no cover with db.session.begin_nested(): # Someone has likely modified the table without using the # models API. Let's fix the problem. cls._set_sequence(cls.max()) obj = cls() db.session.add(obj) return obj.recid
@classmethod
[docs] def max(cls): """Get max record identifier.""" max_recid = db.session.query(func.max(cls.recid)).scalar() return max_recid if max_recid else 0
@classmethod def _set_sequence(cls, val): """Internal function to reset sequence to specific value. Note: this function is for PostgreSQL compatibility. :param val: The value to be set. """ if db.engine.dialect.name == 'postgresql': # pragma: no cover db.session.execute( "SELECT setval(pg_get_serial_sequence(" "'{0}', 'recid'), :newval)".format( cls.__tablename__), dict(newval=val)) @classmethod
[docs] def insert(cls, val): """Insert a record identifier. :param val: The `recid` column value to insert. """ with db.session.begin_nested(): obj = cls(recid=val) db.session.add(obj) cls._set_sequence(cls.max())
__all__ = ( 'PersistentIdentifier', 'PIDStatus', 'RecordIdentifier', 'Redirect', )