# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2014, 2015, 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
"""Persistent identifier store and registration."""
from __future__ import absolute_import, print_function
import logging
import uuid
from enum import Enum
import six
from flask_babelex import gettext
from invenio_db import db
from speaklater import make_lazy_gettext
from sqlalchemy import func
from sqlalchemy.exc import IntegrityError, SQLAlchemyError
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy_utils.models import Timestamp
from sqlalchemy_utils.types import ChoiceType, UUIDType
from .errors import PIDAlreadyExists, PIDDoesNotExistError, PIDInvalidAction, \
PIDObjectAlreadyAssigned
# ``_`` is the lazy translation marker built by speaklater's
# ``make_lazy_gettext`` around flask_babelex's ``gettext``.
_ = make_lazy_gettext(lambda: gettext)

logger = logging.getLogger('invenio-pidstore')

# Human readable (lazily translated) titles, keyed by ``PIDStatus`` member
# name. ``PIDStatus.title`` looks its own name up in this mapping.
PID_STATUS_TITLES = dict(
    NEW=_('New'),
    RESERVED=_('Reserved'),
    REGISTERED=_('Registered'),
    REDIRECTED=_('Redirected'),
    DELETED=_('Deleted'),
)
class PIDStatus(Enum):
    """Constants for possible status of any given PID.

    Members compare equal to their raw one-character value (for example
    ``PIDStatus.NEW == 'N'``) and hash like that value, so they can be used
    as dictionary keys and set members.
    """

    __order__ = 'NEW RESERVED REGISTERED REDIRECTED DELETED'

    NEW = 'N'
    """PID has *not* yet been registered with the service provider."""

    RESERVED = 'K'
    """PID reserved in the service provider but not yet fully registered."""

    REGISTERED = 'R'
    """PID has been registered with the service provider."""

    REDIRECTED = 'M'
    """PID has been redirected to another persistent identifier."""

    DELETED = 'D'
    """PID has been deleted/inactivated with the service provider.

    This should happen very rarely, and must be kept track of, as the PID
    should not be reused for something else.
    """

    def __init__(self, value):
        """Hack.

        Accepts the member value and intentionally does nothing.
        """
        # NOTE(review): the reason this no-op initializer is needed is not
        # visible in this file -- confirm before removing.

    def __eq__(self, other):
        """Equality test.

        :param other: A raw status value or another :class:`PIDStatus`.
        :returns: Result of comparing this member's value with ``other``.
        """
        return self.value == other

    def __ne__(self, other):
        """Inequality test, the negation of :meth:`__eq__`.

        Defined explicitly because Python 2 does not derive ``!=`` from
        ``__eq__``.
        """
        return not self.__eq__(other)

    def __hash__(self):
        """Hash of the member's value.

        Overriding ``__eq__`` alone makes a class unhashable on Python 3.
        Hashing the value keeps hashing consistent with equality against
        raw values.
        """
        return hash(self.value)

    def __str__(self):
        """Return its value."""
        return self.value

    @property
    def title(self):
        """Return human readable title."""
        return PID_STATUS_TITLES[self.name]
class PersistentIdentifier(db.Model, Timestamp):
    """Store and register persistent identifiers.

    Assumptions:

    * Persistent identifiers can be represented as a string of max 255 chars.
    * An object has many persistent identifiers.
    * A persistent identifier has one and only one object.
    """

    __tablename__ = 'pidstore_pid'
    __table_args__ = (
        db.Index('uidx_type_pid', 'pid_type', 'pid_value', unique=True),
        db.Index('idx_status', 'status'),
        db.Index('idx_object', 'object_type', 'object_uuid'),
    )

    id = db.Column(db.Integer, primary_key=True)
    """Id of persistent identifier entry."""

    pid_type = db.Column(db.String(6), nullable=False)
    """Persistent Identifier Schema."""

    pid_value = db.Column(db.String(255), nullable=False)
    """Persistent Identifier."""

    pid_provider = db.Column(db.String(8), nullable=True)
    """Persistent Identifier Provider"""

    status = db.Column(ChoiceType(PIDStatus, impl=db.CHAR(1)), nullable=False)
    """Status of persistent identifier, e.g. registered, reserved, deleted."""

    object_type = db.Column(db.String(3), nullable=True)
    """Object Type - e.g. rec for record."""

    object_uuid = db.Column(UUIDType, nullable=True)
    """Object ID - e.g. a record id."""

    #
    # Class methods
    #
    @classmethod
    def create(cls, pid_type, pid_value, pid_provider=None,
               status=PIDStatus.NEW, object_type=None, object_uuid=None):
        """Create a new persistent identifier with specific type and value.

        :param pid_type: Persistent identifier type.
        :param pid_value: Persistent identifier value.
        :param pid_provider: Persistent identifier provider. (default: None).
        :param status: Current PID status.
            (Default: :attr:`invenio_pidstore.models.PIDStatus.NEW`)
        :param object_type: The object type is a string that identify its type.
            (default: None).
        :param object_uuid: The object UUID. (default: None).
        :raises invenio_pidstore.errors.PIDAlreadyExists: If the database
            reports an integrity error while adding the PID.
        :returns: A :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        """
        try:
            with db.session.begin_nested():
                obj = cls(pid_type=pid_type,
                          pid_value=pid_value,
                          pid_provider=pid_provider,
                          status=status)
                # The object is only assigned when both parts are given.
                if object_type and object_uuid:
                    obj.assign(object_type, object_uuid)
                db.session.add(obj)
            logger.info("Created PID {0}:{1}".format(pid_type, pid_value),
                        extra={'pid': obj})
        except IntegrityError:
            logger.exception(
                "PID already exists: {0}:{1}".format(pid_type, pid_value),
                extra=dict(
                    pid_type=pid_type,
                    pid_value=pid_value,
                    pid_provider=pid_provider,
                    status=status,
                    object_type=object_type,
                    object_uuid=object_uuid,
                ))
            raise PIDAlreadyExists(pid_type=pid_type, pid_value=pid_value)
        except SQLAlchemyError:
            logger.exception(
                "Failed to create PID: {0}:{1}".format(pid_type, pid_value),
                extra=dict(
                    pid_type=pid_type,
                    pid_value=pid_value,
                    pid_provider=pid_provider,
                    status=status,
                    object_type=object_type,
                    object_uuid=object_uuid,
                ))
            raise
        return obj

    @classmethod
    def get(cls, pid_type, pid_value, pid_provider=None):
        """Get persistent identifier.

        :param pid_type: Persistent identifier type.
        :param pid_value: Persistent identifier value. It is converted to
            text before querying.
        :param pid_provider: Persistent identifier provider. Only used as a
            filter when truthy. (default: None).
        :raises: :exc:`invenio_pidstore.errors.PIDDoesNotExistError` if no
            PID is found.
        :returns: A :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        """
        try:
            args = dict(pid_type=pid_type, pid_value=six.text_type(pid_value))
            if pid_provider:
                args['pid_provider'] = pid_provider
            return cls.query.filter_by(**args).one()
        except NoResultFound:
            raise PIDDoesNotExistError(pid_type, pid_value)

    @classmethod
    def get_by_object(cls, pid_type, object_type, object_uuid):
        """Get a persistent identifier for a given object.

        :param pid_type: Persistent identifier type.
        :param object_type: The object type is a string that identify its type.
        :param object_uuid: The object UUID.
        :raises invenio_pidstore.errors.PIDDoesNotExistError: If no PID is
            found.
        :returns: A :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        """
        try:
            return cls.query.filter_by(
                pid_type=pid_type,
                object_type=object_type,
                object_uuid=object_uuid
            ).one()
        except NoResultFound:
            # The PID value is unknown here, hence ``None``.
            raise PIDDoesNotExistError(pid_type, None)

    #
    # Assigned object methods
    #
    def has_object(self):
        """Determine if this PID has an assigned object.

        :returns: `True` if both the object type and the object UUID are set.
        """
        return bool(self.object_type and self.object_uuid)

    def get_assigned_object(self, object_type=None):
        """Return the current assigned object UUID.

        :param object_type: If it's specified, returns only if the PID
            object_type is the same, otherwise returns None. (default: None).
        :returns: The object UUID.
        """
        if object_type is not None:
            if self.object_type == object_type:
                return self.object_uuid
            else:
                return None
        return self.object_uuid

    def assign(self, object_type, object_uuid, overwrite=False):
        """Assign this persistent identifier to a given object.

        If an object is already assigned to the PID, an exception is raised
        unless ``overwrite=True``, in which case the previous assignment is
        removed first. Re-assigning the very same object is a no-op.

        :param object_type: The object type is a string that identify its type.
        :param object_uuid: The object UUID. Values that are not
            :class:`uuid.UUID` instances are passed to ``uuid.UUID()``.
        :param overwrite: Force PID overwrites in case was previously assigned.
        :raises invenio_pidstore.errors.PIDInvalidAction: If the PID was
            previously deleted.
        :raises invenio_pidstore.errors.PIDObjectAlreadyAssigned: If the PID
            was previously assigned with a different type/uuid.
        :returns: `True` if the PID is successfully assigned.
        """
        if self.is_deleted():
            raise PIDInvalidAction(
                "You cannot assign objects to a deleted/redirected persistent"
                " identifier."
            )

        if not isinstance(object_uuid, uuid.UUID):
            object_uuid = uuid.UUID(object_uuid)

        if self.object_type or self.object_uuid:
            # The object is already assigned to this pid.
            if object_type == self.object_type and \
               object_uuid == self.object_uuid:
                return True
            if not overwrite:
                raise PIDObjectAlreadyAssigned(object_type,
                                               object_uuid)
            self.unassign()

        try:
            with db.session.begin_nested():
                self.object_type = object_type
                self.object_uuid = object_uuid
                db.session.add(self)
        except SQLAlchemyError:
            logger.exception("Failed to assign {0}:{1}".format(
                object_type, object_uuid), extra=dict(pid=self))
            raise
        logger.info("Assigned object {0}:{1}".format(
            object_type, object_uuid), extra=dict(pid=self))
        return True

    def unassign(self):
        """Unassign the registered object.

        Note:
        Only registered PIDs can be redirected so we set it back to registered.

        For a redirected PID the :class:`Redirect` row referenced by
        ``object_uuid`` is deleted as well.

        :returns: `True` if the PID is successfully unassigned.
        """
        if self.object_uuid is None and self.object_type is None:
            return True

        try:
            with db.session.begin_nested():
                if self.is_redirected():
                    db.session.delete(Redirect.query.get(self.object_uuid))
                    # Only registered PIDs can be redirected so we set it back
                    # to registered
                    self.status = PIDStatus.REGISTERED
                self.object_type = None
                self.object_uuid = None
                db.session.add(self)
        except SQLAlchemyError:
            # Include the PID in the message, like the success message does.
            logger.exception(
                "Failed to unassign object from {0}.".format(self),
                extra=dict(pid=self))
            raise
        logger.info("Unassigned object from {0}.".format(self),
                    extra=dict(pid=self))
        return True

    def get_redirect(self):
        """Get redirected persistent identifier.

        Looks up the :class:`Redirect` row whose id is stored in
        ``object_uuid`` and returns its target PID.

        :returns: The :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        """
        # NOTE(review): no guard for a missing Redirect row; presumably the
        # lookup yields None then and ``.pid`` fails -- confirm callers only
        # use this on redirected PIDs.
        return Redirect.query.get(self.object_uuid).pid

    #
    # Status methods.
    #
    def redirect(self, pid):
        """Redirect persistent identifier to another persistent identifier.

        :param pid: The :class:`invenio_pidstore.models.PersistentIdentifier`
            where redirect the PID.
        :raises invenio_pidstore.errors.PIDInvalidAction: If the PID is
            neither registered nor already redirecting to another PID.
        :raises invenio_pidstore.errors.PIDDoesNotExistError: If PID is not
            found.
        :returns: `True` if the PID is successfully redirect.
        """
        if not (self.is_registered() or self.is_redirected()):
            raise PIDInvalidAction("Persistent identifier is not registered.")

        try:
            with db.session.begin_nested():
                if self.is_redirected():
                    # Re-target the existing redirect.
                    r = Redirect.query.get(self.object_uuid)
                    r.pid = pid
                else:
                    with db.session.begin_nested():
                        r = Redirect(pid=pid)
                        db.session.add(r)

                # The redirect id is stored in ``object_uuid``.
                self.status = PIDStatus.REDIRECTED
                self.object_type = None
                self.object_uuid = r.id
                db.session.add(self)
        except IntegrityError:
            raise PIDDoesNotExistError(pid.pid_type, pid.pid_value)
        except SQLAlchemyError:
            logger.exception("Failed to redirect to {0}".format(
                pid), extra=dict(pid=self))
            raise
        logger.info("Redirected PID to {0}".format(pid), extra=dict(pid=self))
        return True

    def reserve(self):
        """Reserve the persistent identifier.

        Note, the reserve method may be called multiple times, even if it was
        already reserved.

        :raises: :exc:`invenio_pidstore.errors.PIDInvalidAction` if the PID is
            neither new nor already reserved.
        :returns: `True` if the PID is successfully reserved.
        """
        if not (self.is_new() or self.is_reserved()):
            raise PIDInvalidAction(
                "Persistent identifier is not new or reserved.")

        try:
            with db.session.begin_nested():
                self.status = PIDStatus.RESERVED
                db.session.add(self)
        except SQLAlchemyError:
            logger.exception("Failed to reserve PID.", extra=dict(pid=self))
            raise
        logger.info("Reserved PID.", extra=dict(pid=self))
        return True

    def register(self):
        """Register the persistent identifier with the provider.

        :raises invenio_pidstore.errors.PIDInvalidAction: If the PID is
            already registered or is deleted or is a redirection to another
            PID.
        :returns: `True` if the PID is successfully register.
        """
        if self.is_registered() or self.is_deleted() or self.is_redirected():
            raise PIDInvalidAction(
                "Persistent identifier has already been registered"
                " or is deleted.")

        try:
            with db.session.begin_nested():
                self.status = PIDStatus.REGISTERED
                db.session.add(self)
        except SQLAlchemyError:
            logger.exception("Failed to register PID.", extra=dict(pid=self))
            raise
        logger.info("Registered PID.", extra=dict(pid=self))
        return True

    def delete(self):
        """Delete the persistent identifier.

        If the persistent identifier is still new, it is removed from the
        database.
        Otherwise, it's marked as
        :data:`invenio_pidstore.models.PIDStatus.DELETED`.

        :returns: `True` if the PID is successfully removed.
        """
        removed = False
        try:
            with db.session.begin_nested():
                if self.is_new():
                    # New persistent identifier which haven't been registered
                    # yet.
                    db.session.delete(self)
                    removed = True
                else:
                    self.status = PIDStatus.DELETED
                    db.session.add(self)
        except SQLAlchemyError:
            logger.exception("Failed to delete PID.", extra=dict(pid=self))
            raise

        if removed:
            logger.info("Deleted PID (removed).", extra=dict(pid=self))
        else:
            logger.info("Deleted PID.", extra=dict(pid=self))
        return True

    def sync_status(self, status):
        """Synchronize persistent identifier status.

        Used when the provider uses an external service, which might have been
        modified outside of our system.

        :param status: The new status to set.
        :returns: `True` if the PID is successfully sync.
        """
        if self.status == status:
            return True

        try:
            with db.session.begin_nested():
                self.status = status
                db.session.add(self)
        except SQLAlchemyError:
            logger.exception("Failed to sync status {0}.".format(status),
                             extra=dict(pid=self))
            raise
        logger.info("Synced PID status to {0}.".format(status),
                    extra=dict(pid=self))
        return True

    def is_redirected(self):
        """Return true if the persistent identifier has been redirected.

        :returns: A boolean value.
        """
        return self.status == PIDStatus.REDIRECTED

    def is_registered(self):
        """Return true if the persistent identifier has been registered.

        :returns: A boolean value.
        """
        return self.status == PIDStatus.REGISTERED

    def is_deleted(self):
        """Return true if the persistent identifier has been deleted.

        :returns: A boolean value.
        """
        return self.status == PIDStatus.DELETED

    def is_new(self):
        """Return true if the PID has not yet been registered or reserved.

        :returns: A boolean value.
        """
        return self.status == PIDStatus.NEW

    def is_reserved(self):
        """Return true if the PID has been reserved.

        :returns: A boolean value.
        """
        return self.status == PIDStatus.RESERVED

    def __repr__(self):
        """Get representation of object."""
        return "<PersistentIdentifier {0}:{1}{3} ({2})>".format(
            self.pid_type, self.pid_value, self.status,
            " / {0}:{1}".format(self.object_type, self.object_uuid) if
            self.object_type else ""
        )
class Redirect(db.Model, Timestamp):
    """Redirection target of a persistent identifier.

    Each row points at the PID that another PID has been redirected to.

    Example:

    .. code-block:: python

        pid1 = PersistentIdentifier.get(pid_type="recid", pid_value="1")
        pid2 = PersistentIdentifier.get(pid_type="recid", pid_value="2")
        pid1.redirect(pid=pid2)
        assert pid2.pid_value == pid1.get_redirect().pid_value
    """

    __tablename__ = 'pidstore_redirect'

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    """Primary key of the redirect entry (a UUID, random by default)."""

    pid_id = db.Column(
        db.Integer,
        db.ForeignKey(
            PersistentIdentifier.id,
            onupdate="CASCADE",
            ondelete="RESTRICT",
        ),
        nullable=False,
    )
    """Foreign key to the target persistent identifier."""

    pid = db.relationship(PersistentIdentifier, backref='redirects')
    """Target persistent identifier (reverse side: ``redirects``)."""
class RecordIdentifier(db.Model):
    """Sequence generator for integer record identifiers.

    This model exists only to hand out integer record identifiers in
    sequence, relying on the auto increment feature of the underlying
    database in a transaction friendly manner. It is primarily meant for
    legacy Invenio instances that want to keep their current record
    identifier scheme. New instances are strongly encouraged to use e.g.
    UUIDs as record identifiers instead of auto incrementing integers.
    """

    __tablename__ = 'pidstore_recid'

    recid = db.Column(
        db.BigInteger().with_variant(db.Integer, "sqlite"),
        primary_key=True,
        autoincrement=True,
    )

    @classmethod
    def next(cls):
        """Return next available record identifier."""
        try:
            with db.session.begin_nested():
                identifier = cls()
                db.session.add(identifier)
        except IntegrityError:  # pragma: no cover
            with db.session.begin_nested():
                # The table was probably changed behind the models API;
                # realign the sequence with the table, then try once more.
                cls._set_sequence(cls.max())
                identifier = cls()
                db.session.add(identifier)
        return identifier.recid

    @classmethod
    def max(cls):
        """Get max record identifier (``0`` when there is none)."""
        highest = db.session.query(func.max(cls.recid)).scalar()
        return highest or 0

    @classmethod
    def _set_sequence(cls, val):
        """Internal function to reset sequence to specific value.

        Note: this function is for PostgreSQL compatibility; it does
        nothing on any other dialect.

        :param val: The value to be set.
        """
        if db.engine.dialect.name == 'postgresql':  # pragma: no cover
            statement = (
                "SELECT setval(pg_get_serial_sequence("
                "'{0}', 'recid'), :newval)".format(cls.__tablename__)
            )
            db.session.execute(statement, dict(newval=val))

    @classmethod
    def insert(cls, val):
        """Insert a record identifier.

        :param val: The `recid` column value to insert.
        """
        with db.session.begin_nested():
            db.session.add(cls(recid=val))
            # NOTE(review): the sequence reset is kept inside the savepoint;
            # the source's indentation was lost -- confirm the nesting.
            cls._set_sequence(cls.max())
# Names exported by a star-import of this module.
__all__ = (
    'PersistentIdentifier',
    'PIDStatus',
    'RecordIdentifier',
    'Redirect',
)