# Source code for invenio_records_rest.serializers.citeproc

# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2016 CERN.
#
# Invenio is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""CSL Citation Formatter serializer for records."""

from __future__ import absolute_import, print_function

import json
import re

from citeproc import Citation, CitationItem, CitationStylesBibliography, \
    CitationStylesStyle, formatter
from citeproc.source.bibtex import BibTeX
from citeproc.source.json import CiteProcJSON
from flask import abort, has_request_context, request
from webargs import fields
from webargs.flaskparser import FlaskParser

try:
    from citeproc_styles import get_style_filepath
    from citeproc_styles.errors import StyleNotFoundError
except ImportError:
    # Only a failed import is tolerated here; any other error raised while
    # importing the package propagates instead of being misreported as
    # "not found". In this branch `get_style_filepath` and
    # `StyleNotFoundError` stay undefined in this module.
    import warnings
    warnings.warn('citeproc_styles not found. '
                  'Please install to enable Citeproc Serialization.')


class CiteprocSerializer(object):
    """CSL Citation Formatter serializer for records.

    In order to produce a formatted citation of a record through citeproc-py,
    we need a CSL-JSON or BibTeX serialized version of it. Since there may be
    already an implementation of such a serializer, it can be passed in the
    constructor of this class. This serializer has to implement a `serialize`
    method that returns the CSL-JSON/BibTeX result.
    """

    _default_style = 'harvard1'
    """The `citeproc-py` library supports by default the 'harvard1' style."""

    _default_locale = 'en-US'
    """Locale used when neither the request nor the caller provides one."""

    _user_args = {
        'style': fields.Str(missing=_default_style),
        'locale': fields.Str(missing=_default_locale)
    }
    """Arguments for the webargs parser."""

    _valid_formats = ('csl', 'bibtex')
    """Supported formats by citeproc-py."""

    def __init__(self, serializer, record_format='csl'):
        """Initialize the inner record serializer.

        :param serializer: Serializer object that does the record serialization
            to a format that `citeproc-py` can process (CSL-JSON or BibTeX).
            The object has to implement a `serialize` method that matches the
            signature of the `serialize` method of this class.
        :param record_format: Format that the serializer produces; one of
            the values in ``_valid_formats``.
        :raises AssertionError: If ``record_format`` is not supported or the
            serializer has no callable ``serialize`` attribute.
        """
        assert record_format in self._valid_formats, \
            'record_format must be one of {0!r}'.format(self._valid_formats)

        # A missing attribute yields None, which is not callable, so this
        # single check covers both "absent" and "not a method".
        assert callable(getattr(serializer, 'serialize', None)), \
            'serializer must implement a callable `serialize` method'

        self.serializer = serializer
        self.record_format = record_format

    @classmethod
    def _get_args(cls, **kwargs):
        """Parse style and locale.

        Argument location precedence: kwargs > view_args > query

        :param kwargs: Optional ``style`` and ``locale`` overrides; any other
            keys are ignored.
        :returns: Dict with ``style`` (resolved through
            ``get_style_filepath``) and ``locale``.
        :raises StyleNotFoundError: If the style cannot be resolved and there
            is no request context; inside a request ``abort(400)`` is called
            instead.
        """
        csl_args = {
            'style': cls._default_style,
            'locale': cls._default_locale
        }

        # Request-supplied values are only consulted inside a request.
        if has_request_context():
            parser = FlaskParser(locations=('view_args', 'query'))
            csl_args.update(parser.parse(cls._user_args, request))

        # Explicit keyword arguments are applied last so they win over
        # whatever came from the request.
        csl_args.update({k: kwargs[k]
                         for k in ('style', 'locale') if k in kwargs})

        try:
            # The style name is lower-cased before the lookup.
            csl_args['style'] = get_style_filepath(csl_args['style'].lower())
        except StyleNotFoundError:
            if has_request_context():
                abort(400)
            raise
        return csl_args

    def _get_source(self, data):
        """Get source data object for citeproc-py.

        :param data: Output of the inner serializer: a JSON string when
            ``record_format`` is ``'csl'``, otherwise the value handed to
            ``BibTeX``.
        :returns: A ``CiteProcJSON`` or ``BibTeX`` source object.
        :raises ValueError: If ``record_format`` is not a supported format.
        """
        if self.record_format == 'csl':
            # CiteProcJSON is given a list of items, hence the wrapping list.
            return CiteProcJSON([json.loads(data)])
        if self.record_format == 'bibtex':
            return BibTeX(data)
        # Reachable when the constructor asserts were stripped (``-O``) or the
        # attribute was changed afterwards; fail loudly instead of returning
        # None.
        raise ValueError(
            'Unsupported record format: {0!r}'.format(self.record_format))

    def _clean_result(self, text):
        """Remove double spaces and punctuation.

        Runs of two or more whitespace characters become a single space and
        runs of two or more dots become a single dot.

        :param text: Formatted citation text.
        :returns: The cleaned text.
        """
        # Raw strings keep ``\s`` and ``\.`` from being parsed as (invalid)
        # string escape sequences.
        text = re.sub(r'\s\s+', ' ', text)
        text = re.sub(r'\.\.+', '.', text)
        return text

    def serialize(self, pid, record, links_factory=None, **kwargs):
        """Serialize a single record.

        :param pid: Persistent identifier instance.
        :param record: Record instance.
        :param links_factory: Factory function for record links.
        :param kwargs: May carry ``style`` and ``locale`` overrides, see
            ``_get_args``.
        :returns: The cleaned, plain-text first bibliography entry.
        """
        # Delegate to the inner serializer to obtain CSL-JSON/BibTeX data.
        data = self.serializer.serialize(pid, record, links_factory)
        source = self._get_source(data)
        style = CitationStylesStyle(validate=False, **self._get_args(**kwargs))
        bib = CitationStylesBibliography(style, source, formatter.plain)
        # The citation item is looked up by the PID value.
        citation = Citation([CitationItem(pid.pid_value)])
        bib.register(citation)

        return self._clean_result(''.join(bib.bibliography()[0]))