Source code for inspire_schemas.utils

# -*- coding: utf-8 -*-
#
# This file is part of INSPIRE-SCHEMAS.
# Copyright (C) 2016 CERN.
#
# INSPIRE-SCHEMAS is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# INSPIRE-SCHEMAS is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with INSPIRE-SCHEMAS; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

"""Public api for methods and functions to handle/verify the jsonschemas."""
import datetime
import json
import os
import re
import warnings

from jsonschema import validate as jsonschema_validate
from jsonschema import RefResolver, draft4_format_checker

from pkg_resources import resource_filename
from six.moves.urllib.parse import urlsplit

from .errors import SchemaKeyNotFound, SchemaNotFound


_schema_root_path = os.path.abspath(resource_filename(__name__, 'records'))


[docs]class LocalRefResolver(RefResolver): """Simple resolver to handle non-uri relative paths."""
[docs] def resolve_remote(self, uri): """Resolve a uri or relative path to a schema.""" try: return super(LocalRefResolver, self).resolve_remote(uri) except ValueError: return super(LocalRefResolver, self).resolve_remote( 'file://' + get_schema_path(uri.rsplit('.json', 1)[0]) )
[docs]def get_schema_path(schema): """Retrieve the installed path for the given schema. :param schema: String with the (relative or absolute) url of the schema to validate, for example, 'records/authors.json' or 'jobs.json', or by just the name like 'jobs'. :type schema: str :return: The path or the given schema name. :rtype: str """ def _strip_first_path_elem(path): """Pass doctests. Strip the first element of the given path, returning an empty string if there are no more elements. For example, 'something/other' will end up as 'other', but passing then 'other' will return '' """ stripped_path = path.split(os.path.sep, 1)[1:] return ''.join(stripped_path) def _schema_to_normalized_path(schema): """Pass doctests. Extracts the path from the url, makes sure to get rid of any '..' in the path and adds the json extension if not there. """ path = os.path.normpath(os.path.sep + urlsplit(schema).path) if path.startswith(os.path.sep): path = path[1:] if not path.endswith('.json'): path += '.json' return path path = _schema_to_normalized_path(schema) while path: schema_path = os.path.abspath(os.path.join(_schema_root_path, path)) if os.path.exists(schema_path): return os.path.abspath(schema_path) path = _strip_first_path_elem(path) raise SchemaNotFound(schema=schema)
[docs]def load_schema(schema_name): """Load the given schema from wherever it's installed. :param schema_name: Name of the schema to load, for example 'authors'. """ schema_data = '' with open(get_schema_path(schema_name)) as schema_fd: schema_data = json.loads(schema_fd.read()) if '$schema' not in schema_data: schema_data = {'$schema': schema_data} return schema_data
[docs]def validate(data, schema_name=None): """Validate the given dictionary against the given schema. :param data: Dict to validate. :type data: dict :param schema_name: String with the name of the schema to validate, for example, 'authors' or 'jobs'. If `None` passed it will expect for the data to have the schema specified in the `$ref` key. :type schema_name: str :return: None :raises inspire_schemas.errors.SchemaNotFound: if the given schema was not found. :raises inspire_schemas.errors.SchemaKeyNotFound: if the given schema was not found. :raises jsonschema.SchemaError: if the schema is invalid :raises jsonschema.ValidationError: if the data is invalid """ if schema_name is None: if '$schema' not in data: raise SchemaKeyNotFound(data=data) schema_name = data['$schema'] schema = load_schema(schema_name=schema_name) return jsonschema_validate( instance=data, schema=schema, resolver=LocalRefResolver.from_schema(schema), format_checker=draft4_format_checker, )
[docs]def normalize_date_iso(date): """Normalize date for schema (format yyyy-mm-ddT00:00:00). :param date: a generic date :type date: string with the format (yyyy-mm-dd) :return formatted_date: the input date in the format (yyyy-mm-ddT00:00:00) """ warnings.warn("Don't use 'normalize_date_iso'", DeprecationWarning) try: formatted_date = datetime.datetime.\ strptime(date, '%Y-%m-%d').isoformat() except (ValueError, Exception): formatted_date = None return formatted_date
[docs]def normalize_author_name_with_comma(author): """Normalize author name. :param author: author name :type author: string :return name: the name of the author normilized """ def _verify_author_name_initials(author_name): return not bool(re.compile(r'[^A-Z. ]').search(author_name)) name = author.split(',') if len(name) > 1 and _verify_author_name_initials(name[1]): name[1] = name[1].replace(' ', '') name = ', '.join(n_elem.strip() for n_elem in name) return name