Source code for json_merger.stats
# -*- coding: utf-8 -*-
#
# This file is part of Inspirehep.
# Copyright (C) 2016 CERN.
#
# Inspirehep is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Inspirehep is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Inspirehep; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
from __future__ import absolute_import, print_function
[docs]class ListMatchStats(object):
"""Class for holding list entity matching stats."""
def __init__(self, lst, root):
"""
Args:
lst: The list of elements that needs to be matched.
root: The ancestor of the list of elements that needs to be
matched.
Attributes:
in_result_idx: Indices of elements in lst that are present in the
end result.
in_result: Elements in lst that are present in the end result.
not_in_result_idx: Indices of elements in lst that are not present
in the end result.
not_in_result: Elements in lst that are not present in the end
result.
not_in_result_root_match_idx: Indices of elements that are not in
the end result but were matched with root elements.
not_in_result_root_match: Elements that are not in the end result
but were matched with root elements.
not_in_result_not_root_match_idx: Indices of elements that are not
in the end result and were not matched with any root elements.
not_in_result_not_root_match: Elements that are not in the end
result and were not matched with any root elements.
not_in_result_root_match_pairs: Pairs of (lst, root) elements
that are not in the end result but were matched.
"""
self.lst = lst
self.root = root
self.in_result_idx = set()
self.not_in_result_root_match_idx = set()
self.root_matches = {}
[docs] def move_to_result(self, lst_idx):
"""Moves element from lst available at lst_idx."""
self.in_result_idx.add(lst_idx)
if lst_idx in self.not_in_result_root_match_idx:
self.not_in_result_root_match_idx.remove(lst_idx)
[docs] def add_root_match(self, lst_idx, root_idx):
"""Adds a match for the elements avaialble at lst_idx and root_idx."""
self.root_matches[lst_idx] = root_idx
if lst_idx in self.in_result_idx:
return
self.not_in_result_root_match_idx.add(lst_idx)
@property
def not_in_result_idx(self):
return set(range(len(self.lst))).difference(self.in_result_idx)
@property
def not_in_result_not_root_match_idx(self):
return self.not_in_result_idx.difference(
self.not_in_result_root_match_idx)
@property
def in_result(self):
return [self.lst[e] for e in self.in_result_idx]
@property
def not_in_result(self):
return [self.lst[e] for e in self.not_in_result_idx]
@property
def not_in_result_root_match(self):
return [self.lst[e] for e in self.not_in_result_root_match_idx]
@property
def not_in_result_not_root_match(self):
return [self.lst[e] for e in self.not_in_result_not_root_match_idx]
@property
def not_in_result_root_match_pairs(self):
return [(self.lst[e], self.root[self.root_matches[e]])
for e in self.not_in_result_root_match_idx]
@property
def not_matched_root_objects(self):
matched_root_idx = set(self.root_matches.values())
return [o for idx, o in enumerate(self.root)
if idx not in matched_root_idx]