Source code for revscoring.datasources.meta.frequencies

"""
These meta-datasources operate on :class:`revscoring.Datasource` instances
that return a `list` of items and produce frequency tables.

.. autoclass:: revscoring.datasources.meta.frequencies.table

.. autoclass:: revscoring.datasources.meta.frequencies.delta

.. autoclass:: revscoring.datasources.meta.frequencies.prop_delta
"""
from ..datasource import Datasource


class table(Datasource):
    """
    Counts how many times each item occurs in the list produced by another
    datasource.

    :Parameters:
        items_datasource : :class:`revscoring.Datasource`
            A datasource that generates a list of some `hashable` item
        name : `str`
            A name for the datasource.
    """

    def __init__(self, items_datasource, name=None):
        resolved_name = self._format_name(name, [items_datasource])
        super().__init__(
            resolved_name, self.process, depends_on=[items_datasource]
        )

    def process(self, items):
        """
        Return a `dict` mapping each distinct item to its occurrence count.
        """
        counts = {}
        for entry in items:
            # Unseen entries start from zero, so one expression covers both
            # the first and every later occurrence.
            counts[entry] = counts.get(entry, 0) + 1
        return counts
class delta(Datasource):
    """
    Produces the difference between two frequency tables: for every item
    whose count changed, the new count minus the old count.

    :Parameters:
        old_ft_datasource : :class:`revscoring.Datasource`
            A frequency table datasource
        new_ft_datasource : :class:`revscoring.Datasource`
            A frequency table datasource
        name : `str`
            A name for the datasource.
    """

    def __init__(self, old_ft_datasource, new_ft_datasource, name=None):
        resolved_name = self._format_name(
            name, [old_ft_datasource, new_ft_datasource]
        )
        super().__init__(
            resolved_name,
            self.process,
            depends_on=[old_ft_datasource, new_ft_datasource],
        )

    def process(self, old_ft, new_tf):
        """
        Return a `dict` of item -> count change; unchanged items are omitted.
        """
        # A falsy old table (e.g. None) is treated as an empty table.
        previous = old_ft if old_ft else {}

        # Items present in the new table whose count differs from before.
        changes = {
            item: count - previous.get(item, 0)
            for item, count in new_tf.items()
            if count != previous.get(item, 0)
        }

        # Items that disappeared entirely: their whole old count was removed.
        changes.update(
            (item, previous[item] * -1)
            for item in previous.keys() - new_tf.keys()
        )
        return changes
class prop_delta(Datasource):
    """
    Generates a proportional frequency table diff by comparing a frequency
    table diff with an old frequency table.

    :Parameters:
        old_ft_datasource : :class:`revscoring.Datasource`
            A frequency table datasource
        delta_datasource : :class:`revscoring.Datasource`
            A frequency table diff datasource (item -> change in count)
        name : `str`
            A name for the datasource.
    """

    def __init__(self, old_ft_datasource, delta_datasource, name=None):
        name = self._format_name(name, [old_ft_datasource, delta_datasource])
        super().__init__(name, self.process,
                         depends_on=[old_ft_datasource, delta_datasource])

    def process(self, old_tf, ft_delta):
        """
        Return a `dict` mapping each item in `ft_delta` to its change
        expressed as a proportion of the item's old count.

        Raises `KeyError` if a non-positive change refers to an item that is
        absent from the old table.
        """
        # Treat a missing (None) old table as empty, the same way the `delta`
        # datasource in this module does; otherwise `.get` below would fail
        # with AttributeError when there is no old table.
        old_tf = old_tf or {}

        proportions = {}
        for item, change in ft_delta.items():
            if change > 0:
                # The +1 keeps the denominator non-zero for items that did
                # not appear in the old table.
                proportions[item] = change / (old_tf.get(item, 0) + 1)
            else:
                # A decrease is measured against the item's old count.
                proportions[item] = change / old_tf[item]
        return proportions

Revision Scoring

Navigation