Source code for revscoring.datasources.meta.extractors

"""
These meta-datasources operate on :class:`revscoring.Datasource` instances that
return a `str` or a `list` ( `str` ) and extract information from them.

.. autoclass:: revscoring.datasources.meta.extractors.regex

"""
import re

from ..datasource import Datasource


class regex(Datasource):
    """
    Generates a list of strings that match any of a set of provided `regexes`

    The individual expressions are combined into a single alternation that
    is wrapped in word-boundary anchors, so one compiled pattern is used for
    every lookup.

    :Parameters:
        regexes : `list` ( `str` )
            A list of regexes to find in the text
        text_datasource : :class:`revscoring.Datasource`
            A datasource that returns a `str` or a `list` of `str`
        regex_flags : `int`
            A set of regex flags to use in matching.  Defaults to `re.I`.
        name : `str`
            A name for the new datasource
    """

    def __init__(self, regexes, text_datasource, regex_flags=re.I, name=None):
        # Build and compile the pattern first so that an invalid expression
        # fails before any naming / registration work happens.
        alternation = r"|".join(regexes)
        self.group_re = re.compile(
            r"\b(" + alternation + r")\b", flags=regex_flags
        )

        datasource_name = self._format_name(name, [regexes, text_datasource])
        super().__init__(
            datasource_name, self.process, depends_on=[text_datasource]
        )

    def process(self, text_or_texts):
        """
        Extracts every match of the compiled pattern from the input.

        :Parameters:
            text_or_texts : `str` | `list` ( `str` ) | `None`
                A single text, an iterable of texts, or `None`

        :Returns:
            A `list` of the matched substrings in the order they were
            found.  `None` input produces an empty list.
        """
        if text_or_texts is None:
            return []

        # Treat a lone string as a one-item collection so that a single
        # extraction loop serves both input shapes.
        if isinstance(text_or_texts, str):
            sources = [text_or_texts]
        else:
            sources = text_or_texts

        found = []
        for source in sources:
            # group(0) is the full matched text, independent of any groups
            # inside the individual regexes.
            found.extend(
                hit.group(0) for hit in self.group_re.finditer(source)
            )
        return found

Revision Scoring

Navigation