Source code for monty.re

# coding: utf-8
"""
Regular-expression utilities for searching text files.

Provides ``regrep``, a grep-like helper that scans a file line by line
against a dict of named regular expressions and collects, for every match,
the captured groups together with the index of the matching line.
"""

from __future__ import division, unicode_literals, absolute_import

import re
from monty.io import zopen, reverse_readfile
import collections


# Module metadata.
__author__ = 'Shyue Ping Ong'
__copyright__ = 'Copyright 2013, The Materials Virtual Lab'
__version__ = '0.1'
__maintainer__ = 'Shyue Ping Ong'
__email__ = 'ongsp@ucsd.edu'
__date__ = '6/2/15'


def regrep(filename, patterns, reverse=False, terminate_on_match=False,
           postprocess=str):
    r"""
    A powerful regular expression version of grep.

    Every line of the file is searched (``re.search``) against every pattern;
    for each hit the captured groups and the line index are recorded under
    the pattern's key.

    Args:
        filename (str): Filename to grep.
        patterns (dict): A dict mapping a key to a regular expression
            string, e.g.,
            {"energy": r"energy\(sigma->0\)\s+=\s+([\d\-\.]+)"}.
        reverse (bool): Read files in reverse. Defaults to false. Useful for
            large files, especially when used with terminate_on_match.
        terminate_on_match (bool): Whether to terminate when there is at
            least one match in each key in pattern.
        postprocess (callable): A post processing function applied to every
            captured group of a match. Defaults to str, i.e., no change.
            Note that a group that did not participate in the match is
            passed to it as None.

    Returns:
        A collections.defaultdict(list) of the following form:
            {key1: [[[matches...], lineno], [[matches...], lineno],
                    [[matches...], lineno], ...],
            key2: ...}
        Keys whose pattern never matched are not present. For reverse reads,
        the lineno is given as a -ve number (the negated position of the
        line in the reversed iteration). Please note that 0-based indexing
        is used.
    """
    compiled = {k: re.compile(v) for k, v in patterns.items()}
    matches = collections.defaultdict(list)
    gen = reverse_readfile(filename) if reverse else zopen(filename, "rt")
    try:
        for i, l in enumerate(gen):
            for k, p in compiled.items():
                m = p.search(l)
                if m:
                    matches[k].append([[postprocess(g) for g in m.groups()],
                                       -i if reverse else i])
            # .get() is used so that probing a key does not insert an empty
            # list into the defaultdict.
            if terminate_on_match and all(
                    matches.get(k) for k in compiled):
                break
    finally:
        # Always release the underlying handle, including when the loop
        # exits early or postprocess/search raises. ``gen`` may be a file
        # object or a generator; anything without close() is left alone.
        close = getattr(gen, "close", None)
        if close is not None:
            try:
                close()
            except Exception:
                # Best-effort cleanup: a failure to close must not mask the
                # result (or a more relevant exception) of the search.
                pass
    return matches