# Source code for monty.re
# coding: utf-8
"""
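Regular expression helpers. This module provides regrep, a grep-like search
that scans a file line by line against a dict of named patterns.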
"""
from __future__ import division, unicode_literals, absolute_import
import re
from monty.io import zopen, reverse_readfile
import collections
__author__ = 'Shyue Ping Ong'
__copyright__ = 'Copyright 2013, The Materials Virtual Lab'
__version__ = '0.1'
__maintainer__ = 'Shyue Ping Ong'
__email__ = 'ongsp@ucsd.edu'
__date__ = '6/2/15'
def regrep(filename, patterns, reverse=False, terminate_on_match=False,
           postprocess=str):
    r"""
    A powerful regular expression version of grep.

    Every line of the file is searched against every pattern, and the
    captured groups of each hit are recorded together with the line index.

    Args:
        filename (str): Filename to grep.
        patterns (dict): A dict mapping a key to a regular expression, e.g.,
            {"energy": r"energy\(sigma->0\)\s+=\s+([\d\-\.]+)"}.
        reverse (bool): Read files in reverse. Defaults to false. Useful for
            large files, especially when used with terminate_on_match.
        terminate_on_match (bool): Whether to stop reading as soon as every
            key in patterns has at least one match. The check is made after
            each line has been tested against all patterns.
        postprocess (callable): A post processing function applied to each
            captured group of a match. Defaults to str, i.e., no change.
            Groups that did not participate in the match are passed as
            None.

    Returns:
        A collections.defaultdict(list) of the following form:
            {key1: [[[matches...], lineno], [[matches...], lineno],
                    [[matches...], lineno], ...],
             key2: ...}
        Keys that never matched are not inserted. For reverse reads, the
        lineno is given as a -ve number counted from the first line read.
        Please note that 0-based indexing is used.
    """
    compiled = {key: re.compile(pattern) for key, pattern in patterns.items()}
    matches = collections.defaultdict(list)
    gen = reverse_readfile(filename) if reverse else zopen(filename, "rt")
    try:
        for i, line in enumerate(gen):
            for key, regex in compiled.items():
                m = regex.search(line)
                if m:
                    matches[key].append(
                        [[postprocess(g) for g in m.groups()],
                         -i if reverse else i])
            # A key is only ever inserted by the append above, so membership
            # means "has at least one match". The "in" test does not trigger
            # the defaultdict factory.
            if terminate_on_match and all(key in matches for key in compiled):
                break
    finally:
        # Release the reader even when the search or postprocess raises.
        # Closing stays best-effort: a reader without close() is skipped and
        # an I/O error on close must not mask the result or the original
        # exception.
        close = getattr(gen, "close", None)
        if close is not None:
            try:
                close()
            except (IOError, OSError):
                pass
    return matches