Source code for wheezy.template.lexer

"""
"""


def lexer_scan(extensions):
    """Scans extensions for ``lexer_rules``, ``preprocessors`` and
    ``postprocessors`` attributes.
    """
    lexer_rules = {}
    preprocessors = []
    postprocessors = []
    for extension in extensions:
        if hasattr(extension, 'lexer_rules'):
            lexer_rules.update(extension.lexer_rules)
        if hasattr(extension, 'preprocessors'):
            preprocessors.extend(extension.preprocessors)
        if hasattr(extension, 'postprocessors'):
            postprocessors.extend(extension.postprocessors)
    return {
        'lexer_rules': [lexer_rules[k] for k in sorted(lexer_rules.keys())],
        'preprocessors': preprocessors,
        'postprocessors': postprocessors
    }
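
# Illustration only (not part of the original module): ``lexer_scan`` expects
# extension objects that may expose ``lexer_rules`` (a mapping from sortable
# priority keys to ``(regex, tokenizer)`` tuples), ``preprocessors`` and
# ``postprocessors``. The names below are hypothetical, shown only to make
# that contract concrete:
#
#     class SampleExtension(object):
#         lexer_rules = {100: (re.compile(r'\w+'), word_tokenizer)}
#         preprocessors = [normalize_newlines]
#         postprocessors = [merge_text_tokens]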


class Lexer(object):
    """Tokenizes input source per the rules supplied."""

    def __init__(self, lexer_rules,
                 preprocessors=None, postprocessors=None,
                 **ignore):
        """Initializes with ``lexer_rules``: a list of two-element
        tuples ``(regex, tokenizer)`` where ``tokenizer`` is a
        callable of the following contract::

            def tokenizer(match):
                return end_index, token, value
        """
        self.rules = lexer_rules
        self.preprocessors = preprocessors or []
        self.postprocessors = postprocessors or []

    def tokenize(self, source):
        """Translates ``source`` according to the lexer rules into
        an iterable of tokens.
        """
        for preprocessor in self.preprocessors:
            source = preprocessor(source)
        tokens = []
        append = tokens.append
        pos = 0
        lineno = 1
        end = len(source)
        while pos < end:
            # the first rule that matches at ``pos`` wins
            for regex, tokenizer in self.rules:
                m = regex.match(source, pos, end)
                if m:
                    npos, token, value = tokenizer(m)
                    assert npos > pos
                    append((lineno, token, value))
                    lineno += source[pos:npos].count('\n')
                    pos = npos
                    break
            else:
                # no rule matched the remaining input
                assert False, 'Lexer pattern mismatch.'
        for postprocessor in self.postprocessors:
            postprocessor(tokens)
        return tokens
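

# Usage sketch (not part of the original module; the extension, rules and
# sample input below are assumptions made up for illustration): wires
# ``lexer_scan`` and ``Lexer`` together with a trivial word/whitespace
# rule set to show the ``(lineno, token, value)`` output shape.
if __name__ == '__main__':
    import re

    def word_token(m):
        # tokenizer contract: return (end_index, token, value)
        return m.end(), 'word', m.group()

    def space_token(m):
        return m.end(), 'space', m.group()

    class SampleExtension(object):
        lexer_rules = {
            100: (re.compile(r'\w+'), word_token),
            200: (re.compile(r'\s+'), space_token),
        }
        preprocessors = [lambda source: source.strip() + '\n']

    lexer = Lexer(**lexer_scan([SampleExtension()]))
    print(lexer.tokenize('hello world'))
    # [(1, 'word', 'hello'), (1, 'space', ' '),
    #  (1, 'word', 'world'), (1, 'space', '\n')]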