Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2016  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form  
  35  C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements  
  36  (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to 
  37  L{Literal} expressions):: 
  38   
  39      from pyparsing import Word, alphas 
  40   
  41      # define grammar of a greeting 
  42      greet = Word(alphas) + "," + Word(alphas) + "!" 
  43   
  44      hello = "Hello, World!" 
  45      print (hello, "->", greet.parseString(hello)) 
  46   
  47  The program outputs the following:: 
  48   
  49      Hello, World! -> ['Hello', ',', 'World', '!'] 
  50   
  51  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  52  class names, and the use of '+', '|' and '^' operators. 
  53   
  54  The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 
  55  object with named attributes. 
  56   
  57  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  58   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  59   - quoted strings 
  60   - embedded comments 
  61  """ 
  62   
  63  __version__ = "2.2.0" 
  64  __versionTime__ = "06 Mar 2017 02:06 UTC" 
  65  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  66   
  67  import string 
  68  from weakref import ref as wkref 
  69  import copy 
  70  import sys 
  71  import warnings 
  72  import re 
  73  import sre_constants 
  74  import collections 
  75  import pprint 
  76  import traceback 
  77  import types 
  78  from datetime import datetime 
  79   
  80  try: 
  81      from _thread import RLock 
  82  except ImportError: 
  83      from threading import RLock 
  84   
  85  try: 
  86      from collections import OrderedDict as _OrderedDict 
  87  except ImportError: 
  88      try: 
  89          from ordereddict import OrderedDict as _OrderedDict 
  90      except ImportError: 
  91          _OrderedDict = None 
  92   
  93  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  94   
  95  __all__ = [ 
  96  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  97  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  98  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  99  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
 100  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
 101  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
 102  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
 103  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
 104  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
 105  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
 106  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
 107  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
 108  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
 109  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
 110  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
 111  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
 112  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
 113  'CloseMatch', 'tokenMap', 'pyparsing_common', 
 114  ] 
 115   
 116  system_version = tuple(sys.version_info)[:3] 
 117  PY_3 = system_version[0] == 3 
 118  if PY_3: 
 119      _MAX_INT = sys.maxsize 
 120      basestring = str 
 121      unichr = chr 
 122      _ustr = str 
 123   
 124      # build list of single arg builtins, that can be used as parse actions 
 125      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 126   
 127  else: 
 128      _MAX_INT = sys.maxint 
 129      range = xrange 
130 131 - def _ustr(obj):
132 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 133 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 134 then < returns the unicode object | encodes it with the default encoding | ... >. 135 """ 136 if isinstance(obj,unicode): 137 return obj 138 139 try: 140 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 141 # it won't break any existing code. 142 return str(obj) 143 144 except UnicodeEncodeError: 145 # Else encode it 146 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 147 xmlcharref = Regex(r'&#\d+;') 148 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 149 return xmlcharref.transformString(ret)
150 151 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 152 singleArgBuiltins = [] 153 import __builtin__ 154 for fname in "sum len sorted reversed list tuple set any all min max".split(): 155 try: 156 singleArgBuiltins.append(getattr(__builtin__,fname)) 157 except AttributeError: 158 continue 159 160 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with the XML special characters replaced by entities.

    Handles C{&}, C{>}, C{<}, double quote and single quote.
    """
    # '&' must be handled first: every later substitution introduces a new
    # ampersand that must not be escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
171
class _Constants(object):
    """Empty class; declares no attributes or methods of its own."""


# Character-set strings exported for building grammar expressions.
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)   # the backslash character
# every character of string.printable that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
181 182 -class ParseBaseException(Exception):
183 """base exception class for all parsing runtime exceptions""" 184 # Performance tuning: we construct a *lot* of these, so keep this 185 # constructor as small and fast as possible
186 - def __init__( self, pstr, loc=0, msg=None, elem=None ):
187 self.loc = loc 188 if msg is None: 189 self.msg = pstr 190 self.pstr = "" 191 else: 192 self.msg = msg 193 self.pstr = pstr 194 self.parserElement = elem 195 self.args = (pstr, loc, msg)
196 197 @classmethod
198 - def _from_exception(cls, pe):
199 """ 200 internal factory method to simplify creating one type of ParseException 201 from another - avoids having __init__ signature conflicts among subclasses 202 """ 203 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
204
205 - def __getattr__( self, aname ):
206 """supported attributes by name are: 207 - lineno - returns the line number of the exception text 208 - col - returns the column number of the exception text 209 - line - returns the line containing the exception text 210 """ 211 if( aname == "lineno" ): 212 return lineno( self.loc, self.pstr ) 213 elif( aname in ("col", "column") ): 214 return col( self.loc, self.pstr ) 215 elif( aname == "line" ): 216 return line( self.loc, self.pstr ) 217 else: 218 raise AttributeError(aname)
219
220 - def __str__( self ):
221 return "%s (at char %d), (line:%d, col:%d)" % \ 222 ( self.msg, self.loc, self.lineno, self.column )
223 - def __repr__( self ):
224 return _ustr(self)
225 - def markInputline( self, markerString = ">!<" ):
226 """Extracts the exception line from the input string, and marks 227 the location of the exception with a special symbol. 228 """ 229 line_str = self.line 230 line_column = self.column - 1 231 if markerString: 232 line_str = "".join((line_str[:line_column], 233 markerString, line_str[line_column:])) 234 return line_str.strip()
235 - def __dir__(self):
236 return "lineno col line".split() + dir(type(self))
237
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input;
    the following attributes can be read by name:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
258
class ParseFatalException(ParseBaseException):
    """Exception that user code may raise when inconsistent parse content
       is found; stops all parsing immediately."""
263
class ParseSyntaxException(ParseFatalException):
    """Same as L{ParseFatalException}, but raised internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) signals that parsing must
       stop immediately because an unbacktrackable syntax error was found."""
269
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""

    def __init__(self, parseElementList):
        """Store the trace of parse elements that led to the recursion.

        @param parseElementList: the elements visited, kept unchanged in
            C{self.parseElementTrace}
        """
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple on the right-hand side of
        # '%' is unpacked as the argument list, which raises TypeError for any
        # length other than 1 and silently unwraps a 1-tuple.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
290
class _ParseResultsWithOffset(object):
    """Pairs a value with an integer offset; the pair is kept in C{self.tup}
    and is readable by index (0 is the value, 1 is the offset)."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the value is shown; the offset is deliberately left out
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset with C{i}, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
300
301 -class ParseResults(object):
302 """ 303 Structured parse results, to provide multiple means of access to the parsed data: 304 - as a list (C{len(results)}) 305 - by list index (C{results[0], results[1]}, etc.) 306 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 307 308 Example:: 309 integer = Word(nums) 310 date_str = (integer.setResultsName("year") + '/' 311 + integer.setResultsName("month") + '/' 312 + integer.setResultsName("day")) 313 # equivalent form: 314 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 315 316 # parseString returns a ParseResults object 317 result = date_str.parseString("1999/12/31") 318 319 def test(s, fn=repr): 320 print("%s -> %s" % (s, fn(eval(s)))) 321 test("list(result)") 322 test("result[0]") 323 test("result['month']") 324 test("result.day") 325 test("'month' in result") 326 test("'minutes' in result") 327 test("result.dump()", str) 328 prints:: 329 list(result) -> ['1999', '/', '12', '/', '31'] 330 result[0] -> '1999' 331 result['month'] -> '12' 332 result.day -> '31' 333 'month' in result -> True 334 'minutes' in result -> False 335 result.dump() -> ['1999', '/', '12', '/', '31'] 336 - day: 31 337 - month: 12 338 - year: 1999 339 """
340 - def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
341 if isinstance(toklist, cls): 342 return toklist 343 retobj = object.__new__(cls) 344 retobj.__doinit = True 345 return retobj
346 347 # Performance tuning: we construct a *lot* of these, so keep this 348 # constructor as small and fast as possible
349 - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
350 if self.__doinit: 351 self.__doinit = False 352 self.__name = None 353 self.__parent = None 354 self.__accumNames = {} 355 self.__asList = asList 356 self.__modal = modal 357 if toklist is None: 358 toklist = [] 359 if isinstance(toklist, list): 360 self.__toklist = toklist[:] 361 elif isinstance(toklist, _generatorType): 362 self.__toklist = list(toklist) 363 else: 364 self.__toklist = [toklist] 365 self.__tokdict = dict() 366 367 if name is not None and name: 368 if not modal: 369 self.__accumNames[name] = 0 370 if isinstance(name,int): 371 name = _ustr(name) # will always return a str, but use _ustr for consistency 372 self.__name = name 373 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): 374 if isinstance(toklist,basestring): 375 toklist = [ toklist ] 376 if asList: 377 if isinstance(toklist,ParseResults): 378 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 379 else: 380 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 381 self[name].__name = name 382 else: 383 try: 384 self[name] = toklist[0] 385 except (KeyError,TypeError,IndexError): 386 self[name] = toklist
387
388 - def __getitem__( self, i ):
389 if isinstance( i, (int,slice) ): 390 return self.__toklist[i] 391 else: 392 if i not in self.__accumNames: 393 return self.__tokdict[i][-1][0] 394 else: 395 return ParseResults([ v[0] for v in self.__tokdict[i] ])
396
397 - def __setitem__( self, k, v, isinstance=isinstance ):
398 if isinstance(v,_ParseResultsWithOffset): 399 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 400 sub = v[0] 401 elif isinstance(k,(int,slice)): 402 self.__toklist[k] = v 403 sub = v 404 else: 405 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 406 sub = v 407 if isinstance(sub,ParseResults): 408 sub.__parent = wkref(self)
409
410 - def __delitem__( self, i ):
411 if isinstance(i,(int,slice)): 412 mylen = len( self.__toklist ) 413 del self.__toklist[i] 414 415 # convert int to slice 416 if isinstance(i, int): 417 if i < 0: 418 i += mylen 419 i = slice(i, i+1) 420 # get removed indices 421 removed = list(range(*i.indices(mylen))) 422 removed.reverse() 423 # fixup indices in token dictionary 424 for name,occurrences in self.__tokdict.items(): 425 for j in removed: 426 for k, (value, position) in enumerate(occurrences): 427 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 428 else: 429 del self.__tokdict[i]
430
431 - def __contains__( self, k ):
432 return k in self.__tokdict
433
434 - def __len__( self ): return len( self.__toklist )
435 - def __bool__(self): return ( not not self.__toklist )
436 __nonzero__ = __bool__
437 - def __iter__( self ): return iter( self.__toklist )
438 - def __reversed__( self ): return iter( self.__toklist[::-1] )
439 - def _iterkeys( self ):
440 if hasattr(self.__tokdict, "iterkeys"): 441 return self.__tokdict.iterkeys() 442 else: 443 return iter(self.__tokdict)
444
445 - def _itervalues( self ):
446 return (self[k] for k in self._iterkeys())
447
448 - def _iteritems( self ):
449 return ((k, self[k]) for k in self._iterkeys())
450 451 if PY_3: 452 keys = _iterkeys 453 """Returns an iterator of all named result keys (Python 3.x only).""" 454 455 values = _itervalues 456 """Returns an iterator of all named result values (Python 3.x only).""" 457 458 items = _iteritems 459 """Returns an iterator of all named result key-value tuples (Python 3.x only).""" 460 461 else: 462 iterkeys = _iterkeys 463 """Returns an iterator of all named result keys (Python 2.x only).""" 464 465 itervalues = _itervalues 466 """Returns an iterator of all named result values (Python 2.x only).""" 467 468 iteritems = _iteritems 469 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 470
471 - def keys( self ):
472 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 473 return list(self.iterkeys())
474
475 - def values( self ):
476 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 477 return list(self.itervalues())
478
479 - def items( self ):
480 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 481 return list(self.iteritems())
482
483 - def haskeys( self ):
484 """Since keys() returns an iterator, this method is helpful in bypassing 485 code that looks for the existence of any defined results names.""" 486 return bool(self.__tokdict)
487
488 - def pop( self, *args, **kwargs):
489 """ 490 Removes and returns item at specified index (default=C{last}). 491 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no 492 argument or an integer argument, it will use C{list} semantics 493 and pop tokens from the list of parsed tokens. If passed a 494 non-integer argument (most likely a string), it will use C{dict} 495 semantics and pop the corresponding value from any defined 496 results names. A second default return value argument is 497 supported, just as in C{dict.pop()}. 498 499 Example:: 500 def remove_first(tokens): 501 tokens.pop(0) 502 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 503 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 504 505 label = Word(alphas) 506 patt = label("LABEL") + OneOrMore(Word(nums)) 507 print(patt.parseString("AAB 123 321").dump()) 508 509 # Use pop() in a parse action to remove named result (note that corresponding value is not 510 # removed from list form of results) 511 def remove_LABEL(tokens): 512 tokens.pop("LABEL") 513 return tokens 514 patt.addParseAction(remove_LABEL) 515 print(patt.parseString("AAB 123 321").dump()) 516 prints:: 517 ['AAB', '123', '321'] 518 - LABEL: AAB 519 520 ['AAB', '123', '321'] 521 """ 522 if not args: 523 args = [-1] 524 for k,v in kwargs.items(): 525 if k == 'default': 526 args = (args[0], v) 527 else: 528 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 529 if (isinstance(args[0], int) or 530 len(args) == 1 or 531 args[0] in self): 532 index = args[0] 533 ret = self[index] 534 del self[index] 535 return ret 536 else: 537 defaultvalue = args[1] 538 return defaultvalue
539
540 - def get(self, key, defaultValue=None):
541 """ 542 Returns named result matching the given key, or if there is no 543 such name, then returns the given C{defaultValue} or C{None} if no 544 C{defaultValue} is specified. 545 546 Similar to C{dict.get()}. 547 548 Example:: 549 integer = Word(nums) 550 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 551 552 result = date_str.parseString("1999/12/31") 553 print(result.get("year")) # -> '1999' 554 print(result.get("hour", "not specified")) # -> 'not specified' 555 print(result.get("hour")) # -> None 556 """ 557 if key in self: 558 return self[key] 559 else: 560 return defaultValue
561
562 - def insert( self, index, insStr ):
563 """ 564 Inserts new element at location index in the list of parsed tokens. 565 566 Similar to C{list.insert()}. 567 568 Example:: 569 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 570 571 # use a parse action to insert the parse location in the front of the parsed results 572 def insert_locn(locn, tokens): 573 tokens.insert(0, locn) 574 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 575 """ 576 self.__toklist.insert(index, insStr) 577 # fixup indices in token dictionary 578 for name,occurrences in self.__tokdict.items(): 579 for k, (value, position) in enumerate(occurrences): 580 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
581
582 - def append( self, item ):
583 """ 584 Add single element to end of ParseResults list of elements. 585 586 Example:: 587 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 588 589 # use a parse action to compute the sum of the parsed integers, and add it to the end 590 def append_sum(tokens): 591 tokens.append(sum(map(int, tokens))) 592 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 593 """ 594 self.__toklist.append(item)
595
596 - def extend( self, itemseq ):
597 """ 598 Add sequence of elements to end of ParseResults list of elements. 599 600 Example:: 601 patt = OneOrMore(Word(alphas)) 602 603 # use a parse action to append the reverse of the matched strings, to make a palindrome 604 def make_palindrome(tokens): 605 tokens.extend(reversed([t[::-1] for t in tokens])) 606 return ''.join(tokens) 607 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 608 """ 609 if isinstance(itemseq, ParseResults): 610 self += itemseq 611 else: 612 self.__toklist.extend(itemseq)
613
614 - def clear( self ):
615 """ 616 Clear all elements and results names. 617 """ 618 del self.__toklist[:] 619 self.__tokdict.clear()
620
621 - def __getattr__( self, name ):
622 try: 623 return self[name] 624 except KeyError: 625 return "" 626 627 if name in self.__tokdict: 628 if name not in self.__accumNames: 629 return self.__tokdict[name][-1][0] 630 else: 631 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 632 else: 633 return ""
634
635 - def __add__( self, other ):
636 ret = self.copy() 637 ret += other 638 return ret
639
640 - def __iadd__( self, other ):
641 if other.__tokdict: 642 offset = len(self.__toklist) 643 addoffset = lambda a: offset if a<0 else a+offset 644 otheritems = other.__tokdict.items() 645 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 646 for (k,vlist) in otheritems for v in vlist] 647 for k,v in otherdictitems: 648 self[k] = v 649 if isinstance(v[0],ParseResults): 650 v[0].__parent = wkref(self) 651 652 self.__toklist += other.__toklist 653 self.__accumNames.update( other.__accumNames ) 654 return self
655
656 - def __radd__(self, other):
657 if isinstance(other,int) and other == 0: 658 # useful for merging many ParseResults using sum() builtin 659 return self.copy() 660 else: 661 # this may raise a TypeError - so be it 662 return other + self
663
664 - def __repr__( self ):
665 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
666
667 - def __str__( self ):
668 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
669
670 - def _asStringList( self, sep='' ):
671 out = [] 672 for item in self.__toklist: 673 if out and sep: 674 out.append(sep) 675 if isinstance( item, ParseResults ): 676 out += item._asStringList() 677 else: 678 out.append( _ustr(item) ) 679 return out
680
681 - def asList( self ):
682 """ 683 Returns the parse results as a nested list of matching tokens, all converted to strings. 684 685 Example:: 686 patt = OneOrMore(Word(alphas)) 687 result = patt.parseString("sldkj lsdkj sldkj") 688 # even though the result prints in string-like form, it is actually a pyparsing ParseResults 689 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 690 691 # Use asList() to create an actual list 692 result_list = result.asList() 693 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 694 """ 695 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
696
697 - def asDict( self ):
698 """ 699 Returns the named parse results as a nested dictionary. 700 701 Example:: 702 integer = Word(nums) 703 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 704 705 result = date_str.parseString('12/31/1999') 706 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 707 708 result_dict = result.asDict() 709 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 710 711 # even though a ParseResults supports dict-like access, sometime you just need to have a dict 712 import json 713 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 714 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 715 """ 716 if PY_3: 717 item_fn = self.items 718 else: 719 item_fn = self.iteritems 720 721 def toItem(obj): 722 if isinstance(obj, ParseResults): 723 if obj.haskeys(): 724 return obj.asDict() 725 else: 726 return [toItem(v) for v in obj] 727 else: 728 return obj
729 730 return dict((k,toItem(v)) for k,v in item_fn())
731
732 - def copy( self ):
733 """ 734 Returns a new copy of a C{ParseResults} object. 735 """ 736 ret = ParseResults( self.__toklist ) 737 ret.__tokdict = self.__tokdict.copy() 738 ret.__parent = self.__parent 739 ret.__accumNames.update( self.__accumNames ) 740 ret.__name = self.__name 741 return ret
742
743 - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
744 """ 745 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 746 """ 747 nl = "\n" 748 out = [] 749 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 750 for v in vlist) 751 nextLevelIndent = indent + " " 752 753 # collapse out indents if formatting is not desired 754 if not formatted: 755 indent = "" 756 nextLevelIndent = "" 757 nl = "" 758 759 selfTag = None 760 if doctag is not None: 761 selfTag = doctag 762 else: 763 if self.__name: 764 selfTag = self.__name 765 766 if not selfTag: 767 if namedItemsOnly: 768 return "" 769 else: 770 selfTag = "ITEM" 771 772 out += [ nl, indent, "<", selfTag, ">" ] 773 774 for i,res in enumerate(self.__toklist): 775 if isinstance(res,ParseResults): 776 if i in namedItems: 777 out += [ res.asXML(namedItems[i], 778 namedItemsOnly and doctag is None, 779 nextLevelIndent, 780 formatted)] 781 else: 782 out += [ res.asXML(None, 783 namedItemsOnly and doctag is None, 784 nextLevelIndent, 785 formatted)] 786 else: 787 # individual token, see if there is a name for it 788 resTag = None 789 if i in namedItems: 790 resTag = namedItems[i] 791 if not resTag: 792 if namedItemsOnly: 793 continue 794 else: 795 resTag = "ITEM" 796 xmlBodyText = _xml_escape(_ustr(res)) 797 out += [ nl, nextLevelIndent, "<", resTag, ">", 798 xmlBodyText, 799 "</", resTag, ">" ] 800 801 out += [ nl, indent, "</", selfTag, ">" ] 802 return "".join(out)
803
804 - def __lookup(self,sub):
805 for k,vlist in self.__tokdict.items(): 806 for v,loc in vlist: 807 if sub is v: 808 return k 809 return None
810
811 - def getName(self):
812 r""" 813 Returns the results name for this token expression. Useful when several 814 different expressions might match at a particular location. 815 816 Example:: 817 integer = Word(nums) 818 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 819 house_number_expr = Suppress('#') + Word(nums, alphanums) 820 user_data = (Group(house_number_expr)("house_number") 821 | Group(ssn_expr)("ssn") 822 | Group(integer)("age")) 823 user_info = OneOrMore(user_data) 824 825 result = user_info.parseString("22 111-22-3333 #221B") 826 for item in result: 827 print(item.getName(), ':', item[0]) 828 prints:: 829 age : 22 830 ssn : 111-22-3333 831 house_number : 221B 832 """ 833 if self.__name: 834 return self.__name 835 elif self.__parent: 836 par = self.__parent() 837 if par: 838 return par.__lookup(self) 839 else: 840 return None 841 elif (len(self) == 1 and 842 len(self.__tokdict) == 1 and 843 next(iter(self.__tokdict.values()))[0][1] in (0,-1)): 844 return next(iter(self.__tokdict.keys())) 845 else: 846 return None
847
848 - def dump(self, indent='', depth=0, full=True):
849 """ 850 Diagnostic method for listing out the contents of a C{ParseResults}. 851 Accepts an optional C{indent} argument so that this string can be embedded 852 in a nested display of other data. 853 854 Example:: 855 integer = Word(nums) 856 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 857 858 result = date_str.parseString('12/31/1999') 859 print(result.dump()) 860 prints:: 861 ['12', '/', '31', '/', '1999'] 862 - day: 1999 863 - month: 31 864 - year: 12 865 """ 866 out = [] 867 NL = '\n' 868 out.append( indent+_ustr(self.asList()) ) 869 if full: 870 if self.haskeys(): 871 items = sorted((str(k), v) for k,v in self.items()) 872 for k,v in items: 873 if out: 874 out.append(NL) 875 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 876 if isinstance(v,ParseResults): 877 if v: 878 out.append( v.dump(indent,depth+1) ) 879 else: 880 out.append(_ustr(v)) 881 else: 882 out.append(repr(v)) 883 elif any(isinstance(vv,ParseResults) for vv in self): 884 v = self 885 for i,vv in enumerate(v): 886 if isinstance(vv,ParseResults): 887 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) 888 else: 889 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) 890 891 return "".join(out)
892
893 - def pprint(self, *args, **kwargs):
894 """ 895 Pretty-printer for parsed results as a list, using the C{pprint} module. 896 Accepts additional positional or keyword args as defined for the 897 C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) 898 899 Example:: 900 ident = Word(alphas, alphanums) 901 num = Word(nums) 902 func = Forward() 903 term = ident | num | Group('(' + func + ')') 904 func <<= ident + Group(Optional(delimitedList(term))) 905 result = func.parseString("fna a,b,(fnb c,d,200),100") 906 result.pprint(width=40) 907 prints:: 908 ['fna', 909 ['a', 910 'b', 911 ['(', 'fnb', ['c', 'd', '200'], ')'], 912 '100']] 913 """ 914 pprint.pprint(self.asList(), *args, **kwargs)
915 916 # add support for pickle protocol
def __getstate__(self):
    """
    Pickle support: build the state tuple that C{__setstate__} consumes.

    The state has the shape
    C{(toklist, (copy of tokdict, parent or None, accumNames, name))}.
    The parent is held internally as a weak reference (or C{None}); it is
    dereferenced here so that the referent itself, not the weakref, is stored.
    """
    parent_ref = self.__parent
    # Use an explicit conditional rather than "x and y() or None": the and/or
    # idiom also maps a live parent that merely evaluates false to None.
    parent = parent_ref() if parent_ref is not None else None
    return ( self.__toklist,
             ( self.__tokdict.copy(),
               parent,
               self.__accumNames,
               self.__name ) )
923
def __setstate__(self,state):
    """
    Pickle support: restore the instance from the tuple produced by C{__getstate__}.

    C{state} is C{(toklist, (tokdict, parent, accumNames, name))}. The
    accumulated-names mapping is copied into a fresh dict, and a non-C{None}
    parent is re-wrapped with C{wkref}.
    """
    self.__toklist = state[0]
    (self.__tokdict,
     parent,
     accum_names,
     self.__name) = state[1]
    # copy, so this instance does not share the mapping held in the state
    self.__accumNames = dict(accum_names)
    self.__parent = wkref(parent) if parent is not None else None
936
def __getnewargs__(self):
    """Pickle support: return the positional arguments handed to C{__new__} on unpickling."""
    new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return new_args
939
def __dir__(self):
    """List the attributes of this object's type followed by its results-name keys."""
    names = dir(type(self))
    names.extend(self.keys())
    return names

# Register ParseResults as a virtual subclass of MutableMapping. The ABCs live in
# collections.abc on Python 3.3+, and the collections.MutableMapping alias was
# removed in Python 3.10, so fall back to the old location only on old interpreters.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location directly after a newline (and not at the very end) is column 1
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # otherwise: distance from the last newline before loc (rfind gives -1 if none)
    return loc - strg.rfind("\n", 0, loc)
957
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
969
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # start just after the previous newline (rfind's -1 maps to index 0)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
979
def _defaultStartDebugAction( instring, loc, expr ):
    """Default 'start' debug action: print the expression being tried, its location, and (line,col)."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)
982
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default 'success' debug action: print the matched expression and its tokens as a list."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + tokens_text)
985
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default 'exception' debug action: print the exception that was raised."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
988
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
992 993 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 994 #~ 'decorator to trim function calls to match the arity of the target' 995 #~ def _trim_arity(func, maxargs=3): 996 #~ if func in singleArgBuiltins: 997 #~ return lambda s,l,t: func(t) 998 #~ limit = 0 999 #~ foundArity = False 1000 #~ def wrapper(*args): 1001 #~ nonlocal limit,foundArity 1002 #~ while 1: 1003 #~ try: 1004 #~ ret = func(*args[limit:]) 1005 #~ foundArity = True 1006 #~ return ret 1007 #~ except TypeError: 1008 #~ if limit == maxargs or foundArity: 1009 #~ raise 1010 #~ limit += 1 1011 #~ continue 1012 #~ return wrapper 1013 1014 # this version is Python 2.x-3.x cross-compatible 1015 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap C{func} so that it can always be called with the full C{(s, loc, toks)}
    argument list, whatever number of trailing arguments it really accepts.

    The returned wrapper first calls C{func} with all arguments; each time that
    call itself raises C{TypeError}, one more leading argument is dropped and the
    call is retried, until more than C{maxargs} arguments have been dropped. Once
    a call has succeeded, the number of dropped arguments is kept and any later
    C{TypeError} is re-raised unchanged.

    Callables found in C{singleArgBuiltins} are not probed; they are wrapped in a
    lambda that passes only the tokens.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists stand in for 'nonlocal', which Python 2 does not have:
    # the nested wrapper mutates these cells instead of rebinding names
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare (filename, lineno) taken from the traceback with the
                        # synthesized location of the func call above; a mismatch means
                        # the TypeError did not originate at that call line
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference explicitly
                        del tb

                # retry with one more leading argument dropped, while allowed
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper

1080 -class ParserElement(object):
1081 """Abstract base level parser element class.""" 1082 DEFAULT_WHITE_CHARS = " \n\t\r" 1083 verbose_stacktrace = False 1084 1085 @staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the class-wide default whitespace characters, stored in
    C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        # default whitespace chars are space, newline, <TAB> and carriage return
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
1099 1100 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap plain strings that are combined with parser
    elements; it is stored in C{ParserElement._literalStringClass}.

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls
1120
def __init__( self, savelist=False ):
    """Initialise the per-element parsing state; C{savelist} is stored as C{saveAsList}."""
    # callbacks
    self.parseAction = []
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    # naming / results
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    # whitespace handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.keepTabs = False
    self.ignoreExprs = []
    self.debug = False
    self.streamlined = False
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.errmsg = ""
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.debugActions = ( None, None, None ) #custom debug actions
    self.re = None
    self.callPreparse = True # used to avoid redundant calls to preParse
    self.callDuringTry = False
1143
def copy( self ):
    """
    Return a shallow copy of this C{ParserElement} with its own parse-action and
    ignore-expression lists. Handy for attaching different parse actions to the
    same parsing pattern.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy( self )
    # give the copy independent lists so later additions do not affect the original
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # pick up the default whitespace set as it is at copy time
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
1166
def setName( self, name ):
    """
    Give this expression a name; the name is used to build the "Expected ..."
    error message, which makes debugging and exception messages clearer.
    Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    if not hasattr(self,"exception"):
        return self
    # keep an already-built exception object in step with the new message
    self.exception.msg = self.errmsg
    return self
1180
def setResultsName( self, name, listAllMatches=False ):
    """
    Attach a results name, under which the matching tokens can be referenced
    in the returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{*} on C{name} is stripped and has the same effect as passing
    C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named_copy = self.copy()
    if name.endswith("*"):
        name, listAllMatches = name[:-1], True
    named_copy.resultsName = name
    named_copy.modalResults = not listAllMatches
    return named_copy
1208
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable. Returns C{self}.

       Enabling is idempotent: if a breaker is already installed it is replaced
       rather than wrapped again, so a single C{setBreak(False)} always restores
       the original parse method.
    """
    current = self._parse
    already_wrapped = hasattr(current, "_originalParseMethod")
    if breakFlag:
        # wrap the real parse method, never an earlier breaker
        _parseMethod = current._originalParseMethod if already_wrapped else current
        def breaker(instring, loc, doActions=True, callPreParse=True):
            import pdb
            pdb.set_trace()
            return _parseMethod( instring, loc, doActions, callPreParse )
        # remember what was wrapped so that it can be restored later
        breaker._originalParseMethod = _parseMethod
        self._parse = breaker
    elif already_wrapped:
        self._parse = current._originalParseMethod
    return self
1226
def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with the given functions, to be run
    when the element matches successfully.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # every action is normalised so it can be called with (s, loc, toks)
    self.parseAction = [_trim_arity(action) for action in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1264
def addParseAction( self, *fns, **kwargs ):
    """
    Append one or more parse actions to the expression's existing list of parse
    actions. See L{I{setParseAction}<setParseAction>}.

    C{callDuringTry}, once true, stays true; otherwise it takes the value of the
    optional C{callDuringTry} keyword argument (default C{False}).

    See examples in L{I{copy}<copy>}.
    """
    wrapped_actions = [_trim_arity(action) for action in fns]
    self.parseAction += wrapped_actions
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1274
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, marks this element's parse actions to be run during lookaheads and alternate testing

    Returns C{self}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition(fn):
        # A factory gives every condition its own binding of fn; closing over the
        # loop variable directly would make all conditions call the LAST function.
        # The arity wrapper is built once here instead of on every parse.
        checked = _trim_arity(fn)
        def pa(s,l,t):
            if not bool(checked(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

def setFailAction( self, fn ):
    """Register the action to run when parsing fails at this expression.
       The fail action fn is a callable that takes the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}."""
    self.failAction = fn
    return self
1314
def _skipIgnorables( self, instring, loc ):
    """
    Advance C{loc} past every match of the expressions in C{self.ignoreExprs}.

    Each ignorable is applied repeatedly until it raises C{ParseException}; the
    whole list is re-tried until a full pass finds nothing to skip. Returns the
    new location.
    """
    while True:
        skipped_any = False
        for ignorable in self.ignoreExprs:
            try:
                # keep consuming this ignorable for as long as it matches
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    skipped_any = True
            except ParseException:
                pass
        if not skipped_any:
            return loc
1327
def preParse( self, instring, loc ):
    """
    Return the location at which matching should start: first skip any
    ignorable expressions, then (if C{skipWhitespace} is set) any run of
    characters contained in C{whiteChars}.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
1339
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: consume nothing and return C{loc} with an empty token list."""
    no_tokens = []
    return loc, no_tokens
1342
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: hand back C{tokenlist} unchanged."""
    result = tokenlist
    return result
1345 1346 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Core, uncached parse step for this element.

    Optionally pre-parses (see C{preParse}), calls C{parseImpl} and C{postParse},
    wraps the tokens in a C{ParseResults}, runs the parse actions (when
    C{doActions} or C{self.callDuringTry} is set) and fires any configured
    debug/fail actions.

    Returns a C{(loc, ParseResults)} tuple. An C{IndexError} raised by
    C{parseImpl} inside the guarded calls is converted to a C{ParseException}
    located at the end of C{instring}.
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: needed whenever a debug or fail action may have to be notified
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only pay for the IndexError guard when the element may index past the
        # end of the string, or the location is already at/after the end
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning a value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the exception-reporting wrapper
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1418
def tryParse( self, instring, loc ):
    """
    Attempt a parse at C{loc} with parse actions disabled and return only the
    end location. A C{ParseFatalException} is turned into a plain
    C{ParseException} at C{loc}.
    """
    try:
        result = self._parse( instring, loc, doActions=False )
        return result[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1424
def canParseNext(self, instring, loc):
    """Return C{True} if C{tryParse} succeeds at C{loc}, C{False} if it raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1432
class _UnboundedCache(object):
    """
    Cache with no size limit, backed by a plain dict.

    The accessors are closures over the dict and are bound onto the instance as
    C{get}, C{set}, C{clear} and C{__len__}. C{get} returns the sentinel
    C{not_in_cache} for a missing key.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)

class _OrderedDictFifoCache(object):
    """
    Size-limited first-in/first-out cache built on C{_OrderedDict}: once more
    than C{size} entries are held, the oldest entries are dropped.
    """
    def __init__(self, size):
        self.not_in_cache = not_in_cache = object()

        cache = _OrderedDict()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value
            while len(cache) > size:
                try:
                    # last=False pops the oldest entry
                    cache.popitem(False)
                except KeyError:
                    pass

        def clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)

class _DequeFifoCache(object):
    """
    Size-limited first-in/first-out cache for interpreters where C{_OrderedDict}
    is unavailable; a dict holds the values and a deque records insertion order.
    """
    def __init__(self, size):
        self.not_in_cache = not_in_cache = object()

        cache = {}
        # Deliberately NOT created with maxlen=size: a bounded deque silently
        # discards its oldest key on append, which would leave that key's value
        # in the dict forever. Eviction is done explicitly in set() instead.
        key_fifo = collections.deque()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            # record a key only once, so re-setting it keeps its original position
            if key not in cache:
                key_fifo.append(key)
            cache[key] = value
            while len(key_fifo) > size:
                cache.pop(key_fifo.popleft(), None)

        def clear(self):
            cache.clear()
            key_fifo.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self.__len__ = types.MethodType(cache_len, self)

# pick the FIFO implementation that this interpreter can support
_FifoCache = _OrderedDictFifoCache if _OrderedDict is not None else _DequeFifoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Memoizing front end for C{_parseNoCache}.

    Results (or copies of raised parse exceptions) are stored in
    C{ParserElement.packrat_cache}, keyed on the element and the call
    arguments, and hit/miss counts are kept in
    C{ParserElement.packrat_cache_stats}. All cache access happens while
    holding C{ParserElement.packrat_cache_lock}.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            # a cached failure is replayed by raising it again
            if isinstance(cached, Exception):
                raise cached
            # return a copy of the results so the cached entry is not shared
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(key, pe.__class__(*pe.args))
            raise
        cache.set(key, (result[0], result[1].copy()))
        return result
1542 1543 _parse = _parseNoCache 1544 1545 @staticmethod
def resetCache():
    """Empty the packrat cache and zero its statistics counters in place."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so that the same list object keeps being used
    stats[:] = [0] * len(stats)
1549 1550 _packratEnabled = False 1551 @staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Only the first call has any effect; later calls return without changes.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    else:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    # from now on every element parses through the memoizing entry point
    ParserElement._parse = ParserElement._parseCache
1585
def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        end_loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # everything after the match must be skippable, up to end of string
            end_loc = self.preParse( instring, end_loc )
            end_check = Empty() + StringEnd()
            end_check._parse( instring, end_loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1635
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator; each item yielded is a C{(tokens, start, end)} tuple.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character after the pre-parsed position
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case) is only used for the
                        # comparison below, while 'nextLoc' (the match end) is what gets
                        # assigned - confirm this is intended and not a typo
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not end beyond loc: step forward to avoid looping in place
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1706
def transformString( self, instring ):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this method sets C{keepTabs} to C{True} on this element.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString( instring ):
            # text between the previous match and this one is copied through as is
            pieces.append( instring[prev_end:start] )
            if tokens:
                if isinstance(tokens,ParseResults):
                    pieces += tokens.asList()
                elif isinstance(tokens,list):
                    pieces += tokens
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        # drop empty fragments before flattening and joining
        pieces = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(pieces)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1749
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression: the tokens of every match are collected
    into a single C{ParseResults}.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [ tokens for tokens, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1776
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text before each separator match (followed by the first token of
    the match when C{includeSeparators} is set), and finally the text after the
    last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end of the previous separator match; the next piece starts here
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1798
def __add__(self, other):
    """
    Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
    converts them to L{Literal}s by default (via C{ParserElement._literalStringClass}).

    If C{other} is neither a string nor a C{ParserElement}, a C{SyntaxWarning} is
    issued and C{None} is returned.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1818
def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A string left operand is first converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1830
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop.

    Strings are converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1842
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A string left operand is first converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1854
def __mul__(self, other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is not an int or a tuple of ints/C{None},
    and C{ValueError} for a negative count, a maximum smaller than the
    minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the number of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            # interpolate the operand types into the message (they were previously
            # passed as extra exception args and never formatted)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nest Optionals so that the n optional repetitions must be contiguous
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    simply delegates to C{__mul__} with the same argument.
    """
    product = self.__mul__(other)
    return product
1925
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.

    Strings are converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1937
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A string left operand is first converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1949
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}.

    Strings are converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1961
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A string left operand is first converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1973
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}.

    Strings are converted with C{ParserElement._literalStringClass}; any other
    non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1985
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A string left operand is first converted with C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and C{None}.
    """
    if isinstance(other, basestring):
        other = ParserElement._literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1997
def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
    and returns it.
    """
    negated = NotAny(self)
    return negated
2003
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted, same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    # only None means "no name"; an empty string is still passed to setResultsName
    if name is None:
        return self.copy()
    return self.setResultsName(name)
2022
def suppress(self):
    """
    Suppresses the output of this C{ParserElement}; useful to keep punctuation from
    cluttering up returned output.  Returns a new C{Suppress} wrapping this element.
    """
    wrapped = Suppress(self)
    return wrapped
2029
def leaveWhitespace(self):
    """
    Disables the skipping of whitespace before matching the characters in the
    C{ParserElement}'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Modifies this element in place and returns it, so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
2038
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars.

    Re-enables whitespace skipping, stores C{chars} as this element's whitespace
    characters, and clears C{copyDefaultWhiteChars}.  Returns this element, so
    calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
2047
def parseWithTabs(self):
    """
    Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Sets the C{keepTabs} flag on this element and returns the element.
    """
    setattr(self, "keepTabs", True)
    return self
2056
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    Strings are wrapped in C{Suppress}.  A C{Suppress} expression is added only if
    it is not already in the ignore list; any other expression is copied and
    wrapped in a new C{Suppress} before being added.  Returns this element.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2079
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Each argument that is falsy (e.g. C{None}) is replaced by the corresponding
    module default debug action.  Also turns the C{debug} flag on and returns
    this element.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2089
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable, False to disable.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        # disabling only clears the flag; previously installed actions are left alone
        self.debug = False
    else:
        self.setDebugActions(_defaultStartDebugAction,
                             _defaultSuccessDebugAction,
                             _defaultExceptionDebugAction)
    return self
2130
def __str__(self):
    """Return the C{name} attribute of this element as its string form."""
    label = self.name
    return label
2133
def __repr__(self):
    """Return the same text as the string form, converted through C{_ustr}."""
    text = _ustr(self)
    return text
2136
def streamline(self):
    """
    Mark this element as streamlined and discard the cached string
    representation (C{strRepr}).  Returns this element.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2141
def checkRecursion(self, parseElementList):
    """
    Recursion check hook; this base implementation does nothing and
    returns C{None}.
    """
    return None
2144
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for signature compatibility but is not used by
    this base implementation, which only runs C{checkRecursion} starting from an
    empty element list.  The default is C{None} rather than a shared mutable list.
    """
    self.checkRecursion([])
2150
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The argument is treated as a file object first (its C{read()} is called);
    if that raises C{AttributeError} it is used as a path and opened in text mode.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
2170
def __eq__(self, other):
    """
    Equality test with three cases:
     - another C{ParserElement}: equal if it is the same object or both have
       equal instance attributes (C{vars()})
     - a string: equal if this expression matches the string (see C{matches})
     - anything else: falls back to comparing the C{super} proxy with C{other}
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement, self) == other
2178
def __ne__(self, other):
    """Inequality test; the boolean negation of C{self == other}."""
    same = self == other
    return not same
2181
def __hash__(self):
    """Hash by object identity (the hash of C{id(self)})."""
    identity = id(self)
    return hash(identity)
2184
def __req__(self, other):
    """Reflected-equality helper; returns the result of C{self == other}."""
    outcome = self == other
    return outcome
2187
def __rne__(self, other):
    """Reflected-inequality helper; the boolean negation of C{self == other}."""
    same = self == other
    return not same
2190
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if parsing succeeds, C{False} if it raises a C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2209
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    C{(test string, result)} pairs, where result is the parsed results or the
    exception raised for that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    success = True
    allResults = []
    pending_comments = []
    for t in tests:
        # a line is treated as commentary if it matches the comment expression, or
        # if it is blank and directly follows already-collected comment lines
        is_comment = comment is not None and comment.matches(t, False)
        if is_comment or (pending_comments and not t):
            pending_comments.append(t)
            continue
        if not t:
            continue

        report = ['\n'.join(pending_comments), t]
        pending_comments = []
        try:
            # literal backslash-n in a test string stands for an embedded newline
            t = t.replace(r'\n', '\n')
            result = self.parseString(t, parseAll=parseAll)
            report.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show the offending line and point at the column
                report.append(line(pe.loc, t))
                report.append(' '*(col(pe.loc, t)-1) + '^' + fatal)
            else:
                report.append(' '*pe.loc + '^' + fatal)
            report.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            report.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                report.append('')
            print('\n'.join(report))

        allResults.append((t, result))

    return success, allResults
2340
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """

    def __init__(self):
        # tokens never ask the base class to save results as a list
        super(Token, self).__init__(savelist=False)
2348
class Empty(Token):
    """
    An empty token, will always match.
    """

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
2359
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fails at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2374
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string degrades this instance to an Empty token
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        if (instring[loc] != self.firstMatchChar or
                not (self.matchLen == 1 or instring.startswith(self.match, loc))):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

# short alias, and the class used when plain strings are combined with expressions
_L = Literal
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        identChars = self.identChars
        if self.caseless:
            matched = (
                instring[loc:end].upper() == self.caselessmatch
                # keyword must not run into a following identifier character...
                and (end >= len(instring) or instring[end].upper() not in identChars)
                # ...nor directly follow one
                and (loc == 0 or instring[loc-1].upper() not in identChars)
            )
        else:
            matched = (
                instring[loc] == self.firstMatchChar
                and (self.matchLen == 1 or instring.startswith(self.match, loc))
                and (end >= len(instring) or instring[end] not in identChars)
                and (loc == 0 or instring[loc-1] not in identChars)
            )
        if matched:
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        duplicate = super(Keyword, self).copy()
        # NOTE(review): the copy's identChars is reset to the current class default,
        # not carried over from this instance - confirm this is intended
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2481
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """

    def __init__(self, matchString):
        # the base Literal stores the upper-cased form for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2504
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """

    def __init__(self, matchString, identChars=None):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword ignoring case, requiring a word break on both sides.

        Returns C{(new location, keyword as originally given)}; raises
        C{ParseException} if the text does not match or is adjacent to an
        identifier character.
        """
        end = loc + self.matchLen
        if ((instring[loc:end].upper() == self.caselessmatch) and
                # must not be followed by an identifier character
                (end >= len(instring) or instring[end].upper() not in self.identChars) and
                # must not be preceded by one either, same as Keyword's caseless
                # branch; without this the keyword could match inside a word
                (loc == 0 or instring[loc-1].upper() not in self.identChars)):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)
2522
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare the input at C{loc} with C{match_string} character by character.

        Returns C{(location just past the compared text, ParseResults)} when the
        number of mismatching positions does not exceed C{maxMismatches}; raises
        C{ParseException} otherwise, or when fewer than C{len(match_string)}
        characters remain in the input.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # the whole window was compared, so the match ends at maxloc in
                # input-string coordinates (the offset alone is relative to start)
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2582
2583 2584 -class Word(Token):
2585 """ 2586 Token for matching words composed of allowed character sets. 2587 Defined with string containing all allowed initial characters, 2588 an optional string containing allowed body characters (if omitted, 2589 defaults to the initial character set), and an optional minimum, 2590 maximum, and/or exact length. The default value for C{min} is 1 (a 2591 minimum value < 1 is not valid); the default values for C{max} and C{exact} 2592 are 0, meaning no maximum or exact length restriction. An optional 2593 C{excludeChars} parameter can list characters that might be found in 2594 the input C{bodyChars} string; useful to define a word of all printables 2595 except for one or two characters, for instance. 2596 2597 L{srange} is useful for defining custom character set strings for defining 2598 C{Word} expressions, using range notation from regular expression character sets. 2599 2600 A common mistake is to use C{Word} to match a specific literal string, as in 2601 C{Word("Address")}. Remember that C{Word} uses the string argument to define 2602 I{sets} of matchable characters. This expression would match "Add", "AAA", 2603 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'. 2604 To match an exact literal string, use L{Literal} or L{Keyword}. 2605 2606 pyparsing includes helper strings for building Words: 2607 - L{alphas} 2608 - L{nums} 2609 - L{alphanums} 2610 - L{hexnums} 2611 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.) 2612 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.) 
2613 - L{printables} (any non-whitespace character) 2614 2615 Example:: 2616 # a word composed of digits 2617 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 2618 2619 # a word with a leading capital, and zero or more lowercase 2620 capital_word = Word(alphas.upper(), alphas.lower()) 2621 2622 # hostnames are alphanumeric, with leading alpha, and '-' 2623 hostname = Word(alphas, alphanums+'-') 2624 2625 # roman numeral (not a strict parser, accepts invalid mix of characters) 2626 roman = Word("IVXLCDM") 2627 2628 # any string of non-whitespace characters, except for ',' 2629 csv_value = Word(printables, excludeChars=",") 2630 """
2631 - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
2632 super(Word,self).__init__() 2633 if excludeChars: 2634 initChars = ''.join(c for c in initChars if c not in excludeChars) 2635 if bodyChars: 2636 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 2637 self.initCharsOrig = initChars 2638 self.initChars = set(initChars) 2639 if bodyChars : 2640 self.bodyCharsOrig = bodyChars 2641 self.bodyChars = set(bodyChars) 2642 else: 2643 self.bodyCharsOrig = initChars 2644 self.bodyChars = set(initChars) 2645 2646 self.maxSpecified = max > 0 2647 2648 if min < 1: 2649 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 2650 2651 self.minLen = min 2652 2653 if max > 0: 2654 self.maxLen = max 2655 else: 2656 self.maxLen = _MAX_INT 2657 2658 if exact > 0: 2659 self.maxLen = exact 2660 self.minLen = exact 2661 2662 self.name = _ustr(self) 2663 self.errmsg = "Expected " + self.name 2664 self.mayIndexError = False 2665 self.asKeyword = asKeyword 2666 2667 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 2668 if self.bodyCharsOrig == self.initCharsOrig: 2669 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 2670 elif len(self.initCharsOrig) == 1: 2671 self.reString = "%s[%s]*" % \ 2672 (re.escape(self.initCharsOrig), 2673 _escapeRegexRangeChars(self.bodyCharsOrig),) 2674 else: 2675 self.reString = "[%s][%s]*" % \ 2676 (_escapeRegexRangeChars(self.initCharsOrig), 2677 _escapeRegexRangeChars(self.bodyCharsOrig),) 2678 if self.asKeyword: 2679 self.reString = r"\b"+self.reString+r"\b" 2680 try: 2681 self.re = re.compile( self.reString ) 2682 except Exception: 2683 self.re = None
2684
def parseImpl( self, instring, loc, doActions=True ):
    """
    Match a word at C{loc} and return C{(new_loc, matched_text)}.

    Uses the compiled regular expression when one is available; otherwise
    scans character by character, honouring the length limits and the
    C{asKeyword} setting.  Raises C{ParseException} when no word matches.
    """
    if self.re:
        found = self.re.match(instring,loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[ loc ] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    body = self.bodyChars
    total = len(instring)
    limit = min( begin + self.maxLen, total )
    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    def followedByBodyChar():
        # True when the character right after the match is a body character
        return loc < total and instring[loc] in body

    tooShort = loc - begin < self.minLen
    # with an explicit maximum, the word must not run on past it
    overlong = self.maxSpecified and followedByBodyChar()
    # as a keyword, the match may not touch body characters on either side
    notStandalone = self.asKeyword and (
        (begin>0 and instring[begin-1] in body) or followedByBodyChar() )

    if tooShort or overlong or notStandalone:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2719
def __str__( self ):
    """
    Return the inherited string form if it is available, otherwise a cached
    C{"W:(...)"} description built from the (abbreviated) character sets.
    """
    try:
        return super(Word,self).__str__()
    except Exception:
        pass

    if self.strRepr is not None:
        return self.strRepr

    def abbreviated(chars):
        # show at most the first four characters of a character set
        return chars[:4]+"..." if len(chars)>4 else chars

    if self.initCharsOrig == self.bodyCharsOrig:
        self.strRepr = "W:(%s)" % abbreviated(self.initCharsOrig)
    else:
        self.strRepr = "W:(%s,%s)" % ( abbreviated(self.initCharsOrig), abbreviated(self.bodyCharsOrig) )

    return self.strRepr
2741
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    # type of a compiled regular expression, used to recognise precompiled input
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """
        Build the token from C{pattern}, which is either a string or an
        already compiled regular expression object.

        A string C{pattern} is passed together with C{flags} to
        C{re.compile()} as-is; see the Python C{re} module for an explanation
        of the acceptable patterns and flags.  A compiled expression is used
        directly and its source text is recorded as C{self.pattern}.

        Warns (C{SyntaxWarning}) on an empty string; warns and re-raises the
        C{re} error on an invalid pattern; raises C{ValueError} for any other
        type of C{pattern}.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Use the source text of the compiled expression; str(pattern)
            # would yield "re.compile('...')" on Python 3, not the regex.
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the regular expression at C{loc}.

        Returns C{(end_loc, ParseResults)}; every named group of the match is
        stored in the results under its group name.  Raises C{ParseException}
        when the expression does not match at C{loc}.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for groupName, groupValue in result.groupdict().items():
            ret[groupName] = groupValue
        return loc,ret

    def __str__( self ):
        """Return the inherited string form, or a cached C{"Re:(...)"} fallback."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2814
class QuotedString(Token):
    r"""
    Token that matches text enclosed in quoting characters.

    Constructor parameters:
     - quoteChar - string of one or more characters that opens the quoted string
     - escChar - character used to escape quotes, typically backslash (default=C{None})
     - escQuote - special quote sequence standing for an embedded quote (such as SQL's "" for an embedded ") (default=C{None})
     - multiline - boolean, whether the quoted text may span multiple lines (default=C{False})
     - unquoteResults - boolean, whether the returned text has its quotes removed (default=C{True})
     - endQuoteChar - string of one or more characters that closes the quoted string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """
        Validate the quote delimiters and compile the regular expression
        used for matching.  Raises C{SyntaxError} (after a C{SyntaxWarning})
        when a delimiter is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # whitespace around the delimiters is dropped - it would never match
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # opening quote, then a group of alternatives for the quoted body;
        # the first alternative is any single character that is neither the
        # start of the end quote, nor the escape character, nor (unless
        # multiline) a line break
        openQuote = re.escape(self.quoteChar)
        endStart = _escapeRegexRangeChars(self.endQuoteChar[0])
        if escChar is not None:
            escInClass = _escapeRegexRangeChars(escChar) or ''
        else:
            escInClass = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            lineBreaks = ''
        else:
            self.flags = 0
            lineBreaks = r'\n\r'
        pattern = openQuote + '(?:[^' + endStart + lineBreaks + escInClass + ']'

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside
            # the body, longest prefix first
            partialEnds = [
                "%s[^%s]" % (re.escape(self.endQuoteChar[:n]),
                             _escapeRegexRangeChars(self.endQuoteChar[n]))
                for n in range(len(self.endQuoteChar)-1,0,-1)
            ]
            pattern += '|(?:' + ')|(?:'.join(partialEnds) + ')'
        if escQuote:
            pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        pattern += (r')*%s' % re.escape(self.endQuoteChar))
        self.pattern = pattern

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a quoted string at C{loc} and return C{(end_loc, text)}.

        With C{unquoteResults} set, the delimiters are removed and escaped
        whitespace, escaped characters and escaped quotes are converted.
        Raises C{ParseException} when no quoted string starts at C{loc}.
        """
        # cheap first-character test before running the regular expression
        matched = None
        if instring[loc] == self.firstQuoteChar:
            matched = self.re.match(instring,loc)
        if not matched:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = matched.end()
        text = matched.group()

        if not self.unquoteResults:
            return loc, text

        # strip off quotes
        text = text[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(text,basestring):
            # replace escaped whitespace
            if '\\' in text and self.convertWhitespaceEscapes:
                whitespaceEscapes = (
                    (r'\t', '\t'),
                    (r'\n', '\n'),
                    (r'\f', '\f'),
                    (r'\r', '\r'),
                )
                for literal, actual in whitespaceEscapes:
                    text = text.replace(literal, actual)

            # replace escaped characters
            if self.escChar:
                text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

            # replace escaped quotes
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return loc, text

    def __str__( self ):
        """Return the inherited string form, or a cached description of the delimiters."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2951
class CharsNotIn(Token):
    """
    Token matching a run of characters that are I{not} in a given set.
    Whitespace is matched too unless it is listed in the exclusion set (see
    example).  Defined with a string of all disallowed characters and an
    optional minimum, maximum, and/or exact length.  C{min} defaults to 1 (a
    minimum value < 1 is not valid); C{max} and C{exact} default to 0,
    meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """
        Store the excluded characters and the length limits.  A positive
        C{exact} overrides both C{min} and C{max}.  Raises C{ValueError}
        if C{min} is less than 1.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until an excluded character, the
        maximum length or the end of the string is reached.  Returns
        C{(end_loc, matched_text)}; raises C{ParseException} if the first
        character is excluded or fewer than C{minLen} characters matched.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min( begin+self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the inherited string form, or a cached C{"!W:(...)"} fallback."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # show at most the first four excluded characters
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
3023
class White(Token):
    """
    Matching class for whitespace that is significant to a grammar (pyparsing
    grammars normally skip whitespace).  Define with a string containing the
    whitespace characters to be matched; default is C{" \\t\\r\\n"}.  Also
    takes optional C{min}, C{max}, and C{exact} arguments, as defined for the
    C{L{Word}} class.
    """
    # display names used to build the default name of a White instance
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """
        Store the whitespace characters to match and the length limits.
        The characters in C{ws} are removed from this element's own set of
        skippable whitespace so that they remain available for matching.
        A positive C{exact} overrides both C{min} and C{max}.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        stillSkipped = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars( "".join(stillSkipped) )
        self.name = "".join(White.whiteStrs[ch] for ch in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a run of the configured whitespace characters at C{loc}.
        Returns C{(end_loc, matched_text)}; raises C{ParseException} if the
        first character does not match or the run is shorter than C{minLen}.
        """
        if instring[ loc ] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3073
class _PositionToken(Token):
    """
    Common base for the position-testing tokens defined below it; the
    element is named after its concrete class.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
3081
class GoToColumn(_PositionToken):
    """
    Token that advances to a given column of the input text; useful for
    tabular report scraping.
    """
    def __init__( self, colno ):
        """Remember the target column C{colno}."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Skip ignorable expressions and then whitespace, stopping as soon as
        the target column is reached.  Returns the new location.
        """
        if col(loc,instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return C{(new_loc, skipped_text)}, where C{new_loc} lies at the
        target column.  Raises C{ParseException} if C{loc} is already past
        that column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
3106
class LineStart(_PositionToken):
    """
    Matches when the current position is at the beginning of a line within
    the parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is in column 1; otherwise raise C{ParseException}."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3137
class LineEnd(_PositionToken):
    """
    Matches when the current position is at the end of a line within the
    parse string.
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must not be skipped as whitespace, or it could never match
        skippable = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( skippable )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return C{(loc+1, "\\n")} at a newline, or C{(loc+1, [])} exactly at
        the end of the string; raise C{ParseException} anywhere else.
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3157
class StringStart(_PositionToken):
    """
    Matches when the current position is at the beginning of the parse string.
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at location 0, or at the location that
        pre-parsing from 0 arrives at (i.e. only whitespace and ignorables
        precede C{loc}); otherwise raise C{ParseException}.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3172
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when C{loc} is at or beyond the end of
        C{instring}; raise C{ParseException} when input remains.

        Exactly at the end the returned location is C{loc+1}; beyond the
        end C{loc} is returned unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen is the only remaining case (the former trailing
        # "else: raise" branch could never be reached)
        return loc, []
3190
class WordStart(_PositionToken):
    """
    Matches when the current position is at the beginning of a Word and is
    not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular
    expressions, use C{WordStart(alphanums)}. C{WordStart} will also match at
    the beginning of the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.errmsg = "Not at the start of a word"
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at location 0, or where the previous character
        is not a word character and the current one is; otherwise raise
        C{ParseException}.
        """
        if loc != 0:
            precededByWordChar = instring[loc-1] in self.wordChars
            if precededByWordChar or instring[loc] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3210
class WordEnd(_PositionToken):
    """
    Matches when the current position is at the end of a Word and is not
    followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular
    expressions, use C{WordEnd(alphanums)}. C{WordEnd} will also match at the
    end of the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.errmsg = "Not at the end of a word"
        self.skipWhitespace = False
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at or past the end of the string, or where the
        current character is not a word character and the previous one is;
        otherwise raise C{ParseException}.
        """
        total = len(instring)
        if total>0 and loc<total:
            followedByWordChar = instring[loc] in self.wordChars
            if followedByWordChar or instring[loc-1] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3232
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Store C{exprs} as the list C{self.exprs}.

        C{exprs} may be a single string, a generator or other iterable of
        expressions, or a single expression.  A string, or an iterable made
        up only of strings, is wrapped with
        C{ParserElement._literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)

        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc (fall back to the old location on Python 2).
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement._literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # a single, non-iterable expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index or slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, reset the cached string form, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original contained expressions are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and on
        every contained expression; returns C{self}.  A C{Suppress} that is
        already in C{self.ignoreExprs} is not registered again.
        """
        alreadyIgnored = isinstance( other, Suppress ) and other in self.ignoreExprs
        if not alreadyIgnored:
            super( ParseExpression, self).ignore( other )
            # pass on the entry the base class just appended
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the inherited string form, or a cached C{"ClassName:(exprs)"} fallback."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline this element and all contained expressions, flattening a
        directly nested expression of the same class; returns C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class implementation and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """
        Validate every contained expression, then check this expression for
        recursion.
        """
        # the shared default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3345
class And(ParseExpression):
    """
    Requires every given C{ParseExpression} to match, in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}; C{And.parseImpl} switches to raising C{ParseSyntaxException} after it."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Take the whitespace settings from the first contained expression."""
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse each contained expression in turn and accumulate the tokens.
        Once an C{_ErrorStop} marker has been passed, a failure of a later
        expression is raised as C{ParseSyntaxException}.
        Returns C{(end_loc, tokens)}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, collected = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue
            if not pastErrorStop:
                loc, tokens = expr._parse( instring, loc, doActions )
            else:
                try:
                    loc, tokens = expr._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            if tokens or tokens.haskeys():
                collected += tokens
        return loc, collected

    def __iadd__(self, other ):
        """Append C{other} (a string is wrapped as a literal first) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, up to and including the first one that cannot match empty."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
            if not expr.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"{a b c}"} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(expr) for expr in self.exprs]
            self.strRepr = "{" + " ".join(parts) + "}"

        return self.strRepr
3420
class Or(ParseExpression):
    """
    Requires at least one of the given C{ParseExpression}s to match.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """An C{Or} without alternatives is treated as possibly matching empty."""
        super(Or,self).__init__(exprs, savelist)
        self.mayReturnEmpty = (
            any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True )

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative at C{loc}, then parse for real with the
        candidates from longest to shortest trial match, returning the first
        successful result.  If nothing succeeds, raise the exception that
        got furthest into the input, with this element's error message.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # stable sort: equally long matches keep their listed order
            candidates.sort(key=lambda m: m[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestErr = err
                        furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr


    def __ixor__(self, other ):
        """Append C{other} (a string is wrapped as a literal first) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"{a ^ b}"} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3499
class MatchFirst(ParseExpression):
    """
    Requires at least one of the given C{ParseExpression}s to match.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """A C{MatchFirst} without alternatives is treated as possibly matching empty."""
        super(MatchFirst,self).__init__(exprs, savelist)
        self.mayReturnEmpty = (
            any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True )

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first alternative that parses at C{loc}.
        If none does, raise the exception that got furthest into the input,
        with this element's error message.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        """Append C{other} (a string is wrapped as a literal first) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other )

    def __str__( self ):
        """Return the assigned name if there is one, else a cached C{"{a | b}"} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
3567
3568 3569 -class Each(ParseExpression):
3570 """ 3571 Requires all given C{ParseExpression}s to be found, but in any order. 3572 Expressions may be separated by whitespace. 3573 May be constructed using the C{'&'} operator. 3574 3575 Example:: 3576 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 3577 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 3578 integer = Word(nums) 3579 shape_attr = "shape:" + shape_type("shape") 3580 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 3581 color_attr = "color:" + color("color") 3582 size_attr = "size:" + integer("size") 3583 3584 # use Each (using operator '&') to accept attributes in any order 3585 # (shape and posn are required, color and size are optional) 3586 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) 3587 3588 shape_spec.runTests(''' 3589 shape: SQUARE color: BLACK posn: 100, 120 3590 shape: CIRCLE size: 50 color: BLUE posn: 50,80 3591 color:GREEN size:20 shape:TRIANGLE posn:20,40 3592 ''' 3593 ) 3594 prints:: 3595 shape: SQUARE color: BLACK posn: 100, 120 3596 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 3597 - color: BLACK 3598 - posn: ['100', ',', '120'] 3599 - x: 100 3600 - y: 120 3601 - shape: SQUARE 3602 3603 3604 shape: CIRCLE size: 50 color: BLUE posn: 50,80 3605 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 3606 - color: BLUE 3607 - posn: ['50', ',', '80'] 3608 - x: 50 3609 - y: 80 3610 - shape: CIRCLE 3611 - size: 50 3612 3613 3614 color: GREEN size: 20 shape: TRIANGLE posn: 20,40 3615 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 3616 - color: GREEN 3617 - posn: ['20', ',', '40'] 3618 - x: 20 3619 - y: 40 3620 - shape: TRIANGLE 3621 - size: 20 3622 """
3623 - def __init__( self, exprs, savelist = True ):
3624 super(Each,self).__init__(exprs, savelist) 3625 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 3626 self.skipWhitespace = True 3627 self.initExprGroups = True
3628
3629 - def parseImpl( self, instring, loc, doActions=True ):
3630 if self.initExprGroups: 3631 self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) 3632 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 3633 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 3634 self.optionals = opt1 + opt2 3635 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 3636 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 3637 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 3638 self.required += self.multirequired 3639 self.initExprGroups = False 3640 tmpLoc = loc 3641 tmpReqd = self.required[:] 3642 tmpOpt = self.optionals[:] 3643 matchOrder = [] 3644 3645 keepMatching = True 3646 while keepMatching: 3647 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 3648 failed = [] 3649 for e in tmpExprs: 3650 try: 3651 tmpLoc = e.tryParse( instring, tmpLoc ) 3652 except ParseException: 3653 failed.append(e) 3654 else: 3655 matchOrder.append(self.opt1map.get(id(e),e)) 3656 if e in tmpReqd: 3657 tmpReqd.remove(e) 3658 elif e in tmpOpt: 3659 tmpOpt.remove(e) 3660 if len(failed) == len(tmpExprs): 3661 keepMatching = False 3662 3663 if tmpReqd: 3664 missing = ", ".join(_ustr(e) for e in tmpReqd) 3665 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 3666 3667 # add any unmatched Optionals, in case they have default values defined 3668 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 3669 3670 resultlist = [] 3671 for e in matchOrder: 3672 loc,results = e._parse(instring,loc,doActions) 3673 resultlist.append(results) 3674 3675 finalResults = sum(resultlist, ParseResults([])) 3676 return loc, finalResults
3677
def __str__( self ):
    """
    Return the assigned name when there is one; otherwise build (once) and
    return the cached C{"{a & b & ...}"} rendering of the sub-expressions.
    """
    if not hasattr(self, "name"):
        if self.strRepr is None:
            members = " & ".join(_ustr(sub) for sub in self.exprs)
            self.strRepr = "{" + members + "}"
        return self.strRepr

    return self.name
3686
def checkRecursion( self, parseElementList ):
    """
    Ask every sub-expression to check itself for recursion, handing each one
    a copy of C{parseElementList} extended with this element.
    """
    # copy first so the caller's list is left untouched
    extended = parseElementList[:]
    extended += [ self ]
    for member in self.exprs:
        member.checkRecursion( extended )
3691
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr} (which may be
    C{None}), and forwards parsing, ignore-handling, streamlining and
    validation to it.
    """
    def __init__( self, expr, savelist=False ):
        """
        Parameters:
         - expr - the expression to wrap; a plain string is converted using
              C{ParserElement._literalStringClass}; C{None} leaves the wrapper empty
         - savelist - passed through to C{ParserElement.__init__}
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                # the configured literal class is not a Token, so hand it a Literal to wrap
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the contained expression's parsing attributes on the wrapper
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Delegate parsing to the contained expression.

        Raises C{ParseException} if no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            # pass the real input string (the original passed "") so the
            # exception carries the text being parsed
            raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Turn off whitespace skipping on this element and on a private copy of
        the contained expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # the None test must come before the copy(): the original copied first
        # and raised AttributeError when no expression was set
        if self.expr is not None:
            # copy so that other users of the same expression object are not affected
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression here and on the contained
        expression.  A C{Suppress} that is already registered is not added a
        second time.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # forward whatever the base class just appended
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the contained expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the contained expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the contained expression, then run a recursion check from this element."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """
        Return the base-class string when that succeeds; otherwise a cached
        C{"ClassName:(expr)"} rendering (C{None} if no expression is set and
        nothing has been cached).
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # deliberate best-effort: fall through to the generated representation
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3768
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead.  C{FollowedBy} verifies that the given expression
    matches at the current position but does I{not} advance the parsing
    position, and it always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        super(FollowedBy, self).__init__(expr)
        # a lookahead never consumes input
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # the probe raises if the lookahead fails; its resulting location is discarded
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens
3794
class NotAny(ParseElementEnhance):
    """
    Negative lookahead.  C{NotAny} verifies that the given expression does
    I{not} match at the current position.  It does I{not} advance the parsing
    position, does I{not} skip over leading whitespace, and always returns a
    null token list.  May be constructed using the '~' operator.

    Example::
        # accept any word, provided it does not start with 'end'
        not_end_word = ~Literal("end") + Word(alphas)
    """
    def __init__( self, expr ):
        super(NotAny, self).__init__(expr)
        # set the flag directly instead of calling self.leaveWhitespace(): the
        # setting must not be propagated to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        unwanted_present = self.expr.canParseNext(instring, loc)
        if not unwanted_present:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr

        return self.name
3827
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repeated matching: the expression must match
    once, and is then matched again for as long as it keeps succeeding.  An
    optional C{stopOn} sentinel ends the repetition.
    """
    def __init__( self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel, so that "sentinel found" shows up as a parse failure
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        # bind the bound methods once; they are called on every repetition
        parse_body = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # the first match is mandatory, so failures here (including hitting
        # the stopOn sentinel) propagate to the caller
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_body( instring, loc, doActions, callPreParse=False )
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                if has_ignorables:
                    start = skip_ignorables( instring, loc )
                else:
                    start = loc
                loc, more = parse_body( instring, start, doActions )
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed repetition simply ends the loop
            pass

        return loc, tokens
3865
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr

        return self.name
3900
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # zero matches is acceptable: turn a failed first match into an empty result
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = (loc, [])
        return outcome

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr

        return self.name
3931
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""

    def __bool__(self):
        return False

    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# module-wide sentinel used as the default for Optional's 'default' argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match at most once; parsing continues
          whether or not it is present
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional, self).__init__( expr, savelist=False )
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                # no default was supplied: contribute nothing
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the expression's results name as well
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr

        return self.name
4003
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; scanning stops at the first position
          where C{failOn} matches

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} one character at a time until the target
        expression matches.

        Returns C{(loc, ParseResults)} holding the skipped text, followed by the
        target's own tokens when C{include} was requested.  Raises
        C{ParseException} if the end of the input is reached without a match.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the loop's else clause, so the
                # method returns the text skipped so far instead of raising -
                # confirm that this is the intended meaning of failOn
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are held back until the target is located
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4118
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Attach C{other} as the expression this placeholder stands for and copy
        its parsing attributes onto the placeholder.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # discard any cached string representation
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of '<<'; sidesteps the operator-precedence pitfall described above."""
        return self << other

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this placeholder only; the attached expression is left alone."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the attached expression at most once.  Returns C{self}."""
        if not self.streamlined:
            # set the flag before descending, so a grammar that refers back to
            # this Forward does not recurse endlessly
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the attached expression unless this element is already part
        of C{validateTrace}, then run a recursion check.
        """
        # the default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """
        Return the assigned name, or else the class name followed by ": ...".

        The attached expression is deliberately not rendered: the original
        author stubbed that out because it "creates awful memory and perf
        issues".  The statements that implemented it sat after the
        unconditional return below and could never run, so they have been removed.
        """
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  While no expression is attached yet, the
        "copy" is a new C{Forward} that refers back to this one, so it picks up
        the definition once this one receives it.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4199
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed text C{"..."}."""

    def __str__( self ):
        placeholder = "..."
        return placeholder
4203
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # 'savelist' is accepted for signature compatibility but is not passed on
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
4211
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine, self).__init__(expr)
        if adjacent:
            # stop the contained expressions from skipping whitespace ...
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but let the Combine itself still skip leading whitespace
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        if not self.adjacent:
            super(Combine, self).ignore(other)
            return self
        # in adjacent mode, use ParserElement's own ignore(), bypassing the
        # ParseElementEnhance override
        ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # begin with an emptied copy of the incoming results
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [ combined ]
        return combined
4255
class Group(TokenConverter):
    """
    Converter that returns the matched tokens nested in a list of their own - useful
    for keeping the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # one extra level of nesting around everything that matched
        nested = [ tokenlist ]
        return nested
4276
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for offset, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue
            key = entry[0]
            if isinstance(key, int):
                # integer keys are turned into their stripped string form
                key = _ustr(entry[0]).strip()

            if size == 1:
                # a key with nothing after it maps to the empty string
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keep_whole = len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.haskeys())
                value = remainder if keep_whole else remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, offset)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
4340
class Suppress(TokenConverter):
    """
    Converter that discards the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse( self, instring, loc, tokenlist ):
        # whatever matched is thrown away
        nothing = []
        return nothing

    def suppress( self ):
        # this element already suppresses its tokens; return it unchanged
        return self
4366
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    Call C{reset()} to allow the wrapped action to run again.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            # the wrapped action has already run since the last reset()
            raise ParseException(s,l,"")
        outcome = self.callable(s,l,t)
        # flag only after the action returned, so an action that raised may be retried
        self.called = True
        return outcome

    def reset(self):
        self.called = False
4383
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
    All trace output is written to C{sys.stderr}.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)
    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are always (s, l, t)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            label = paArgs[0].__class__.__name__ + '.' + label
        emit = sys.stderr.write
        emit( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        emit( "<<leaving %s (ret: %r)\n" % (label,ret) )
        return ret
    try:
        # best effort: not every callable exposes __name__
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    list_name = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if not combine:
        # delimiters are matched but kept out of the results
        grammar = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        # delimiters stay in, and Combine joins everything into one token
        grammar = Combine( expr + ZeroOrMore( delim + expr ) )
    return grammar.setName(list_name)
4446
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # runs when the count has been parsed: define the array body for that length
        n = t[0]
        # a real conditional expression instead of "n and A or B", so the
        # choice depends only on n and not on the truth value of the Group object
        arrayExpr << (Group(And([expr]*n)) if n else Group(empty))
        # the count itself is dropped from the results
        return []
    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    else:
        # copy, so the caller's expression does not acquire our name and parse action
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """Return a new flat list with the items of C{L}, expanding nested lists in place (depth-first)."""
    flat = []
    for item in L:
        # only list instances are expanded; tuples, strings etc. are kept as single items
        flat += _flatten(item) if isinstance(item, list) else [item]
    return flat
4486
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    repeater = Forward()
    def copyTokenToRepeater(s,l,t):
        # called whenever expr matches: redefine the repeater from the tokens just seen
        if not t:
            repeater << Empty()
            return
        if len(t) == 1:
            repeater << t[0]
            return
        # several (possibly nested) tokens: match them as a sequence of literals
        flat_tokens = _flatten(t.asList())
        repeater << And(Literal(tok) for tok in flat_tokens)
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    repeater.setName('(prev) ' + _ustr(expr))
    return repeater
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    repeater = Forward()
    clone = expr.copy()
    repeater <<= clone
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched this time ...
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # ... and reject a repeat whose tokens are not the same
            if _flatten(t.asList()) == expected:
                return
            raise ParseException("",0,"")
        repeater.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    repeater.setName('(prev) ' + _ustr(expr))
    return repeater
def _escapeRegexRangeChars(s):
    """
    Return C{s} with a backslash placed before each of the characters
    backslash, '^', '-' and ']', and with newline and tab characters replaced
    by the two-character sequences C{\\n} and C{\\t}.
    """
    # the backslash is first in the list, so backslashes added for the other
    # characters are not escaped a second time
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)
4550
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    If C{strs} is neither a string nor an iterable, a C{SyntaxWarning} is issued;
    whenever no symbols are found, a C{NoMatch} expression is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # the ABC aliases in the top-level collections module were removed in
    # Python 3.10; import from collections.abc, falling back for Python 2
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol ahead of an earlier symbol that is
    # a prefix of it, so the longer alternative is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing was changed for position i, move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # deliberate best-effort: warn and fall through to the MatchFirst form
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4623
def dictOf( key, value ):
    """
    Helper to define a dictionary-style expression from a key pattern and a value
    pattern.  Builds C{Dict( ZeroOrMore( Group( key + value ) ) )}, so the caller
    does not have to nest the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}}
    wrappers in the proper order.

    The key pattern can include delimiting markers or punctuation, as long as they
    are suppressed, thereby leaving the significant key text.  The value pattern
    can include named results, so that the C{Dict} results can include named
    token fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        # label and data_word are expressions defined by the caller
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        result = dictOf(attr_label, attr_value).parseString(text)
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped key/value pair per dictionary entry
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
4658
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text.  By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the matched tokens
    are replaced in place by a single token containing the original matched text, and
    the C{L{ParseResults}} keeps any results names that were originally matched.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose parse action returns the current location
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    stopMarker = startMarker.copy()
    # NOTE(review): presumably disables whitespace/ignorable skipping so the end
    # location is the exact end of the match - confirm against ParserElement
    stopMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + stopMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens in place; popping removes the two marker names
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.

    Wraps C{expr} in a C{L{TokenConverter}} whose parse action returns only
    the first parsed token (C{t[0]}).
    """
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t:t[0])
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the
    input string.  The result is a C{L{Group}} with the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose parse action returns the current location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLoc = locator("locn_start")
    # the end marker must not skip whitespace, or it would report the start
    # of the next token instead of the end of this one
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + expr("value") + endLoc)
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the '[...]' range strings accepted by srange()
# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## (either case of 'x') -> character with that hex code point.
# The digits are taken as everything after the 'x' marker; str.lstrip must not
# be used here because it strips a *set* of characters, which mangles values
# with leading zeros ('\x00') and does not handle an uppercase 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][t[0].lower().index('x')+1:],16)))
# \0## -> character with that octal code point
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  If the input cannot be parsed or
    expanded, an empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(part):
        # a grouped pair is a range (inclusive of both ends); anything else is
        # a single character that is used as-is
        if isinstance(part,ParseResults):
            first, last = ord(part[0]), ord(part[1])
            return ''.join(unichr(c) for c in range(first,last+1))
        return part

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        # best effort: any parse or expansion failure yields an empty set
        return ""
4764
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.

    Returns a parse action that raises C{L{ParseException}} unless the column
    of the match location (as computed by C{col}) equals C{n}.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    The returned parse action ignores its arguments and returns a new
    one-element list containing C{replStr} on every call.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda _instring, _loc, _toks: [replStr]
4788
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.
    Returns the first token with its first and last characters dropped.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4802
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a
    ParseResults list.  If any additional args are passed, they are forwarded to the
    given function as additional arguments after the token, as in
    C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert
    the parsed data to an integer using base 16.

    The returned parse action is given the name of C{func} (or of its class, if
    C{func} has no C{__name__}).

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # label the parse action after the mapped function
    try:
        fallback_name = func.__class__.__name__
        func_name = getattr(func, '__name__', fallback_name)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} may be a string (converted to a C{Keyword}, caseless unless C{xml}
    is true) or an existing expression, whose C{name} is then used to build the
    results names.  Returns the 2-tuple C{(openTag, closeTag)}; both expressions
    get a C{tag} attribute holding that name.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: attribute values are always double-quoted and always present
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrEntry = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: names are lower-cased, values may be quoted, bare, or absent
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrEntry = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" is True for a self-closing tag ('/' before the closing '>')
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrEntry)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # e.g. "ns:div" -> "NsDiv", giving results names "startNsDiv" / "endNsDiv"
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4881
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name.
    Matches tags in either upper or lower case, attributes with namespaces and with
    quoted or unquoted values.  Delegates to C{_makeTags} with C{xml=False}.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags( tagStr, xml=False )
4900
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name.
    Matches tags only in the given upper/lower case.  Delegates to C{_makeTags}
    with C{xml=True}.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags( tagStr, xml=True )
4909
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Use C{withAttribute} to qualify a
    starting tag with a required attribute value, to avoid false matches on common
    tags such as C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values.  Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, use the dict or tuple form.  If any
    positional name-value tuples are given, keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    The returned parse action raises C{L{ParseException}} if a required attribute
    is missing from the tokens or has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>
        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional (name, value) pairs take precedence over keyword arguments
    requested = args[:] if args else attrDict.items()
    attrs = [(k,v) for k,v in requested]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actualValue = tokens[attrName]
                if actualValue != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actualValue, attrValue))
    return pa

# sentinel value meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    The attribute tested is C{"class"}, or C{"<namespace>:class"} when a non-empty
    C{namespace} is given.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this div has no class</div>
            </div>
        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
# associativity markers for infixNotation operator definitions; each is a
# unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic element for the nested
       expression grammar
     - opList - list of tuples (or lists), one for each operator precedence level in the
       expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
           may also be a string, which will be converted to a Literal;
           if numTerms is 3, opExpr is a tuple or list of two expressions, for the
           two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
           be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
           right or left associative, using the pyparsing-defined
           constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
        - parseAction is the parse action to be associated with
           expressions matching this operator expression (the
           parse action tuple member may be omitted); if the parse action
           is passed a tuple or list of functions, this is equivalent to
           calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as two expressions,
    if numTerms is not 1, 2, or 3, or if the associativity is not one of the
    C{opAssoc} constants.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before formatting the name, so that a malformed ternary
            # operator reports this ValueError rather than a formatting TypeError
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # each level matches its own operator form, or falls back to the
        # next-higher-precedence level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  Inside the quotes, the patterns accept any
# character other than the quote, newline, carriage return or backslash; a
# doubled quote character; or a backslash escape (including \x## hex escapes).
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")
quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' |
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.  In that case C{opener} and C{closer} must
    both be strings.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if C{opener} equals C{closer}, or if no content
    expression is given and the delimiters are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be built from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer)==1
        if singleCharDelims and ignoreExpr is not None:
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken))
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookaheads, since they
            # cannot be excluded character by character
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))

    ret = Forward()
    if ignoreExpr is not None:
        items = ignoreExpr | ret | content
    else:
        items = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5224
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
         is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
         (multiple statementWithIndentedBlock expressions within a single grammar
         should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
         the current level; set to False for block of left-most statements
         (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified: it is set to ignore a
    backslash followed by a line end.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''

        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to check
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if column != expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the enclosing block: this becomes the current level
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT)
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# 8-bit alphabetic and punctuation character sets, expanded from ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic open/close tag expressions matching any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character (nbsp maps to a plain space)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Helper parser action to replace common HTML entities with their special characters.

    Looks up the C{entity} results name of C{t} in C{_htmlEntityMap}; returns
    C{None} if the name is not in the map.
    """
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one item of a comma-separated list: printable words, optionally joined by
# interior spaces/tabs that are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(Word(printables, excludeChars=',') +
              Optional( Word(" \t") +
                        ~Literal(",") + ~LineEnd() ) )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""

# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() makes a copy, so swapping in convertToFloat here does not
    # disturb the int-returning parse action on signed_integer itself
    fraction = (signed_integer().setParseAction(convertToFloat)
                + '/'
                + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # tokens are [numerator, '/', denominator]; reduce them to the quotient
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction
                     | signed_integer + Optional(Optional('-').suppress() + fraction)
                     ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction'; either part may appear alone.
    Returns the sum of the parts: a float whenever a fraction is present, otherwise the int"""
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
                           + "::"
                           + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # a '::'-abbreviated address must contain fewer than 8 hex groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter is captured and back-referenced (\1), so all five
    # delimiters between the six octets must be the same character
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action converts the first token only.  If C{strptime}
        rejects it with a C{ValueError}, a L{ParseException} carrying the same
        message is raised at the current parse location.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a bad date as a parse failure rather than a raw ValueError
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action converts the first token only.  If C{strptime}
        rejects it with a C{ValueError}, a L{ParseException} carrying the same
        message is raised at the current parse location.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a bad datetime as a parse failure rather than a raw ValueError
                raise ParseException(s, l, str(ve))
        return cvt_fn

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd}); the day, or the month and day, may be omitted (C{yyyy-mm} or C{yyyy})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches any opening or closing tag and suppresses it from the output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; it is run through C{transformString}
        with an expression that suppresses every opening and closing tag, and
        the resulting string is returned.

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                      + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""


if __name__ == "__main__":

    # small SQL-like grammar used to demonstrate runTests
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)