rivescript

#!/usr/bin/env python
# pyRiveScript - A RiveScript interpreter written in Python.

__author__ = 'Noah Petherbridge'
__copyright__ = 'Copyright 2013, Noah Petherbridge'
__credits__ = [
    'Noah Petherbridge',
    'dinever'
]
__license__ = 'GPL'
__maintainer__ = 'Noah Petherbridge'
__status__ = 'Production'
__docformat__ = 'plaintext'

__all__ = ['rivescript']
__version__ = '1.02'

import os
import glob
import re
import string
import random
import pprint
import copy

from python import PyRiveObjects

# Common regular expressions.
# Raw strings are used so the backslashes reach the regex engine verbatim
# instead of relying on Python leaving unknown string escapes untouched.
re_equals = re.compile(r'\s*=\s*')
re_ws = re.compile(r'\s+')
re_objend = re.compile(r'<\s*object')
re_weight = re.compile(r'\{weight=(\d+)\}')
re_inherit = re.compile(r'\{inherits=(\d+)\}')
re_wilds = re.compile(r'[\s\*\#\_]+')
re_rot13 = re.compile(r'<rot13sub>(.+?)<bus31tor>')
re_nasties = re.compile(r'[^A-Za-z0-9 ]')

# Version of RiveScript we support.
rs_version = 2.0

40 41 -class RiveScript:

42 """A RiveScript interpreter for Python 2.""" 43 _debug = False # Debug mode 44 _strict = True # Strict mode 45 _logf = '' # Log file for debugging 46 _depth = 50 # Recursion depth limit 47 _gvars = {} # 'global' variables 48 _bvars = {} # 'bot' variables 49 _subs = {} # 'sub' variables 50 _person = {} # 'person' variables 51 _arrays = {} # 'array' variables 52 _users = {} # 'user' variables 53 _freeze = {} # frozen 'user' variables 54 _includes = {} # included topics 55 _lineage = {} # inherited topics 56 _handlers = {} # Object handlers 57 _objlangs = {} # Languages of objects used 58 _topics = {} # Main reply structure 59 _thats = {} # %Previous reply structure 60 _sorted = {} # Sorted buffers 61 62 ############################################################################ 63 # Initialization and Utility Methods # 64 ############################################################################ 65

def __init__(self, debug=False, strict=True, depth=50, log=""):
    """Initialize a new RiveScript interpreter.

    bool debug:  Specify a debug mode.
    bool strict: Strict mode (RS syntax errors are fatal)
    str  log:    Path of a log file that debug messages are appended to.
    int  depth:  Specify the recursion depth limit."""
    self._debug = debug
    self._strict = strict
    self._depth = depth
    self._log = log

    # Give every interpreter its own containers. The class-level dicts are
    # shared by all instances, so writing into them (as registering the
    # default handler below does) would leak state between interpreters.
    self._gvars = {}     # 'global' variables
    self._bvars = {}     # 'bot' variables
    self._subs = {}      # 'sub' variables
    self._person = {}    # 'person' variables
    self._arrays = {}    # 'array' variables
    self._users = {}     # 'user' variables
    self._freeze = {}    # frozen 'user' variables
    self._includes = {}  # included topics
    self._lineage = {}   # inherited topics
    self._handlers = {}  # Object handlers
    self._objlangs = {}  # Languages of objects used
    self._topics = {}    # Main reply structure
    self._thats = {}     # %Previous reply structure
    self._sorted = {}    # Sorted buffers

    # Define the default Python language handler.
    self._handlers["python"] = PyRiveObjects()

    self._say("Interpreter initialized.")

@classmethod
def VERSION(self=None):
    """Report the version number of the RiveScript library.

    Works both as a class method and when called on a RiveScript object."""
    library_version = __version__
    return library_version

89

def _say(self, message):
    """Emit a debug message.

    The message is printed to STDOUT with an "[RS]" prefix when debug mode
    is on, and appended to the log file when a log path was configured.

    str message: The text to report."""
    if self._debug:
        # Single-argument print() behaves the same on Python 2 and 3.
        print("[RS] %s" % (message,))
    # NOTE(review): the source listing lost its indentation, so whether the
    # log write was nested under the debug check cannot be confirmed; it is
    # treated as independent here.
    if self._log:
        # Log it to the file; the context manager closes the handle even if
        # the write fails.
        with open(self._log, 'a') as fh:
            fh.write("[RS] " + message + "\n")

98

def _warn(self, message, fname='', lineno=0):
    """Print a warning to STDOUT.

    The prefix is "[RS::Warning]" in debug mode and "[RS]" otherwise. When
    both a file name and a positive line number are given, the location is
    appended as "at <fname> line <lineno>".

    str message: The warning text.
    str fname:   Name of the document being parsed (optional).
    int lineno:  Line number within that document (optional)."""
    if self._debug:
        prefix = "[RS::Warning]"
    else:
        prefix = "[RS]"

    if len(fname) and lineno > 0:
        text = "%s %s at %s line %s" % (prefix, message, fname, lineno)
    else:
        text = "%s %s" % (prefix, message)

    # One print() call with a single string works on Python 2 and 3 alike.
    print(text)

############################################################################
# Loading and Parsing Methods                                              #
############################################################################

def load_directory(self, directory, ext='.rs'):
    """Load RiveScript documents from a directory.

    Every file in `directory` whose name ends with `ext` is passed to
    load_file(). Files are loaded in sorted name order so that the result
    does not depend on the order the file system happens to list them in.
    A warning is issued and nothing is loaded if `directory` is not a
    directory.

    str directory: The folder to read.
    str ext:       File name suffix to look for (default '.rs')."""
    self._say("Loading from directory: " + directory + "/*" + ext)

    if not os.path.isdir(directory):
        self._warn("Error: " + directory + " is not a directory.")
        return

    pattern = os.path.join(directory, '*' + ext)
    # glob makes no ordering promise; sort for a reproducible load order.
    for item in sorted(glob.glob(pattern)):
        self.load_file(item)

123

def load_file(self, filename):
    """Load and parse a RiveScript document.

    Reads all lines of `filename` and hands them to the parser, using the
    file name as the label for warnings and syntax errors.

    str filename: Path of the document to read."""
    self._say("Loading file: " + filename)

    # The context manager guarantees the handle is closed even if reading
    # raises.
    with open(filename, 'r') as fh:
        lines = fh.readlines()

    self._say("Parsing " + str(len(lines)) + " lines of code from " + filename)
    self._parse(filename, lines)

134

def stream(self, code):
    """Stream in RiveScript source code dynamically.

    `code` is normally an array of lines of RiveScript code. A single string
    is also accepted and is split on newlines first, since iterating a
    string directly would feed the parser one character at a time."""
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str         # Python 3

    if isinstance(code, string_types):
        code = code.split("\n")

    self._say("Streaming code.")
    self._parse("stream()", code)

141

def _parse(self, fname, code):
    """Parse RiveScript code into memory.

    str fname: Label for the source (a file name or "stream()"), used in
               warnings and syntax error messages.
    list code: The lines of RiveScript source.

    Definitions, topics, triggers, replies, redirects and conditions are
    stored on the interpreter; object macros are handed to the handler
    registered for their language.

    Raises Exception on a syntax error when strict mode is on. In
    non-strict mode a syntax error is reported as a warning and parsing of
    this source stops."""
    self._say("Parsing code")

    # Track temporary variables.
    topic = 'random'  # Default topic=random
    lineno = 0        # Line numbers for syntax tracking
    comment = False   # In a multi-line comment
    inobj = False     # In an object
    objname = ''      # The name of the object we're in
    objlang = ''      # The programming language of the object
    objbuf = []       # Object contents buffer
    ontrig = ''       # The current trigger
    repcnt = 0        # Reply counter
    concnt = 0        # Condition counter
    is_that = ''      # Is a %Previous trigger

    # Read each line.
    for lp, line in enumerate(code):
        lineno = lineno + 1

        self._say("Line: " + line + " (topic: " + topic + ") incomment: " + str(inobj))
        if len(line.strip()) == 0:  # Skip blank lines
            continue

        # In an object?
        if inobj:
            # The end tag may be indented, so ignore leading whitespace when
            # looking for it (re.match only matches at the very start).
            if re.match(re_objend, line.lstrip()):
                # End the object.
                if len(objname):
                    # Call the object's handler.
                    if objlang in self._handlers:
                        self._objlangs[objname] = objlang
                        self._handlers[objlang].load(objname, objbuf)
                    else:
                        self._warn("Object creation failed: no handler for " + objlang, fname, lineno)
                objname = ''
                objlang = ''
                objbuf = []
                inobj = False
            else:
                objbuf.append(line)
            continue

        line = line.strip()  # Trim excess space. We do it down here so we
                             # don't mess up python objects!

        # Look for comments.
        if line[:2] == '//':  # A single-line comment.
            continue
        elif line[0] == '#':
            self._warn("Using the # symbol for comments is deprecated", fname, lineno)
        elif line[:2] == '/*':  # Start of a multi-line comment.
            if '*/' not in line:  # Cancel if the end is here too.
                comment = True
            continue
        elif '*/' in line:
            comment = False
            continue
        if comment:
            continue

        # Separate the command from the data.
        if len(line) < 2:
            self._warn("Weird single-character line '" + line + "' found.", fname, lineno)
            continue
        cmd = line[0]
        line = line[1:].strip()

        # Ignore inline comments if there's a space before and after
        # the // symbols.
        if " // " in line:
            line = line.split(" // ")[0].strip()

        # Run a syntax check on this line.
        syntax_error = self.check_syntax(cmd, line)
        if syntax_error:
            # There was a syntax error! Are we enforcing strict mode?
            syntax_error = "Syntax error in " + fname + " line " + str(lineno) + ": " \
                + syntax_error + " (near: " + cmd + " " + line + ")"
            if self._strict:
                raise Exception(syntax_error)
            else:
                self._warn(syntax_error)
                return  # Don't try to continue

        # Reset the %Previous state if this is a new +Trigger.
        if cmd == '+':
            is_that = ''

        # Do a lookahead for ^Continue and %Previous commands.
        for i in range(lp + 1, len(code)):
            lookahead = code[i].strip()
            if len(lookahead) < 2:
                continue
            look_cmd = lookahead[0]
            lookahead = lookahead[1:].strip()

            # Only continue if the lookahead line has any data.
            if len(lookahead) != 0:
                # The lookahead command has to be either a % or a ^.
                if look_cmd != '^' and look_cmd != '%':
                    break

                # If the current command is a +, see if the following is
                # a %.
                if cmd == '+':
                    if look_cmd == '%':
                        is_that = lookahead
                        break
                    else:
                        is_that = ''

                # If the current command is a ! and the next command(s) are
                # ^, we'll tack each extension on as a line break (which is
                # useful information for arrays).
                if cmd == '!':
                    if look_cmd == '^':
                        line += "<crlf>" + lookahead
                    continue

                # If the current command is not a ^ and the line after is
                # not a %, but the line after IS a ^, then tack it on to the
                # end of the current line.
                if cmd != '^' and look_cmd != '%':
                    if look_cmd == '^':
                        line += lookahead
                    else:
                        break

        self._say("Command: " + cmd + "; line: " + line)

        # Handle the types of RiveScript commands.
        if cmd == '!':
            # ! DEFINE
            # Split on the first '=' only, so a value that itself contains
            # an '=' is kept whole instead of being dropped.
            halves = re.split(re_equals, line, 1)
            left = re.split(re_ws, halves[0].strip(), 2)
            value, kind, var = '', '', ''
            if len(halves) == 2:
                value = halves[1].strip()
            if len(left) >= 1:
                kind = left[0].strip()
                if len(left) >= 2:
                    var = ' '.join(left[1:]).strip()

            # Remove 'fake' line breaks unless this is an array.
            if kind != 'array':
                value = re.sub(r'<crlf>', '', value)

            # Handle version numbers.
            if kind == 'version':
                # Verify we support it.
                try:
                    wanted = float(value)
                except ValueError:
                    self._warn("Error parsing RiveScript version number: not a number", fname, lineno)
                    continue
                if wanted > rs_version:
                    # rs_version is a float and must be converted before it
                    # can be joined to the message.
                    self._warn("Unsupported RiveScript version. We only support " + str(rs_version), fname, lineno)
                    return
                continue

            # All other types of defines require a variable and value name.
            if len(var) == 0:
                self._warn("Undefined variable name", fname, lineno)
                continue
            elif len(value) == 0:
                self._warn("Undefined variable value", fname, lineno)
                continue

            # Handle the rest of the types.
            if kind == 'global':
                # 'Global' variables
                self._say("\tSet global " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._gvars[var]
                    except KeyError:
                        self._warn("Failed to delete missing global variable", fname, lineno)
                else:
                    self._gvars[var] = value

                # Handle flipping debug and depth vars.
                if var == 'debug':
                    self._debug = (value.lower() == 'true')
                elif var == 'depth':
                    try:
                        self._depth = int(value)
                    except ValueError:
                        self._warn("Failed to set 'depth' because the value isn't a number!", fname, lineno)
                elif var == 'strict':
                    self._strict = (value.lower() == 'true')
            elif kind == 'var':
                # Bot variables
                self._say("\tSet bot variable " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._bvars[var]
                    except KeyError:
                        self._warn("Failed to delete missing bot variable", fname, lineno)
                else:
                    self._bvars[var] = value
            elif kind == 'array':
                # Arrays
                self._say("\tArray " + var + " = " + value)

                if value == '<undef>':
                    try:
                        del self._arrays[var]
                    except KeyError:
                        self._warn("Failed to delete missing array", fname, lineno)
                    continue

                # Did this have multiple parts?
                parts = value.split("<crlf>")

                # Process each line of array data.
                fields = []
                for val in parts:
                    if '|' in val:
                        fields.extend(val.split('|'))
                    else:
                        fields.extend(re.split(re_ws, val))

                # Convert any remaining '\s' escape codes into spaces. The
                # list is rebuilt because strings are immutable; rebinding
                # the loop variable would throw the result away.
                fields = [f.replace(r'\s', ' ') for f in fields]

                self._arrays[var] = fields
            elif kind == 'sub':
                # Substitutions
                self._say("\tSubstitution " + var + " => " + value)

                if value == '<undef>':
                    try:
                        del self._subs[var]
                    except KeyError:
                        self._warn("Failed to delete missing substitution", fname, lineno)
                else:
                    self._subs[var] = value
            elif kind == 'person':
                # Person Substitutions
                self._say("\tPerson Substitution " + var + " => " + value)

                if value == '<undef>':
                    try:
                        del self._person[var]
                    except KeyError:
                        self._warn("Failed to delete missing person substitution", fname, lineno)
                else:
                    self._person[var] = value
            else:
                self._warn("Unknown definition type '" + kind + "'", fname, lineno)
        elif cmd == '>':
            # > LABEL
            temp = re.split(re_ws, line)
            kind = temp[0]
            name = ''
            fields = []
            if len(temp) >= 2:
                name = temp[1]
            if len(temp) >= 3:
                fields = temp[2:]

            # Handle the label types.
            if kind == 'begin':
                # The BEGIN block.
                self._say("\tFound the BEGIN block.")
                kind = 'topic'
                name = '__begin__'
            if kind == 'topic':
                # Starting a new topic.
                self._say("\tSet topic to " + name)
                ontrig = ''
                topic = name

                # Does this topic include or inherit another one?
                mode = ''  # or 'inherits' or 'includes'
                if len(fields) >= 2:
                    for field in fields:
                        if field == 'includes':
                            mode = 'includes'
                        elif field == 'inherits':
                            mode = 'inherits'
                        elif mode != '':
                            # This topic is either inherited or included.
                            if mode == 'includes':
                                if name not in self._includes:
                                    self._includes[name] = {}
                                self._includes[name][field] = 1
                            else:
                                if name not in self._lineage:
                                    self._lineage[name] = {}
                                self._lineage[name][field] = 1
            elif kind == 'object':
                # If a field was provided, it should be the programming
                # language.
                lang = None
                if len(fields) > 0:
                    lang = fields[0].lower()

                # Only try to parse a language we support.
                ontrig = ''
                if lang is None:
                    self._warn("Trying to parse unknown programming language", fname, lineno)
                    lang = 'python'  # Assume it's Python.

                # See if we have a defined handler for this language.
                if lang in self._handlers:
                    # We have a handler, so start loading the code.
                    objname = name
                    objlang = lang
                    objbuf = []
                    inobj = True
                else:
                    # We don't have a handler, just ignore it. We still
                    # enter object mode so its body is skipped.
                    objname = ''
                    objlang = ''
                    objbuf = []
                    inobj = True
            else:
                self._warn("Unknown label type '" + kind + "'", fname, lineno)
        elif cmd == '<':
            # < LABEL
            kind = line

            if kind == 'begin' or kind == 'topic':
                self._say("\tEnd topic label.")
                topic = 'random'
            elif kind == 'object':
                self._say("\tEnd object label.")
                inobj = False
        elif cmd == '+':
            # + TRIGGER
            self._say("\tTrigger pattern: " + line)
            if len(is_that):
                self._initTT('thats', topic, is_that, line)
            else:
                self._initTT('topics', topic, line)
            ontrig = line
            repcnt = 0
            concnt = 0
        elif cmd == '-':
            # - REPLY
            if ontrig == '':
                self._warn("Response found before trigger", fname, lineno)
                continue
            self._say("\tResponse: " + line)
            if len(is_that):
                self._thats[topic][is_that][ontrig]['reply'][repcnt] = line
            else:
                self._topics[topic][ontrig]['reply'][repcnt] = line
            repcnt = repcnt + 1
        elif cmd == '%':
            # % PREVIOUS
            pass  # This was handled above.
        elif cmd == '^':
            # ^ CONTINUE
            pass  # This was handled above.
        elif cmd == '@':
            # @ REDIRECT
            if ontrig == '':
                # Without a trigger there is no entry to attach this to.
                self._warn("Redirect found before trigger", fname, lineno)
                continue
            self._say("\tRedirect response to " + line)
            if len(is_that):
                self._thats[topic][is_that][ontrig]['redirect'] = line
            else:
                self._topics[topic][ontrig]['redirect'] = line
        elif cmd == '*':
            # * CONDITION
            if ontrig == '':
                # Without a trigger there is no entry to attach this to.
                self._warn("Condition found before trigger", fname, lineno)
                continue
            self._say("\tAdding condition: " + line)
            if len(is_that):
                self._thats[topic][is_that][ontrig]['condition'][concnt] = line
            else:
                self._topics[topic][ontrig]['condition'][concnt] = line
            concnt = concnt + 1
        else:
            self._warn("Unrecognized command \"" + cmd + "\"", fname, lineno)
            continue

527

def check_syntax(self, cmd, line):
    """Syntax check a RiveScript command and line.

    str cmd:  The one-character command symbol (!, >, +, %, @, -, ^, /, *).
    str line: The rest of the line after the command symbol.

    Returns a syntax error string on error; None otherwise."""

    # Run syntax checks based on the type of command.
    if cmd == '!':
        # ! Definition
        #   - Must be formatted like this:
        #     ! type name = value
        #     OR
        #     ! type = value
        if not re.match(r'^.+(?:\s+.+|)\s*=\s*.+?$', line):
            return "Invalid format for !Definition line: must be '! type name = value' OR '! type = value'"
    elif cmd == '>':
        # > Label
        #   - The "begin" label must have only one argument ("begin")
        #   - "topic" labels must be lowercased but can inherit other topics (a-z0-9_\s)
        #   - "object" labels must follow the same rules as "topic", but don't need to be lowercase
        parts = re.split(" ", line, 2)
        if parts[0] == "begin" and len(parts) > 1:
            return "The 'begin' label takes no additional arguments, should be verbatim '> begin'"
        elif parts[0] == "topic":
            # re.search scans the whole line; re.match would only ever look
            # at the first character.
            if re.search(r'[^a-z0-9_\-\s]', line):
                return "Topics should be lowercased and contain only numbers and letters"
        elif parts[0] == "object":
            if re.search(r'[^A-Za-z0-9_\-\s]', line):
                return "Objects can only contain numbers and letters"
    elif cmd == '+' or cmd == '%' or cmd == '@':
        # + Trigger, % Previous, @ Redirect
        # This one is strict. The triggers are to be run through the regexp engine,
        # therefore it should be acceptable for the regexp engine.
        #   - Entirely lowercase
        #   - No symbols except: ( | ) [ ] * _ # @ { } < > =
        #   - All brackets should be matched

        # Look for obvious errors anywhere in the line.
        if re.search(r'[^a-z0-9(|)\[\]*_#@{}<>=\s]', line):
            return "Triggers may only contain lowercase letters, numbers, and these symbols: ( | ) [ ] * _ # @ { } < > ="

        # Count brackets: every opener needs a closer. Checked in this order
        # so the first mismatch reported is stable.
        bracket_pairs = (
            ('(', ')', "parenthesis"),
            ('[', ']', "square"),
            ('{', '}', "curly"),
            ('<', '>', "angle"),
        )
        for opener, closer, label in bracket_pairs:
            if line.count(opener) != line.count(closer):
                return "Unmatched " + label + " brackets"
    elif cmd == '-' or cmd == '^' or cmd == '/':
        # - Trigger, ^ Continue, / Comment
        # These commands take verbatim arguments, so their syntax is loose.
        pass
    elif cmd == '*':
        # * Condition
        #   Syntax for a conditional is as follows:
        #   * value symbol value => response
        if not re.match(r'^.+?\s*(?:==|eq|!=|ne|<>|<|<=|>|>=)\s*.+?=>.+?$', line):
            return "Invalid format for !Condition: should be like '* value symbol value => response'"

    return None

619

def _initTT(self, toplevel, topic, trigger, what=''):
    """Make sure the reply-tree entry for a trigger exists.

    For toplevel 'topics' the entry lives at _topics[topic][trigger]; for
    toplevel 'thats' it lives at _thats[topic][trigger][what]. Missing
    levels are created and an entry that already exists is left alone."""
    if toplevel == 'topics':
        triggers = self._topics.setdefault(topic, {})
        if trigger not in triggers:
            triggers[trigger] = {
                'reply': {},
                'condition': {},
                'redirect': None,
            }
    elif toplevel == 'thats':
        previous = self._thats.setdefault(topic, {}).setdefault(trigger, {})
        if what not in previous:
            previous[what] = {
                'reply': {},
                'condition': {},
                'redirect': {},
            }

############################################################################
# Sorting Methods                                                          #
############################################################################

def sort_replies(self, thats=False):
    """Sort the loaded triggers.

    With thats=False the topic triggers are sorted into _sorted['topics'];
    afterwards the %Previous structure, the per-%Previous trigger lists and
    both substitution lists are sorted too. With thats=True only the
    %Previous structure is sorted, into _sorted['thats']."""
    # This method can sort both triggers and that's.
    if thats:
        triglvl, sortlvl = self._thats, 'thats'
    else:
        triglvl, sortlvl = self._topics, 'topics'

    # (Re)Initialize the sort cache.
    self._sorted[sortlvl] = {}

    self._say("Sorting triggers...")

    # Loop through all the topics.
    for topic in triglvl:
        self._say("Analyzing topic " + topic)

        # Gather every trigger this topic has to consider. _topic_triggers
        # is handed the topic and the structure being sorted; its result is
        # what gets ordered below.
        alltrig = self._topic_triggers(topic, triglvl)

        # Sort these triggers.
        running = self._sort_trigger_set(alltrig)

        # Save this topic's sorted list.
        cache = self._sorted.setdefault(sortlvl, {})
        cache[topic] = running

    # And do it all again for %Previous!
    if thats != True:
        # This will sort the %Previous lines to best match the bot's last reply.
        self.sort_replies(True)

        # If any of those %Previous's had more than one +trigger for them,
        # this will sort all those +triggers to pair back the best human
        # interaction.
        self._sort_that_triggers()

        # Also sort both kinds of substitutions.
        self._sort_list('subs', self._subs)
        self._sort_list('person', self._person)

708

def _sort_that_triggers(self):
    """Build the sorted trigger lists that belong to each %Previous group.

    The result is stored at _sorted['that_trig'][topic][bottrig]."""
    self._say("Sorting reverse triggers for %Previous groups...")

    that_trig = self._sorted.setdefault("that_trig", {})

    for topic in self._thats:
        by_topic = that_trig.setdefault(topic, {})

        for bottrig in self._thats[topic]:
            # Make sure the slot exists before sorting fills it in.
            by_topic.setdefault(bottrig, [])
            candidates = self._thats[topic][bottrig].keys()
            by_topic[bottrig] = self._sort_trigger_set(candidates)

725

def _sort_trigger_set(self, triggers):
    """Sort a group of triggers in optimal sorting order.

    Triggers are grouped by their {weight} tag (highest first), then by
    their {inherits} level (lowest first, untagged triggers last), then by
    kind and word count. Returns the ordered list; {inherits} tags are
    stripped from the returned triggers."""

    # Create a priority map, keyed by weight. Weight 0 is the default.
    prior = {0: []}
    for trig in triggers:
        found = re.search(re_weight, trig)
        weight = int(found.group(1)) if found else 0
        prior.setdefault(weight, []).append(trig)

    # Keep a running list of sorted triggers for this topic.
    running = []

    # (wildcard symbol, bucket for multi-word triggers, bucket for triggers
    # of a single word). Checked in this order; the first symbol present wins.
    wildcard_kinds = (
        ('_', 'alpha', 'under'),
        ('#', 'number', 'pound'),
        ('*', 'wild', 'star'),
    )

    # Sort them by priority.
    for p in sorted(prior.keys(), reverse=True):
        self._say("\tSorting triggers with priority " + str(p))

        # Some of these triggers may include {inherits} tags. Lower inherits
        # values mean higher priority on the stack.
        inherits = -1          # -1 means no {inherits} tag
        highest_inherits = -1  # highest inheritance number seen

        # Loop through and categorize these triggers.
        track = {inherits: self._init_sort_track()}

        for trig in prior[p]:
            self._say("\t\tLooking at trigger: " + trig)

            # See if it has an inherits tag.
            found = re.search(re_inherit, trig)
            if found:
                inherits = int(found.group(1))
                highest_inherits = max(highest_inherits, inherits)
                self._say("\t\t\tTrigger belongs to a topic which inherits other topics: level=" + str(inherits))
                trig = re.sub(re_inherit, "", trig)
            else:
                inherits = -1

            # If this is the first time we've seen this inheritance level,
            # initialize its track structure.
            if inherits not in track:
                track[inherits] = self._init_sort_track()
            bucket = track[inherits]

            # Start inspecting the trigger's contents.
            for symbol, counted, bare in wildcard_kinds:
                if symbol not in trig:
                    continue
                cnt = self._word_count(trig)
                self._say("\t\t\tHas a " + symbol + " wildcard with " + str(cnt) + " words.")
                if cnt > 1:
                    bucket[counted].setdefault(cnt, []).append(trig)
                else:
                    bucket[bare].append(trig)
                break
            else:
                # No wildcard at all: either optionals or totally atomic.
                cnt = self._word_count(trig)
                if '[' in trig:
                    self._say("\t\t\tHas optionals and " + str(cnt) + " words.")
                    kind = 'option'
                else:
                    self._say("\t\t\tTotally atomic and " + str(cnt) + " words.")
                    kind = 'atomic'
                bucket[kind].setdefault(cnt, []).append(trig)

        # Move the no-{inherits} triggers to the bottom of the stack.
        track[highest_inherits + 1] = track[-1]
        del track[-1]

        # Add this group to the sort list.
        for ip in sorted(track.keys()):
            self._say("ip=" + str(ip))
            level = track[ip]
            for kind in ['atomic', 'option', 'alpha', 'number', 'wild']:
                for i in sorted(level[kind], reverse=True):
                    running.extend(level[kind][i])
            for bare in ('under', 'pound', 'star'):
                running.extend(sorted(level[bare], key=len, reverse=True))

    return running

840

def _sort_list(self, name, items):
    """Sort a simple list by number of words and length.

    Items with more words come first; within the same word count, longer
    items come first and ties keep their incoming order. The result is
    stored at _sorted['lists'][name].

    str name:  Key to store the sorted list under.
    items:     Iterable of strings to sort."""

    # Initialize the list sort buffer.
    if "lists" not in self._sorted:
        self._sorted["lists"] = {}
    self._sorted["lists"][name] = []

    # Track by number of words.
    track = {}

    # Loop through each item.
    for item in items:
        # Count the words.
        cword = self._word_count(item, all=True)
        track.setdefault(cword, []).append(item)

    # Sort them. A key function with reverse=True is a stable
    # longest-first sort, and unlike the cmp= argument it also exists on
    # Python 3.
    output = []
    for count in sorted(track.keys(), reverse=True):
        output.extend(sorted(track[count], key=len, reverse=True))

    self._sorted["lists"][name] = output

870

def _init_sort_track(self):
    """Build an empty bookkeeping dict for one round of trigger sorting.

    The 'atomic', 'option', 'alpha', 'number' and 'wild' entries are dicts
    (filled by word count); 'pound', 'under' and 'star' are plain lists for
    triggers that consist of just that wildcard."""
    track = {}
    for by_word_count in ('atomic', 'option', 'alpha', 'number', 'wild'):
        track[by_word_count] = {}
    for lone_wildcard in ('pound', 'under', 'star'):
        track[lone_wildcard] = []
    return track

############################################################################
# Public Configuration Methods                                             #
############################################################################

def set_handler(self, language, obj):
    """Define a custom language handler for RiveScript objects.

    language: The lowercased name of the programming language,
              e.g. python, javascript, perl
    obj:      An instance of a class object that provides the following interface:

        class MyObjectHandler:
            def __init__(self):
                pass
            def load(self, name, code):
                # name = the name of the object from the RiveScript code
                # code = the source code of the object
            def call(self, rs, name, fields):
                # rs     = the current RiveScript interpreter object
                # name   = the name of the object being called
                # fields = array of arguments passed to the object
                return reply

    Pass in a None value for the object to delete an existing handler (for example,
    to prevent Python code from being able to be run by default). Deleting a
    handler that was never set is a no-op.

    Look in the `eg` folder of the rivescript-python distribution for an example
    script that sets up a JavaScript language handler."""

    # Allow them to delete a handler too. An identity test is used so that a
    # handler object with its own __eq__ can never be mistaken for None.
    if obj is None:
        self._handlers.pop(language, None)
    else:
        self._handlers[language] = obj

920

def set_subroutine(self, name, code):
    """Register a Python function as a RiveScript object.

    This has the same effect as defining the object in RiveScript code,
    except that your program supplies it. `name` is the name of the object
    and `code` is a Python function (a `def`) taking rs,args as parameters.

    It needs a handler registered under "python" (there is one by default
    unless set_handler("python", None) was called); without one a warning is
    printed and nothing is stored."""

    # No Python handler, nothing to attach the function to.
    if 'python' not in self._handlers:
        self._warn("Can't set_subroutine: no Python object handler!")
        return

    python_handler = self._handlers['python']
    python_handler._objects[name] = code

936

def set_global(self, name, value):
    """Set a global variable.

    Equivalent to `! global` in RiveScript code. Set to None to delete;
    deleting a variable that does not exist is a no-op.

    str name:  The variable name.
    value:     The new value, or None to remove the variable."""
    if value is None:
        # Unset the variable, and stop here so it is not stored again with
        # a value of None.
        self._gvars.pop(name, None)
        return
    self._gvars[name] = value

946

def set_variable(self, name, value):
    """Set a bot variable.

    Equivalent to `! var` in RiveScript code. Set to None to delete;
    deleting a variable that does not exist is a no-op.

    str name:  The variable name.
    value:     The new value, or None to remove the variable."""
    if value is None:
        # Unset the variable, and stop here so it is not stored again with
        # a value of None.
        self._bvars.pop(name, None)
        return
    self._bvars[name] = value

956

def set_substitution(self, what, rep):
    """Set a substitution.

    Equivalent to `! sub` in RiveScript code. Set to None to delete;
    deleting a substitution that does not exist is a no-op.

    str what: The text to be replaced.
    rep:      The replacement text, or None to remove the substitution."""
    if rep is None:
        # Unset the substitution, and stop here so it is not stored again
        # with a value of None.
        self._subs.pop(what, None)
        return
    self._subs[what] = rep

966

def set_person(self, what, rep):
    """Set a person substitution.

    Equivalent to `! person` in RiveScript code. Set to None to delete;
    deleting a substitution that does not exist is a no-op.

    str what: The text to be replaced.
    rep:      The replacement text, or None to remove the substitution."""
    if rep is None:
        # Unset the substitution, and stop here so it is not stored again
        # with a value of None.
        self._person.pop(what, None)
        return
    self._person[what] = rep

976

def set_uservar(self, user, name, value):
    """Store a variable for a user.

    A user seen for the first time gets a fresh record whose topic is
    "random" before the variable is stored."""
    profile = self._users.setdefault(user, {"topic": "random"})
    profile[name] = value

984

def get_uservar(self, user, name):
    """Look up one variable of a user.

    Returns None when nothing at all is known about the user, and the
    string 'undefined' when the user exists but has no value for `name`."""

    if user not in self._users:
        return None

    profile = self._users[user]
    if name in profile:
        return profile[name]
    return "undefined"

998

def get_uservars(self, user=None):
    """Fetch every variable of one user, or of all users.

    Without a username the whole user database structure is returned.
    With one, that user's variables are returned, or None if the user is
    unknown."""

    if user is None:
        # All the users!
        return self._users

    # Just this one, or None when there is no info.
    return self._users.get(user)

1014

def clear_uservars(self, user=None):
    """Forget the variables of one user, or of everybody.

    Called without a username, the whole user database is replaced by an
    empty one. With a username, only that user's variables are emptied; an
    unknown username is ignored."""
    if user is None:
        # All the users!
        self._users = {}
        return

    if user in self._users:
        # Just this one.
        self._users[user] = {}

1027

def freeze_uservars(self, user):
    """Take a snapshot of a user's variables.

    A deep copy of everything stored for the user is kept aside so that it
    can be restored later with `thaw_uservars`. A warning is issued when
    the user is unknown."""
    if user not in self._users:
        self._warn("Can't freeze vars for user " + user + ": not found!")
        return

    # Clone the user's data so later changes don't leak into the snapshot.
    snapshot = copy.deepcopy(self._users[user])
    self._freeze[user] = snapshot

1039

def thaw_uservars(self, user, action="thaw"):
    """Restore (or drop) a user's frozen variables.

    The `action` can be one of the following options:

    discard: Don't restore the user's variables, just delete the frozen copy.
    keep:    Keep the frozen copy after restoring the variables.
    thaw:    Restore the variables, then delete the frozen copy (default).

    A warning is issued for an unknown action or when the user has no
    frozen copy."""
    if user not in self._freeze:
        self._warn("Can't thaw vars for user " + user + ": not found!")
        return

    if action not in ("thaw", "discard", "keep"):
        self._warn("Unsupported thaw action")
        return

    if action != "discard":
        # "thaw" and "keep" both put the frozen state back in place.
        self.clear_uservars(user)
        self._users[user] = copy.deepcopy(self._freeze[user])

    if action != "keep":
        # "thaw" and "discard" both drop the frozen copy.
        del self._freeze[user]

1067

def last_match(self, user):
    """Return the trigger the user's last message matched.

    The value is whatever is stored in the user's "__lastmatch__" variable,
    as read through `get_uservar`."""
    trigger = self.get_uservar(user, "__lastmatch__")
    return trigger

1074 1075 ############################################################################ 1076 # Reply Fetching Methods # 1077 ############################################################################ 1078

def reply(self, user, msg):
    """Fetch a reply from the RiveScript brain.

    user -- ID of the user sending the message.
    msg  -- the raw message text; it is normalised with _format_message
            before matching.

    When a "__begin__" topic exists it is consulted first, and the real
    reply is only fetched (and spliced in) if the BEGIN reply contains
    "{ok}". The formatted message and the final reply are pushed onto the
    front of the user's history, which is capped at nine entries each.

    Returns the reply text."""
    self._say("Get reply to [" + user + "] " + msg)

    # Format their message.
    msg = self._format_message(msg)

    # If the BEGIN block exists, consult it first.
    if "__begin__" in self._topics:
        begin = self._getreply(user, 'request', context='begin')

        # Okay to continue?
        if '{ok}' in begin:
            reply = self._getreply(user, msg)
            # Plain string replacement on purpose: re.sub() would treat the
            # reply as a replacement template and choke on (or rewrite) any
            # backslash it contains.
            begin = begin.replace('{ok}', reply)

        # Run more tag substitutions.
        reply = self._process_tags(user, msg, begin)
    else:
        # Just continue then.
        reply = self._getreply(user, msg)

    # Save their reply history: newest first, keeping the 8 most recent
    # older entries.
    history = self._users[user]['__history__']
    history['input'] = [msg] + history['input'][:8]
    history['reply'] = [reply] + history['reply'][:8]

    return reply

1114

def _format_message(self, msg):
    """Normalise a message so it is safe to match against triggers.

    The text is lowercased, run through the "subs" substitutions, and then
    passed through _strip_nasties."""
    lowered = msg.lower()
    substituted = self._substitute(lowered, "subs")
    return self._strip_nasties(substituted)

1126

def _getreply(self, user, msg, context='normal', step=0):
    """Find the reply for a (formatted) message from a user.

    user    -- ID of the user; a profile in topic "random" is created for
               unknown users.
    msg     -- the message to match against the sorted triggers.
    context -- 'begin' to match against the "__begin__" topic instead of
               the user's current topic.
    step    -- redirect depth; incremented on every redirect and capped by
               self._depth.

    Returns the reply text, or an "ERR: ..." string when recursion is too
    deep, the topic is missing, or nothing matched / no reply was found.
    Raises Exception when the replies have not been sorted yet."""
    # Needed to sort replies?
    if not 'topics' in self._sorted:
        raise Exception("You forgot to call sort_replies()!")

    # Initialize the user's profile?
    if not user in self._users:
        self._users[user] = {'topic': 'random'}

    # Collect data on the user.
    topic = self._users[user]['topic']
    stars = []
    thatstars = []  # For %Previous's.
    reply = ''

    # Avoid letting them fall into a missing topic.
    if not topic in self._topics:
        self._warn("User " + user + " was in an empty topic named '" + topic + "'")
        topic = self._users[user]['topic'] = 'random'

    # Avoid deep recursion.
    if step > self._depth:
        return "ERR: Deep Recursion Detected"

    # Are we in the BEGIN statement?
    if context == 'begin':
        topic = '__begin__'

    # Initialize this user's history.
    if not '__history__' in self._users[user]:
        self._users[user]['__history__'] = {
            'input': ['undefined'] * 9,
            'reply': ['undefined'] * 9,
        }

    # More topic sanity checking.
    if not topic in self._topics:
        # This was handled before, which would mean topic=random and
        # it doesn't exist. Serious issue!
        return "[ERR: No default topic 'random' was found!]"

    # Create a pointer for the matched data when we find it.
    matched = None
    matchedTrigger = None
    foundMatch = False

    # See if there were any %Previous's in this topic, or any topic related
    # to it. This should only be done the first time -- not during a
    # recursive redirection. This is because in a redirection, "lastreply"
    # is still gonna be the same as it was the first time, causing an
    # infinite loop!
    if step == 0:
        allTopics = [topic]
        if topic in self._includes or topic in self._lineage:
            # Get all the topics!
            allTopics = self._get_topic_tree(topic)

        # Scan them all!
        for top in allTopics:
            self._say("Checking topic " + top + " for any %Previous's.")
            if top in self._sorted["thats"]:
                self._say("There is a %Previous in this topic!")

                # Do we have history yet?
                lastReply = self._users[user]["__history__"]["reply"][0]

                # Format the bot's last reply the same way as the human's.
                lastReply = self._format_message(lastReply)

                self._say("lastReply: " + lastReply)

                # See if it's a match.
                for trig in self._sorted["thats"][top]:
                    botside = self._reply_regexp(user, trig)
                    self._say("Try to match lastReply (" + lastReply + ") to " + botside)

                    # Match??
                    match = re.match(r'^' + botside + r'$', lastReply)
                    if match:
                        # Huzzah! See if OUR message is right too.
                        self._say("Bot side matched!")
                        thatstars = match.groups()
                        for subtrig in self._sorted["that_trig"][top][trig]:
                            humanside = self._reply_regexp(user, subtrig)
                            self._say("Now try to match " + msg + " to " + humanside)

                            match = re.match(r'^' + humanside + '$', msg)
                            if match:
                                self._say("Found a match!")
                                matched = self._thats[top][trig][subtrig]
                                # Remember the trigger the user's message
                                # matched (not the topic name), so that
                                # "__lastmatch__" holds trigger text.
                                matchedTrigger = subtrig
                                foundMatch = True

                                # Get the stars!
                                stars = match.groups()
                                break

                    # Break if we found a match.
                    if foundMatch:
                        break
            # Break if we found a match.
            if foundMatch:
                break

    # Search their topic for a match to their trigger.
    if not foundMatch:
        for trig in self._sorted["topics"][topic]:
            # Process the triggers.
            regexp = self._reply_regexp(user, trig)
            self._say("Try to match \"" + msg + "\" against " + trig + " (" + regexp + ")")

            # Python's regular expression engine is slow. Try a verbatim
            # match if this is an atomic trigger.
            isAtomic = self._is_atomic(trig)
            isMatch = False
            if isAtomic:
                # Only look for exact matches, no sense running atomic triggers
                # through the regexp engine.
                if msg == regexp:
                    isMatch = True
            else:
                # Non-atomic triggers always need the regexp.
                match = re.match(r'^' + regexp + r'$', msg)
                if match:
                    # The regexp matched!
                    isMatch = True

                    # Collect the stars.
                    stars = match.groups()

            if isMatch:
                self._say("Found a match!")

                # We found a match, but what if the trigger we've matched
                # doesn't belong to our topic? Find it!
                if not trig in self._topics[topic]:
                    # We have to find it.
                    matched = self._find_trigger_by_inheritence(topic, trig)
                else:
                    # We do have it!
                    matched = self._topics[topic][trig]

                foundMatch = True
                matchedTrigger = trig
                break

    # Store what trigger they matched on. If their matched trigger is None,
    # this will be too, which is great.
    self._users[user]["__lastmatch__"] = matchedTrigger

    if matched:
        # Single-pass loop: it only exists so the steps below can `break`
        # out as soon as one of them has produced the reply.
        for nil in [1]:
            # See if there are any hard redirects.
            if matched["redirect"]:
                self._say("Redirecting us to " + matched["redirect"])
                redirect = self._process_tags(user, msg, matched["redirect"], stars, thatstars, step)
                self._say("Pretend user said: " + redirect)
                reply = self._getreply(user, redirect, step=(step+1))
                break

            # Check the conditionals.
            for con in sorted(matched["condition"]):
                halves = re.split(r'\s*=>\s*', matched["condition"][con])
                if halves and len(halves) == 2:
                    condition = re.match(r'^(.+?)\s+(==|eq|!=|ne|<>|<|<=|>|>=)\s+(.+?)$', halves[0])
                    if condition:
                        left = condition.group(1)
                        eq = condition.group(2)
                        right = condition.group(3)
                        potreply = halves[1]
                        self._say("Left: " + left + "; eq: " + eq + "; right: " + right + " => " + potreply)

                        # Process tags all around.
                        left = self._process_tags(user, msg, left, stars, thatstars, step)
                        right = self._process_tags(user, msg, right, stars, thatstars, step)

                        # Defaults?
                        if len(left) == 0:
                            left = 'undefined'
                        if len(right) == 0:
                            right = 'undefined'

                        self._say("Check if " + left + " " + eq + " " + right)

                        # Validate it.
                        passed = False
                        if eq == 'eq' or eq == '==':
                            if left == right:
                                passed = True
                        elif eq == 'ne' or eq == '!=' or eq == '<>':
                            if left != right:
                                passed = True
                        else:
                            # Gasp, dealing with numbers here...
                            try:
                                left, right = int(left), int(right)
                                if eq == '<':
                                    passed = left < right
                                elif eq == '<=':
                                    passed = left <= right
                                elif eq == '>':
                                    passed = left > right
                                elif eq == '>=':
                                    passed = left >= right
                            except (TypeError, ValueError):
                                # Only a failed int() conversion is expected
                                # here; anything else should surface.
                                self._warn("Failed to evaluate numeric condition!")

                        # How truthful?
                        if passed:
                            reply = potreply
                            break

            # Have our reply yet?
            if len(reply) > 0:
                break

            # Process weights in the replies.
            bucket = []
            for rep in sorted(matched["reply"]):
                text = matched["reply"][rep]
                weight = 1
                # search(), not match(): the {weight} tag may sit anywhere
                # in the reply text, not just at its very start.
                match = re.search(re_weight, text)
                if match:
                    weight = int(match.group(1))
                    if weight <= 0:
                        self._warn("Can't have a weight <= 0!")
                        weight = 1
                for i in range(0, weight):
                    bucket.append(text)

            # Get a random reply. A trigger may have no plain replies at all
            # (e.g. only conditions, none of which passed); leave the reply
            # empty then so it is reported as "No Reply Found" below.
            if bucket:
                reply = random.choice(bucket)
            break

    # Still no reply?
    if not foundMatch:
        reply = "ERR: No Reply Matched"
    elif len(reply) == 0:
        reply = "ERR: No Reply Found"

    self._say("Reply: " + reply)

    # Process tags for the BEGIN block.
    if context == "begin":
        # BEGIN blocks can only set topics and uservars. The rest happen
        # later!
        reTopic = re.findall(r'\{topic=(.+?)\}', reply)
        for match in reTopic:
            self._say("Setting user's topic to " + match)
            self._users[user]["topic"] = match
            reply = re.sub(r'\{topic=' + re.escape(match) + r'\}', '', reply)

        reSet = re.findall('<set (.+?)=(.+?)>', reply)
        for match in reSet:
            self._say("Set uservar " + str(match[0]) + "=" + str(match[1]))
            self._users[user][match[0]] = match[1]
            reply = re.sub('<set ' + re.escape(match[0]) + '=' + re.escape(match[1]) + '>', '', reply)
    else:
        # Process more tags if not in BEGIN.
        reply = self._process_tags(user, msg, reply, stars, thatstars, step)

    return reply

1401

def _substitute(self, msg, list):
    """Run a kind of substitution on a message.

    msg  -- the text to run the substitutions on.
    list -- which substitutions to use: 'subs' for self._subs, anything
            else for self._person. It must also be a key of
            self._sorted["lists"], which supplies the patterns in the order
            they are applied.

    Each pattern is only replaced where it is delimited by non-word
    characters or the ends of the message. Replacements are first inserted
    ROT13-encoded between <rot13sub>...<bus31tor> markers, so that text
    inserted by one substitution is not picked up by a later one, and are
    decoded at the end.

    Returns the substituted message with surrounding whitespace stripped.
    Raises Exception when the sorted lists are not available."""
    # Safety checking.
    if not 'lists' in self._sorted:
        raise Exception("You forgot to call sort_replies()!")
    if not list in self._sorted["lists"]:
        raise Exception("You forgot to call sort_replies()!")

    # Get the substitution map.
    subs = self._subs if list == 'subs' else self._person

    for pattern in self._sorted["lists"][list]:
        result = "<rot13sub>" + self._rot13(subs[pattern]) + "<bus31tor>"
        # The placeholder goes into a replacement *template*; double any
        # backslash so it is inserted literally rather than being read as
        # an escape or group reference.
        safe = result.replace('\\', '\\\\')
        qm = re.escape(pattern)
        msg = re.sub(r'^' + qm + r'$', safe, msg)
        msg = re.sub(r'^' + qm + r'(\W+)', safe + r'\1', msg)
        msg = re.sub(r'(\W+)' + qm + r'(\W+)', r'\1' + safe + r'\2', msg)
        msg = re.sub(r'(\W+)' + qm + r'$', r'\1' + safe, msg)

    # Swap every placeholder for its decoded text. Plain string replacement
    # keeps backslashes in the decoded text intact.
    placeholders = re.findall(re_rot13, msg)
    for match in placeholders:
        decoded = self._rot13(match)
        msg = msg.replace('<rot13sub>' + match + '<bus31tor>', decoded)

    # Strip & return.
    return msg.strip()

1434

1435 - def _reply_regexp(self, user, regexp):

1436 """Prepares a trigger for the regular expression engine.""" 1437 1438 # If the trigger is simply '*' then the * there needs to become (.*?) 1439 # to match the blank string too. 1440 regexp = re.sub(r'^\*$', r'<zerowidthstar>', regexp) 1441 1442 # Simple replacements. 1443 regexp = re.sub(r'\*', r'(.+?)', regexp) # Convert * into (.+?) 1444 regexp = re.sub(r'#', r'(\d+?)', regexp) # Convert # into (\d+?) 1445 regexp = re.sub(r'_', r'([A-Za-z]+?)', regexp) # Convert _ into (\w+?) 1446 regexp = re.sub(r'\{weight=\d+\}', '', regexp) # Remove {weight} tags 1447 regexp = re.sub(r'<zerowidthstar>', r'(.*?)', regexp) 1448 1449 # Optionals. 1450 optionals = re.findall(r'\[(.+?)\]', regexp) 1451 for match in optionals: 1452 parts = match.split("|") 1453 new = [] 1454 for p in parts: 1455 p = r'\s*' + p + r'\s*' 1456 new.append(p) 1457 new.append(r'\s*') 1458 1459 # If this optional had a star or anything in it, make it 1460 # non-matching. 1461 pipes = '|'.join(new) 1462 pipes = re.sub(re.escape('(.+?)'), '(?:.+?)', pipes) 1463 pipes = re.sub(re.escape('(\d+?)'), '(?:\d+?)', pipes) 1464 pipes = re.sub(re.escape('([A-Za-z]+?)'), '(?:[A-Za-z]+?)', pipes) 1465 1466 regexp = re.sub(r'\s*\[' + re.escape(match) + '\]\s*', '(?:' + pipes + ')', regexp) 1467 1468 # Filter in arrays. 1469 arrays = re.findall(r'\@(.+?)\b', regexp) 1470 for array in arrays: 1471 rep = '' 1472 if array in self._arrays: 1473 rep = r'(?:' + '|'.join(self._arrays[array]) + ')' 1474 regexp = re.sub(r'\@' + re.escape(array) + r'\b', rep, regexp) 1475 1476 # Filter in bot variables. 1477 bvars = re.findall(r'<bot (.+?)>', regexp) 1478 for var in bvars: 1479 rep = '' 1480 if var in self._bvars: 1481 rep = self._strip_nasties(self._bvars[var]) 1482 regexp = re.sub(r'<bot ' + re.escape(var) + r'>', rep, regexp) 1483 1484 # Filter in user variables. 
1485 uvars = re.findall(r'<get (.+?)>', regexp) 1486 for var in uvars: 1487 rep = '' 1488 if var in self._users[user]: 1489 rep = self._strip_nasties(self._users[user][var]) 1490 regexp = re.sub(r'<get ' + re.escape(var) + r'>', rep, regexp) 1491 1492 # Filter in <input> and <reply> tags. This is a slow process, so only 1493 # do it if we have to! 1494 if '<input' in regexp or '<reply' in regexp: 1495 for type in ['input','reply']: 1496 tags = re.findall(r'<' + type + r'([0-9])>', regexp) 1497 for index in tags: 1498 index = int(index) - 1 1499 rep = self._format_message(self._users[user]['__history__'][type][index]) 1500 regexp = re.sub(r'<' + type + str(index) + r'>', rep, regexp) 1501 regexp = re.sub( 1502 '<' + type + '>', 1503 self._format_message(self._users[user]['__history__'][type][0]), 1504 regexp 1505 ) 1506 # TODO: the Perl version doesn't do just <input>/<reply> in trigs! 1507 1508 return regexp

1509

1510 - def _process_tags(self, user, msg, reply, st=[], bst=[], depth=0):

1511 """Post process tags in a message.""" 1512 stars = [''] 1513 stars.extend(st) 1514 botstars = [''] 1515 botstars.extend(bst) 1516 if len(stars) == 1: 1517 stars.append("undefined") 1518 if len(botstars) == 1: 1519 botstars.append("undefined") 1520 1521 # Tag shortcuts. 1522 reply = re.sub('<person>', '{person}<star>{/person}', reply) 1523 reply = re.sub('<@>', '{@<star>}', reply) 1524 reply = re.sub('<formal>', '{formal}<star>{/formal}', reply) 1525 reply = re.sub('<sentence>', '{sentence}<star>{/sentence}', reply) 1526 reply = re.sub('<uppercase>', '{uppercase}<star>{/uppercase}', reply) 1527 reply = re.sub('<lowercase>', '{lowercase}<star>{/lowercase}', reply) 1528 1529 # Weight and <star> tags. 1530 reply = re.sub(r'\{weight=\d+\}', '', reply) # Leftover {weight}s 1531 if len(stars) > 0: 1532 reply = re.sub('<star>', stars[1], reply) 1533 reStars = re.findall(r'<star(\d+)>', reply) 1534 for match in reStars: 1535 if int(match) < len(stars): 1536 reply = re.sub(r'<star' + match + '>', stars[int(match)], reply) 1537 if len(botstars) > 0: 1538 reply = re.sub('<botstar>', botstars[1], reply) 1539 reStars = re.findall(r'<botstar(\d+)>', reply) 1540 for match in reStars: 1541 if int(match) < len(botstars): 1542 reply = re.sub(r'<botstar' + match + '>', botstars[int(match)], reply) 1543 1544 # <input> and <reply> 1545 reply = re.sub('<input>', self._users[user]['__history__']['input'][0], reply) 1546 reply = re.sub('<reply>', self._users[user]['__history__']['reply'][0], reply) 1547 reInput = re.findall(r'<input([0-9])>', reply) 1548 for match in reInput: 1549 reply = re.sub(r'<input' + match + r'>', self._users[user]['__history__']['input'][int(match)], reply) 1550 reReply = re.findall(r'<reply([0-9])>', reply) 1551 for match in reReply: 1552 reply = re.sub(r'<reply' + match + r'>', self._users[user]['__history__']['reply'][int(match)], reply) 1553 1554 # <id> and escape codes. 
1555 reply = re.sub(r'<id>', user, reply) 1556 reply = re.sub(r'\\s', ' ', reply) 1557 reply = re.sub(r'\\n', "\n", reply) 1558 reply = re.sub(r'\\#', r'#', reply) 1559 1560 # Random bits. 1561 reRandom = re.findall(r'\{random\}(.+?)\{/random\}', reply) 1562 for match in reRandom: 1563 output = '' 1564 if '|' in match: 1565 output = random.choice(match.split('|')) 1566 else: 1567 output = random.choice(match.split(' ')) 1568 reply = re.sub(r'\{random\}' + re.escape(match) + r'\{/random\}', output, reply) 1569 1570 # Person Substitutions and String Formatting. 1571 for item in ['person','formal','sentence','uppercase','lowercase']: 1572 matcher = re.findall(r'\{' + item + r'\}(.+?)\{/' + item + r'\}', reply) 1573 for match in matcher: 1574 output = None 1575 if item == 'person': 1576 # Person substitutions. 1577 output = self._substitute(match, "person") 1578 else: 1579 output = self._string_format(match, item) 1580 reply = re.sub(r'\{' + item + r'\}' + re.escape(match) + '\{/' + item + r'\}', output, reply) 1581 1582 # Bot variables: set (TODO: Perl RS doesn't support this) 1583 reBotSet = re.findall(r'<bot (.+?)=(.+?)>', reply) 1584 for match in reBotSet: 1585 self._say("Set bot variable " + str(match[0]) + "=" + str(match[1])) 1586 self._bvars[ match[0] ] = match[1] 1587 reply = re.sub(r'<bot ' + re.escape(match[0]) + '=' + re.escape(match[1]) + '>', '', reply) 1588 1589 # Bot variables: get 1590 reBot = re.findall(r'<bot (.+?)>', reply) 1591 for match in reBot: 1592 val = 'undefined' 1593 if match in self._bvars: 1594 val = self._bvars[match] 1595 reply = re.sub(r'<bot ' + re.escape(match) + '>', val, reply) 1596 1597 # Global vars: set (TODO: Perl RS doesn't support this) 1598 reEnvSet = re.findall(r'<env (.+?)=(.+?)>', reply) 1599 for match in reEnvSet: 1600 self._say("Set global variable " + str(match[0]) + "=" + str(match[1])) 1601 self._gvars[ match[0] ] = match[1] 1602 reply = re.sub(r'<env ' + re.escape(match[0]) + '=' + re.escape(match[1]) + '>', '', 
reply) 1603 1604 # Global vars 1605 reEnv = re.findall(r'<env (.+?)>', reply) 1606 for match in reEnv: 1607 val = 'undefined' 1608 if match in self._gvars: 1609 val = self._gvars[match] 1610 reply = re.sub(r'<env ' + re.escape(match) + '>', val, reply) 1611 1612 # Streaming code. DEPRECATED! 1613 if '{!' in reply: 1614 self._warn("Use of the {!...} tag is deprecated and not supported here.") 1615 1616 # Set user vars. 1617 reSet = re.findall('<set (.+?)=(.+?)>', reply) 1618 for match in reSet: 1619 self._say("Set uservar " + str(match[0]) + "=" + str(match[1])) 1620 self._users[user][ match[0] ] = match[1] 1621 reply = re.sub('<set ' + re.escape(match[0]) + '=' + re.escape(match[1]) + '>', '', reply) 1622 1623 # Math tags. 1624 for item in ['add','sub','mult','div']: 1625 matcher = re.findall('<' + item + r' (.+?)=(.+?)>', reply) 1626 for match in matcher: 1627 var = match[0] 1628 value = match[1] 1629 output = '' 1630 1631 # Sanity check the value. 1632 try: 1633 value = int(value) 1634 1635 # So far so good, initialize this one? 1636 if not var in self._users[user]: 1637 self._users[user][var] = 0 1638 except: 1639 output = "[ERR: Math can't '" + item + "' non-numeric value '" + value + "']" 1640 1641 # Attempt the operation. 1642 try: 1643 orig = int(self._users[user][var]) 1644 new = 0 1645 if item == 'add': 1646 new = orig + value 1647 elif item == 'sub': 1648 new = orig - value 1649 elif item == 'mult': 1650 new = orig * value 1651 elif item == 'div': 1652 new = orig / value 1653 self._users[user][var] = new 1654 except: 1655 output = "[ERR: Math couldn't '" + item + "' to value '" + self._users[user][var] + "']" 1656 1657 reply = re.sub('<' + item + ' ' + re.escape(var) + '=' + re.escape(str(value)) + '>', output, reply) 1658 1659 # Get user vars. 
1660 reGet = re.findall(r'<get (.+?)>', reply) 1661 for match in reGet: 1662 output = 'undefined' 1663 if match in self._users[user]: 1664 output = self._users[user][match] 1665 reply = re.sub('<get ' + re.escape(match) + '>', str(output), reply) 1666 1667 # Topic setter. 1668 reTopic = re.findall(r'\{topic=(.+?)\}', reply) 1669 for match in reTopic: 1670 self._say("Setting user's topic to " + match) 1671 self._users[user]["topic"] = match 1672 reply = re.sub(r'\{topic=' + re.escape(match) + r'\}', '', reply) 1673 1674 # Inline redirecter. 1675 reRedir = re.findall(r'\{@(.+?)\}', reply) 1676 for match in reRedir: 1677 self._say("Redirect to " + match) 1678 at = match.strip() 1679 subreply = self._getreply(user, at, step=(depth + 1)) 1680 reply = re.sub(r'\{@' + re.escape(match) + r'\}', subreply, reply) 1681 1682 # Object caller. 1683 reCall = re.findall(r'<call>(.+?)</call>', reply) 1684 for match in reCall: 1685 parts = re.split(re_ws, match) 1686 output = '' 1687 obj = parts[0] 1688 args = [] 1689 if len(parts) > 1: 1690 args = parts[1:] 1691 1692 # Do we know this object? 1693 if obj in self._objlangs: 1694 # We do, but do we have a handler for that language? 1695 lang = self._objlangs[obj] 1696 if lang in self._handlers: 1697 # We do. 1698 output = self._handlers[lang].call(self, obj, args) 1699 else: 1700 output = '[ERR: No Object Handler]' 1701 else: 1702 output = '[ERR: Object Not Found]' 1703 1704 reply = re.sub('<call>' + re.escape(match) + r'</call>', output, reply) 1705 1706 return reply

1707

1708 - def _string_format(self, msg, method):

1709 """Format a string (upper, lower, formal, sentence).""" 1710 if method == "uppercase": 1711 return msg.upper() 1712 elif method == "lowercase": 1713 return msg.lower() 1714 elif method == "sentence": 1715 return msg.capitalize() 1716 elif method == "formal": 1717 return string.capwords(msg)

1718 1719 ############################################################################ 1720 # Topic Inheritence Utility Methods # 1721 ############################################################################ 1722

def _topic_triggers(self, topic, triglvl, depth=0, inheritence=0, inherited=False):
    """Recursively scan a topic and return a list of all triggers.

    topic       -- the name of the topic.
    triglvl     -- the structure to read triggers from (a mapping of topic
                   name to triggers, such as self._topics or self._thats).
    depth       -- starts at 0 and increments with each recursion.
    inheritence -- increments only when a topic inherits another topic.
    inherited   -- True when called for a topic that is included by
                   another one; forces the {inherits} prefix onto this
                   topic's triggers.

    Returns the list of triggers; an empty list once the recursion goes
    deeper than self._depth."""
    # Break if we're in too deep. Returning here is what actually stops the
    # descent when topics include/inherit each other in a cycle.
    if depth > self._depth:
        self._warn("Deep recursion while scanning topic inheritence")
        return []

    # Important info about the depth vs inheritence params to this function:
    # depth increments by 1 each time this function recursively calls itself.
    # inheritence increments by 1 only when this topic inherits another
    # topic.
    #
    # This way, '> topic alpha includes beta inherits gamma' will have this
    # effect:
    #   alpha and beta's triggers are combined together into one matching
    #   pool, and then those triggers have higher matching priority than
    #   gamma's.
    #
    # The inherited option is True if this is a recursive call, from a topic
    # that inherits other topics. This forces the {inherits} tag to be added
    # to the triggers. This only applies when the top topic 'includes'
    # another topic.
    self._say("\tCollecting trigger list for topic " + topic + "(depth=" \
        + str(depth) + "; inheritence=" + str(inheritence) + "; " \
        + "inherited=" + str(inherited) + ")")

    # Collect an array of triggers to return.
    triggers = []

    # Get those that exist in this topic directly.
    inThisTopic = []
    if topic in triglvl:
        inThisTopic = [trigger for trigger in triglvl[topic]]

    # Does this topic include others?
    if topic in self._includes:
        # Check every included topic.
        for includes in self._includes[topic]:
            self._say("\t\tTopic " + topic + " includes " + includes)
            triggers.extend(self._topic_triggers(includes, triglvl, (depth + 1), inheritence, True))

    # Does this topic inherit others?
    if topic in self._lineage:
        # Check every inherited topic.
        for inherits in self._lineage[topic]:
            self._say("\t\tTopic " + topic + " inherits " + inherits)
            triggers.extend(self._topic_triggers(inherits, triglvl, (depth + 1), (inheritence + 1), False))

    # Collect the triggers for *this* topic. If this topic inherits any
    # other topics, it means that this topic's triggers have higher
    # priority than those in any inherited topics. Enforce this with an
    # {inherits} tag.
    if topic in self._lineage or inherited:
        for trigger in inThisTopic:
            self._say("\t\tPrefixing trigger with {inherits=" + str(inheritence) + "}" + trigger)
            triggers.append("{inherits=" + str(inheritence) + "}" + trigger)
    else:
        triggers.extend(inThisTopic)

    return triggers

1788

1789 - def _find_trigger_by_inheritence(self, topic, trig, depth=0):

1790 """Locate the replies for a trigger in an inherited/included topic.""" 1791 1792 # This sub was called because the user matched a trigger from the sorted 1793 # array, but the trigger doesn't belong to their topic, and is instead 1794 # in an inherited or included topic. This is to search for it. 1795 1796 # Prevent recursion. 1797 if depth > self._depth: 1798 self._warn("Deep recursion detected while following an inheritence trail!") 1799 return None 1800 1801 # Inheritence is more important than inclusion: triggers in one topic can 1802 # override those in an inherited topic. 1803 if topic in self._lineage: 1804 for inherits in sorted(self._lineage[topic]): 1805 # See if this inherited topic has our trigger. 1806 if trig in self._topics[inherits]: 1807 # Great! 1808 return self._topics[inherits][trig] 1809 else: 1810 # Check what THAT topic inherits from. 1811 match = self._find_trigger_by_inheritence( 1812 inherits, trig, (depth+1) 1813 ) 1814 if match: 1815 # Found it! 1816 return match 1817 1818 # See if this topic has an "includes" 1819 if topic in self._includes: 1820 for includes in sorted(self._includes[topic]): 1821 # See if this included topic has our trigger. 1822 if trig in self._topics[includes]: 1823 # Great! 1824 return self._topics[includes][trig] 1825 else: 1826 # Check what THAT topic inherits from. 1827 match = self._find_trigger_by_inheritence( 1828 includes, trig, (depth+1) 1829 ) 1830 if match: 1831 # Found it! 1832 return match 1833 1834 # Don't know what else to do! 1835 self._warn("User matched a trigger, " + trig + ", but I can't find out what topic it belongs to!") 1836 return None

1837

1838 - def _get_topic_tree(self, topic, depth=0):

1839 """Given one topic, get the list of all included/inherited topics.""" 1840 1841 # Break if we're in too deep. 1842 if depth > self._depth: 1843 self._warn("Deep recursion while scanning topic trees!") 1844 return [] 1845 1846 # Collect an array of all topics. 1847 topics = [ topic ] 1848 1849 # Does this topic include others? 1850 if topic in self._includes: 1851 # Try each of these. 1852 for includes in sorted(self._includes[topic]): 1853 topics.extend( self._get_topic_tree(includes, depth+1) ) 1854 1855 # Does this topic inherit others? 1856 if topic in self._lineage: 1857 # Try each of these. 1858 for inherits in sorted(self._lineage[topic]): 1859 topics.extend( self._get_topic_tree(inherits, depth+1) ) 1860 1861 return topics

1862 1863 ############################################################################ 1864 # Miscellaneous Private Methods # 1865 ############################################################################ 1866

1867 - def _is_atomic(self, trigger):

1868 """Determine if a trigger is atomic or not.""" 1869 1870 # Atomic triggers don't contain any wildcards or parenthesis or anything 1871 # of the sort. We don't need to test the full character set, just left 1872 # brackets will do. 1873 special = [ '*', '#', '_', '(', '[', '<' ] 1874 for char in special: 1875 if char in trigger: 1876 return False 1877 1878 return True

1879

def _word_count(self, trigger, all=False):
    """Count the words in a trigger.

    By default the trigger is split with re_wilds, so wildcard characters
    do not count as words; with all=True it is split on whitespace only
    (re_ws). Empty pieces are not counted."""
    splitter = re_ws if all else re_wilds
    return sum(1 for word in splitter.split(trigger) if len(word) > 0)

1894

1895 - def _rot13(self, n):

1896 """Encode and decode a string into ROT13.""" 1897 trans = string.maketrans( 1898 "ABCDEFGHIJKLMabcdefghijklmNOPQRSTUVWXYZnopqrstuvwxyz", 1899 "NOPQRSTUVWXYZnopqrstuvwxyzABCDEFGHIJKLMabcdefghijklm") 1900 return string.translate(str(n), trans)

1901

def _strip_nasties(self, s):
    """Remove every character matched by re_nasties from a string, so the
    result is safe for ASCII regex matching."""
    return re_nasties.sub('', s)

1906

def _dump(self):
    """Pretty-print the interpreter's internal data structures to standard
    output, for debugging."""
    pp = pprint.PrettyPrinter(indent=4)

    # Each heading is printed, followed by the structure it describes.
    sections = (
        ("-- Globals --", self._gvars),
        ("-- Bot vars --", self._bvars),
        ("-- Substitutions --", self._subs),
        ("-- Person Substitutions --", self._person),
        ("-- Arrays --", self._arrays),
        ("=== Topic Structure ===", self._topics),
        ("=== %Previous Structure ===", self._thats),
        ("=== Includes ===", self._includes),
        ("=== Inherits ===", self._lineage),
        ("=== Sort Buffer ===", self._sorted),
    )

    print("=== Variables ===")
    for heading, data in sections:
        print(heading)
        pp.pprint(data)

################################################################################
# Interactive Mode                                                             #
################################################################################

# When this file is run as a script rather than imported, hand control to
# interactive_mode() from the `interactive` module. The import is done here
# so it only happens in that case.
if __name__ == "__main__":
    from interactive import interactive_mode
    interactive_mode()

# vim:expandtab

Source Code for Package rivescript