Package winappdbg :: Module search
[hide private]
[frames] | no frames]

Source Code for Module winappdbg.search

  1  #!~/.wine/drive_c/Python25/python.exe 
  2  # -*- coding: utf-8 -*- 
  3   
  4  # Process memory finder 
  5  # Copyright (c) 2009-2014, Mario Vilas 
  6  # All rights reserved. 
  7  # 
  8  # Redistribution and use in source and binary forms, with or without 
  9  # modification, are permitted provided that the following conditions are met: 
 10  # 
 11  #     * Redistributions of source code must retain the above copyright notice, 
 12  #       this list of conditions and the following disclaimer. 
 13  #     * Redistributions in binary form must reproduce the above copyright 
 14  #       notice, this list of conditions and the following disclaimer in the 
 15  #       documentation and/or other materials provided with the distribution. 
 16  #     * Neither the name of the copyright holder nor the names of its 
 17  #       contributors may be used to endorse or promote products derived from 
 18  #       this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 21  # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 22  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 23  # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 24  # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 25  # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 26  # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 27  # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 28  # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 29  # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 30  # POSSIBILITY OF SUCH DAMAGE. 
 31   
 32  """ 
 33  Process memory search. 
 34   
 35  @group Memory search: 
 36      Search, 
 37      Pattern, 
 38      BytePattern, 
 39      TextPattern, 
 40      RegExpPattern, 
 41      HexPattern 
 42  """ 
 43   
 44  __revision__ = "$Id: search.py 1299 2013-12-20 09:30:55Z qvasimodo $" 
 45   
 46  __all__ =   [ 
 47                  'Search', 
 48                  'Pattern', 
 49                  'BytePattern', 
 50                  'TextPattern', 
 51                  'RegExpPattern', 
 52                  'HexPattern', 
 53              ] 
 54   
 55  from textio import HexInput 
 56  from util import StaticClass, MemoryAddresses 
 57  import win32 
 58   
 59  import warnings 
 60   
 61  try: 
 62      # http://pypi.python.org/pypi/regex 
 63      import regex as re 
 64  except ImportError: 
 65      import re 
#==============================================================================

class Pattern (object):
    """
    Abstract base class of all search patterns.

    WinAppDbg ships the following concrete L{Pattern} subclasses:
     - L{BytePattern}
     - L{TextPattern}
     - L{RegExpPattern}
     - L{HexPattern}

    @see: L{Search.search_process}
    """

    def __init__(self, pattern):
        """
        Class constructor.

        The pattern string should be the only mandatory argument.

        Subclasses of L{Pattern} B{MUST} reimplement this method; the base
        implementation always raises C{NotImplementedError}.
        """
        raise NotImplementedError()

    def __len__(self):
        """
        Returns the maximum expected length of the strings matched by this
        pattern. Exact behavior is implementation dependent.

        An exact value is preferred. When it can't be calculated an upper
        limit should be returned instead, and when not even that is possible
        an exception must be raised.

        The value is used to calculate the required buffer size when doing
        buffered searches.

        Subclasses of L{Pattern} B{MUST} reimplement this method; the base
        implementation always raises C{NotImplementedError}.
        """
        raise NotImplementedError()

    def read(self, process, address, size):
        """
        Reads the requested number of bytes from the process memory at the
        given address, by delegating to C{process.read}.

        Subclasses of L{Pattern} typically don't need to reimplement this
        method.
        """
        data = process.read(address, size)
        return data

    def find(self, buffer, pos = None):
        """
        Searches for the pattern in the given buffer, optionally starting at
        the given position within the buffer.

        Subclasses of L{Pattern} B{MUST} reimplement this method; the base
        implementation always raises C{NotImplementedError}.

        @type  buffer: str
        @param buffer: Buffer to search on.

        @type  pos: int
        @param pos:
            (Optional) Position within the buffer to start searching from.

        @rtype:  tuple( int, int )
        @return: Tuple containing the following:
             - Position within the buffer where a match is found, or C{-1} if
               no match was found.
             - Length of the matched data if a match is found, or undefined if
               no match was found.
        """
        raise NotImplementedError()

    def found(self, address, size, data):
        """
        Callback invoked for every match that is found.

        It gives subclasses of L{Pattern} the chance to filter out unwanted
        results, or to modify the results before they're handed over to the
        caller of L{Search.search_process}.

        Returning C{None} causes the result to be skipped.

        The default implementation returns its arguments untouched, so
        subclasses only need to reimplement it when filtering is needed.

        @type  address: int
        @param address: The memory address where the pattern was found.

        @type  size: int
        @param size: The size of the data that matches the pattern.

        @type  data: str
        @param data: The data that matches the pattern.

        @rtype:  tuple( int, int, str )
        @return: Tuple containing the following:
             * The memory address where the pattern was found.
             * The size of the data that matches the pattern.
             * The data that matches the pattern.
        """
        return address, size, data
171
#------------------------------------------------------------------------------

class BytePattern (Pattern):
    """
    Fixed byte pattern.

    @type pattern: str
    @ivar pattern: Byte string to search for.

    @type length: int
    @ivar length: Length of the byte pattern.
    """

    def __init__(self, pattern):
        """
        @type  pattern: str
        @param pattern: Byte string to search for.
        """
        self.pattern = str(pattern)

        # Measure the converted string, since that's what gets searched for.
        self.length  = len(self.pattern)

    def __len__(self):
        """
        Returns the exact length of the pattern.

        @see: L{Pattern.__len__}
        """
        return self.length

    def find(self, buffer, pos = None):
        """
        Searches for the byte string in the given buffer.

        @see: L{Pattern.find}

        @type  buffer: str
        @param buffer: Buffer to search on.

        @type  pos: int
        @param pos:
            (Optional) Position within the buffer to start searching from.
            C{None} means the beginning of the buffer.

        @rtype:  tuple( int, int )
        @return: Position of the match (C{-1} if there was no match) and the
            length of the pattern.
        """

        # Make sure pos is an int, like RegExpPattern.find() does. Not every
        # Python version accepts None as the start index of str.find().
        if pos is None:
            pos = 0
        return buffer.find(self.pattern, pos), self.length
203
#------------------------------------------------------------------------------

# FIXME: case insensitive unicode searches are probably buggy!

class TextPattern (BytePattern):
    """
    Text pattern.

    @type isUnicode: bool
    @ivar isUnicode: C{True} if the text to search for is a unicode string,
        C{False} otherwise.

    @type encoding: str
    @ivar encoding: Encoding for the text parameter.
        Only used when the text to search for is a Unicode string.
        Don't change unless you know what you're doing!

    @type caseSensitive: bool
    @ivar caseSensitive: C{True} if the search is case sensitive,
        C{False} otherwise.
    """

    def __init__(self, text, encoding = "utf-16le", caseSensitive = False):
        """
        @type  text: str or unicode
        @param text: Text to search for.

        @type  encoding: str
        @param encoding: (Optional) Encoding for the text parameter.
            Only used when the text to search for is a Unicode string.
            Don't change unless you know what you're doing!

        @type  caseSensitive: bool
        @param caseSensitive: C{True} if the search is case sensitive,
            C{False} otherwise.
        """
        self.isUnicode     = isinstance(text, unicode)
        self.encoding      = encoding
        self.caseSensitive = caseSensitive

        # Always start from the original text, so the pattern is defined for
        # case sensitive searches too.
        pattern = text

        # Case insensitive searches compare lowercase against lowercase.
        # See read() for the other half of this.
        if not self.caseSensitive:
            pattern = pattern.lower()

        # Encode the (possibly lowercased) pattern, not the original text.
        if self.isUnicode:
            pattern = pattern.encode(encoding)

        super(TextPattern, self).__init__(pattern)

    def read(self, process, address, size):
        """
        Reads the requested number of bytes from the process memory. For
        case insensitive searches the data is converted to lowercase, to
        match the lowercased pattern built by the constructor.

        @see: L{Pattern.read}
        """
        data = super(TextPattern, self).read(process, address, size)
        if not self.caseSensitive:
            if self.isUnicode:

                # Best effort: lowercase the decoded text, but only use the
                # result if it has the same size in bytes, otherwise the
                # offsets in the buffer would no longer match the addresses
                # in memory. On any failure fall back to a bytewise lower().
                try:
                    encoding = self.encoding
                    text     = data.decode(encoding, "replace")
                    text     = text.lower()
                    new_data = text.encode(encoding, "replace")
                    if len(data) == len(new_data):
                        data = new_data
                    else:
                        data = data.lower()
                except Exception:
                    data = data.lower()
            else:
                data = data.lower()
        return data

    def found(self, address, size, data):
        """
        Decodes the matched data when searching for Unicode text.

        @see: L{Pattern.found}

        @rtype:  tuple( int, int, str or unicode )
        @return: The address, size and data of the match. When the text to
            search for was a Unicode string the data is decoded using the
            configured encoding. If it can't be decoded C{None} is returned
            instead, causing the match to be skipped.
        """
        if self.isUnicode:

            # Deliberately broad: data that can't be decoded is not text,
            # so the match is dropped rather than aborting the search.
            try:
                data = unicode(data, self.encoding)
            except Exception:
                return None
        return (address, size, data)
276
#------------------------------------------------------------------------------

class RegExpPattern (Pattern):
    """
    Regular expression pattern.

    @type pattern: str
    @ivar pattern: Regular expression in text form.

    @type flags: int
    @ivar flags: Regular expression flags.

    @type regexp: re.compile
    @ivar regexp: Regular expression in compiled form.

    @type maxLength: int
    @ivar maxLength:
        Maximum expected length of the strings matched by this regular
        expression.

        Buffered searches use this value to calculate the required buffer
        size.

        An exact value is best. If it can't be calculated give an upper
        limit instead.

        If that can't be done either use C{None}. Using this pattern in a
        buffered search will then cause an exception to be raised.
    """

    def __init__(self, regexp, flags = 0, maxLength = None):
        """
        @type  regexp: str
        @param regexp: Regular expression string.

        @type  flags: int
        @param flags: Regular expression flags.

        @type  maxLength: int
        @param maxLength: Maximum expected length of the strings matched by
            this regular expression.

            Buffered searches use this value to calculate the required
            buffer size.

            An exact value is best. If it can't be calculated give an upper
            limit instead.

            If that can't be done either use C{None}. Using this pattern in
            a buffered search will then cause an exception to be raised.
        """
        compiled = re.compile(regexp, flags)
        self.pattern   = regexp
        self.flags     = flags
        self.regexp    = compiled
        self.maxLength = maxLength

    def __len__(self):
        """
        Returns the maximum expected length of the strings matched by this
        pattern, as given in the C{maxLength} argument of the constructor
        of this class.

        @see: L{Pattern.__len__}

        @raise NotImplementedError: No maximum length was given.
        """
        limit = self.maxLength
        if limit is not None:
            return limit
        raise NotImplementedError()

    def find(self, buffer, pos = None):
        """
        Searches for the regular expression in the given buffer.

        @see: L{Pattern.find}

        @rtype:  tuple( int, int )
        @return: Position and length of the match, or C{(-1, 0)} if there
            was no match.
        """

        # The compiled regular expression needs an int as the position.
        start_at = pos
        if not start_at:
            start_at = 0

        match = self.regexp.search(buffer, start_at)
        if not match:
            return -1, 0
        begin, finish = match.span()
        return begin, finish - begin
361
#------------------------------------------------------------------------------

class HexPattern (RegExpPattern):
    """
    Hexadecimal pattern.

    Hex patterns must be in this form::
        "68 65 6c 6c 6f 20 77 6f 72 6c 64"  # "hello world"

    Spaces are optional. Capitalization of hex digits doesn't matter.
    This is exactly equivalent to the previous example::
        "68656C6C6F20776F726C64"            # "hello world"

    Wildcards are allowed, in the form of a C{?} sign in any hex digit::
        "5? 5? c3"          # pop register / pop register / ret
        "b8 ?? ?? ?? ??"    # mov eax, immediate value

    @type pattern: str
    @ivar pattern: Pattern string handed to L{RegExpPattern}, that is, the
        value returned by C{HexInput.pattern} for the hexadecimal pattern.
    """

    def __new__(cls, pattern):
        """
        If the pattern is completely static (no wildcards are present) a
        L{BytePattern} is created instead. That's because searching for a
        fixed byte pattern is faster than searching for a regular expression.

        @type  pattern: str
        @param pattern: Pattern to search for.

        @rtype:  L{BytePattern} or L{HexPattern}
        @return: A L{BytePattern} when there are no wildcards, a new
            L{HexPattern} instance otherwise.
        """
        if '?' not in pattern:
            return BytePattern( HexInput.hexadecimal(pattern) )

        # object.__new__() takes no extra arguments: passing the pattern
        # triggers a DeprecationWarning in Python 2.6+ and a TypeError in
        # Python 3. The pattern still reaches __init__() on its own.
        return object.__new__(cls)

    def __init__(self, hexa):
        """
        Hex patterns must be in this form::
            "68 65 6c 6c 6f 20 77 6f 72 6c 64"  # "hello world"

        Spaces are optional. Capitalization of hex digits doesn't matter.
        This is exactly equivalent to the previous example::
            "68656C6C6F20776F726C64"            # "hello world"

        Wildcards are allowed, in the form of a C{?} sign in any hex digit::
            "5? 5? c3"          # pop register / pop register / ret
            "b8 ?? ?? ?? ??"    # mov eax, immediate value

        @type  hexa: str
        @param hexa: Pattern to search for.
        """

        # Two hex digits (or wildcards) per byte. Use floor division to
        # always get an int, whatever the division semantics in effect.
        digits    = [x for x in hexa if x in "?0123456789ABCDEFabcdef"]
        maxLength = len(digits) // 2

        super(HexPattern, self).__init__(HexInput.pattern(hexa),
                                         maxLength = maxLength)
413
#==============================================================================

class Search (StaticClass):
    """
    Static class to group the search functionality.

    Do not instance this class! Use its static methods instead.
    """

    # TODO: aligned searches
    # TODO: method to coalesce search results
    # TODO: search memory dumps
    # TODO: search non-ascii C strings

    @staticmethod
    def search_process(process, pattern, minAddr = None,
                                         maxAddr = None,
                                         bufferPages = None,
                                         overlapping = False):
        """
        Search for the given pattern within the process memory.

        @type  process: L{Process}
        @param process: Process to search.

        @type  pattern: L{Pattern}
        @param pattern: Pattern to search for.
            It must be an instance of a subclass of L{Pattern}.

            The following L{Pattern} subclasses are provided by WinAppDbg:
             - L{BytePattern}
             - L{TextPattern}
             - L{RegExpPattern}
             - L{HexPattern}

            You can also write your own subclass of L{Pattern} for customized
            searches.

        @type  minAddr: int
        @param minAddr: (Optional) Start the search at this memory address.

        @type  maxAddr: int
        @param maxAddr: (Optional) Stop the search at this memory address.

        @type  bufferPages: int
        @param bufferPages: (Optional) Number of memory pages to buffer when
            performing the search. Valid values are:
             - C{0} or C{None}:
               Automatically determine the required buffer size. May not give
               complete results for regular expressions that match variable
               sized strings. If the pattern can't tell its maximum length
               buffering is disabled.
             - C{> 0}: Set the buffer size, in memory pages.
             - C{< 0}: Disable buffering entirely. This may give you a little
               speed gain at the cost of an increased memory usage. If the
               target process has very large contiguous memory regions it may
               actually be slower or even fail. It's also the only way to
               guarantee complete results for regular expressions that match
               variable sized strings.

        @type  overlapping: bool
        @param overlapping: C{True} to allow overlapping results, C{False}
            otherwise.

            Overlapping results yield the maximum possible number of results.

            For example, if searching for "AAAA" within "AAAAAAAA" at address
            C{0x10000}, when overlapping is turned off the following matches
            are yielded::
                (0x10000, 4, "AAAA")
                (0x10004, 4, "AAAA")

            If overlapping is turned on, the following matches are yielded::
                (0x10000, 4, "AAAA")
                (0x10001, 4, "AAAA")
                (0x10002, 4, "AAAA")
                (0x10003, 4, "AAAA")
                (0x10004, 4, "AAAA")

            As you can see, the middle results are overlapping the last two.

        @rtype:  iterator of tuple( int, int, str )
        @return: An iterator of tuples. Each tuple contains the following:
             - The memory address where the pattern was found.
             - The size of the data that matches the pattern.
             - The data that matches the pattern.

        @raise WindowsError: An error occurred when querying or reading the
            process memory.
        """

        # Do some namespace lookups of symbols we'll be using frequently.
        page  = MemoryAddresses.pageSize
        read  = pattern.read
        find  = pattern.find
        found = pattern.found

        # Calculate the address range.
        if minAddr is None:
            minAddr = 0
        if maxAddr is None:
            maxAddr = win32.LPVOID(-1).value  # XXX HACK

        # Calculate the buffer size from the number of pages.
        # Both None and 0 mean "determine it automatically".
        if not bufferPages:
            try:
                max_len = len(pattern)
            except NotImplementedError:
                size = None     # unknown length, can't do a buffered search
            else:
                size = MemoryAddresses.align_address_to_page_end(max_len) \
                       + page
        elif bufferPages > 0:
            size = page * (bufferPages + 1)
        else:
            size = None

        # Get the memory map of the process.
        memory_map = process.iter_memory_map(minAddr, maxAddr)

        # Perform search with buffering enabled.
        if size:

            # Memory is read one page at a time into a sliding window. After
            # searching, all but the last "keep" bytes are dropped, so a
            # match that straddles two reads is found once the next page is
            # appended. The window never holds more than "size" bytes.
            keep        = size - page
            window      = None  # data currently buffered
            window_addr = 0     # memory address of the first byte in window
            next_addr   = None  # memory address right after the window
            last        = 0     # window offset to resume the search from

            # Loop through all memory blocks containing data.
            for mbi in memory_map:

                # Skip blocks with no data to search on.
                if not mbi.has_content():
                    continue

                # Get the address range of this block.
                address = mbi.BaseAddress
                if address >= maxAddr:
                    break
                end = address + mbi.RegionSize

                # Unless the block is contiguous to the buffered data,
                # discard the window and start over.
                if window is None or address != next_addr:
                    window      = None
                    window_addr = address
                    last        = 0

                # Read and search the block, one page at a time.
                while address < end:
                    chunk = min(page, end - address)
                    data  = read(process, address, chunk)
                    if window is None:
                        window = data
                    else:
                        window = window + data
                    address   = address + chunk
                    next_addr = address

                    # Yield each match of the pattern in the window.
                    pos, length = find(window, last)
                    while pos >= last:
                        match_addr = window_addr + pos
                        if minAddr <= match_addr < maxAddr:
                            result = found(match_addr, length,
                                           window[ pos : pos + length ])
                            if result is not None:
                                yield result
                        if overlapping:
                            last = pos + 1
                        else:
                            # Always advance at least one byte, or an empty
                            # match would be found over and over again.
                            last = pos + max(length, 1)
                        pos, length = find(window, last)

                    # Slide the window, fixing the resume position.
                    excess = len(window) - keep
                    if excess > 0:
                        window      = window[ excess : ]
                        window_addr = window_addr + excess
                        last        = max(last - excess, 0)

        # Perform search with buffering disabled.
        else:

            # Loop through all memory blocks containing data.
            for mbi in memory_map:

                # Skip blocks with no data to search on.
                if not mbi.has_content():
                    continue

                # Get the address and size of this block.
                address    = mbi.BaseAddress
                block_size = mbi.RegionSize
                if address >= maxAddr:
                    break

                # Read the whole memory region. Go through the pattern so it
                # can preprocess the data, same as in buffered searches.
                region = read(process, address, block_size)

                # Search for the pattern in this region.
                last = 0
                pos, length = find(region, last)
                while pos >= last:
                    match_addr = address + pos
                    if minAddr <= match_addr < maxAddr:
                        result = found(match_addr, length,
                                       region[ pos : pos + length ])
                        if result is not None:
                            yield result
                    if overlapping:
                        last = pos + 1
                    else:
                        # Always advance at least one byte, or an empty
                        # match would be found over and over again.
                        last = pos + max(length, 1)
                    pos, length = find(region, last)

    @classmethod
    def extract_ascii_strings(cls, process, minSize = 4, maxSize = 1024):
        """
        Extract ASCII strings from the process memory.

        @type  process: L{Process}
        @param process: Process to search.

        @type  minSize: int
        @param minSize: (Optional) Minimum size of the strings to search for.

        @type  maxSize: int
        @param maxSize: (Optional) Maximum size of the strings to search for.

        @rtype:  iterator of tuple(int, int, str)
        @return: Iterator of strings extracted from the process memory.
            Each tuple contains the following:
             - The memory address where the string was found.
             - The size of the string.
             - The string.
        """
        regexp = r"[\s\w\!\@\#\$\%%\^\&\*\(\)\{\}\[\]\~\`\'\"\:\;\.\,\\\/\-\+\=\_\<\>]{%d,%d}\0" % (minSize, maxSize)

        # The regular expression also matches the terminating null character,
        # so the longest possible match is one byte more than maxSize.
        pattern = RegExpPattern(regexp, 0, maxSize + 1)
        return cls.search_process(process, pattern, overlapping = False)
666