Package winappdbg :: Module module
[hide private]
[frames | no frames]

Source Code for Module winappdbg.module

   1  #!~/.wine/drive_c/Python25/python.exe 
   2  # -*- coding: utf-8 -*- 
   3   
   4  # Copyright (c) 2009-2014, Mario Vilas 
   5  # All rights reserved. 
   6  # 
   7  # Redistribution and use in source and binary forms, with or without 
   8  # modification, are permitted provided that the following conditions are met: 
   9  # 
  10  #     * Redistributions of source code must retain the above copyright notice, 
  11  #       this list of conditions and the following disclaimer. 
  12  #     * Redistributions in binary form must reproduce the above copyright 
  13  #       notice, this list of conditions and the following disclaimer in the 
  14  #       documentation and/or other materials provided with the distribution. 
  15  #     * Neither the name of the copyright holder nor the names of its 
  16  #       contributors may be used to endorse or promote products derived from 
  17  #       this software without specific prior written permission. 
  18  # 
  19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  20  # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  21  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  22  # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
  23  # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  24  # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  25  # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  26  # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  27  # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  28  # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  29  # POSSIBILITY OF SUCH DAMAGE. 
  30   
  31  """ 
  32  Module instrumentation. 
  33   
  34  @group Instrumentation: 
  35      Module 
  36   
  37  @group Warnings: 
  38      DebugSymbolsWarning 
  39  """ 
  40   
  41  from __future__ import with_statement 
  42   
  43  __revision__ = "$Id: module.py 1299 2013-12-20 09:30:55Z qvasimodo $" 
  44   
  45  __all__ = ['Module', 'DebugSymbolsWarning'] 
  46   
  47  import win32 
  48  from textio import HexInput, HexDump 
  49  from util import PathOperations 
  50   
  51  # delayed imports 
  52  Process = None 
  53   
  54  import os 
  55  import warnings 
  56  import traceback 
#==============================================================================

class DebugSymbolsWarning (UserWarning):
    """
    Warning category used to report that the debug symbols support
    is not working as expected.
    """
65
#==============================================================================

class Module (object):
    """
    Interface to a DLL library loaded in the context of another process.

    @group Properties:
        get_base, get_filename, get_name, get_size, get_entry_point,
        get_process, set_process, get_pid,
        get_handle, set_handle, open_handle, close_handle

    @group Labels:
        get_label, get_label_at_address, is_address_here,
        resolve, resolve_label, match_name

    @group Symbols:
        load_symbols, unload_symbols, get_symbols, iter_symbols,
        resolve_symbol, get_symbol_at_address

    @group Modules snapshot:
        clear

    @type unknown: str
    @cvar unknown: Suggested tag for unknown modules.

    @type lpBaseOfDll: int
    @ivar lpBaseOfDll: Base of DLL module.
        Use L{get_base} instead.

    @type hFile: L{FileHandle}
    @ivar hFile: Handle to the module file.
        Use L{get_handle} instead.

    @type fileName: str
    @ivar fileName: Module filename.
        Use L{get_filename} instead.

    @type SizeOfImage: int
    @ivar SizeOfImage: Size of the module.
        Use L{get_size} instead.

    @type EntryPoint: int
    @ivar EntryPoint: Entry point of the module.
        Use L{get_entry_point} instead.

    @type process: L{Process}
    @ivar process: Process where the module is loaded.
        Use the L{get_process} method instead.
    """

    # Suggested placeholder tag for modules whose name is not known.
    unknown = '<unknown>'
class _SymbolEnumerator (object):
    """
    Callable helper used internally by L{Module} to collect the symbols
    of a module into a Python list.
    """

    def __init__(self, undecorate = False):
        # When true, each symbol name is passed through
        # win32.UnDecorateSymbolName before being stored.
        self.undecorate = undecorate
        self.symbols    = []

    def __call__(self, SymbolName, SymbolAddress, SymbolSize, UserContext):
        """
        Callback that receives one symbol and appends it to L{symbols}
        as a C{(name, address, size)} tuple. Always returns C{win32.TRUE}.
        """
        name = SymbolName
        if self.undecorate:
            try:
                name = win32.UnDecorateSymbolName(name)
            except Exception:
                # Not all symbols are decorated: keep the name as received.
                pass
        self.symbols.append( (name, SymbolAddress, SymbolSize) )
        return win32.TRUE
138
def __init__(self, lpBaseOfDll, hFile = None, fileName = None,
                   SizeOfImage = None,
                   EntryPoint = None,
                   process = None):
    """
    @type  lpBaseOfDll: int
    @param lpBaseOfDll: Base address of the module.

    @type  hFile: L{FileHandle}
    @param hFile: (Optional) Handle to the module file.

    @type  fileName: str
    @param fileName: (Optional) Module filename.

    @type  SizeOfImage: int
    @param SizeOfImage: (Optional) Size of the module.

    @type  EntryPoint: int
    @param EntryPoint: (Optional) Entry point of the module.

    @type  process: L{Process}
    @param process: (Optional) Process where the module is loaded.
    """
    # Values stored exactly as given.
    self.lpBaseOfDll = lpBaseOfDll
    self.SizeOfImage = SizeOfImage
    self.EntryPoint  = EntryPoint
    self.fileName    = fileName

    # Debug symbols list, filled in by load_symbols().
    self.__symbols = []

    # The handle and the parent process go through their setters.
    self.set_handle(hFile)
    self.set_process(process)
171 172 # Not really sure if it's a good idea... 173 ## def __eq__(self, aModule): 174 ## """ 175 ## Compare two Module objects. The comparison is made using the process 176 ## IDs and the module bases. 177 ## 178 ## @type aModule: L{Module} 179 ## @param aModule: Another Module object. 180 ## 181 ## @rtype: bool 182 ## @return: C{True} if the two process IDs and module bases are equal, 183 ## C{False} otherwise. 184 ## """ 185 ## return isinstance(aModule, Module) and \ 186 ## self.get_pid() == aModule.get_pid() and \ 187 ## self.get_base() == aModule.get_base() 188
def get_handle(self):
    """
    @rtype:  L{Handle}
    @return: The file handle currently stored for this module.
        Returns C{None} if unknown.
    """
    # The stored value is returned as is; there is no way to guess it.
    hFile = self.__hFile
    return hFile
197
def set_handle(self, hFile):
    """
    Stores the file handle of the module.

    @type  hFile: L{Handle}
    @param hFile: File handle. Use C{None} to clear.
    """
    # An invalid handle value is stored as "no handle".
    if hFile == win32.INVALID_HANDLE_VALUE:
        self.__hFile = None
    else:
        self.__hFile = hFile

# NOTE: this property is bound to the get_handle/set_handle functions defined
# just above. A second get_handle() is defined further down in this class
# (it opens the file on demand); that later definition replaces the method
# name only and does not change the getter used by this property.
hFile = property(get_handle, set_handle, doc="")

def get_process(self):
    """
    @rtype:  L{Process}
    @return: The parent Process object stored for this module.
        Returns C{None} if unknown.
    """
    # The stored value is returned as is; there is no way to guess it.
    parent = self.__process
    return parent
217
def set_process(self, process = None):
    """
    Manually set the parent process. Use with care!

    @type  process: L{Process}
    @param process: (Optional) Process object. Use C{None} for no process.

    @raise TypeError: The given object is not a L{Process} instance.
    """
    if process is not None:
        # The Process class is imported on first use and cached in the
        # module level name.
        global Process      # delayed import
        if Process is None:
            from process import Process
        if not isinstance(process, Process):
            raise TypeError(
                "Parent process must be a Process instance, "
                "got %s instead" % type(process))
    self.__process = process

# Attribute-style access to the parent process; assignments are routed
# through set_process(), which type checks the value.
process = property(get_process, set_process, doc="")

def get_pid(self):
    """
    @rtype:  int or None
    @return: Global ID of the parent process.
        Returns C{None} when no parent process is set.
    """
    parent = self.get_process()
    if parent is None:
        return None
    return parent.get_pid()
248
def get_base(self):
    """
    @rtype:  int or None
    @return: Base address of the module, as stored in C{lpBaseOfDll}.
        Returns C{None} if unknown.
    """
    base = self.lpBaseOfDll
    return base
256
def get_size(self):
    """
    @rtype:  int or None
    @return: Base size of the module.
        Returns C{None} if unknown.
    """
    # Already known: nothing to query.
    if self.SizeOfImage:
        return self.SizeOfImage

    # Try to find it out, then return whatever is stored afterwards.
    self.__get_size_and_entry_point()
    return self.SizeOfImage
266
def get_entry_point(self):
    """
    @rtype:  int or None
    @return: Entry point of the module.
        Returns C{None} if unknown.
    """
    # Already known: nothing to query.
    if self.EntryPoint:
        return self.EntryPoint

    # Try to find it out, then return whatever is stored afterwards.
    self.__get_size_and_entry_point()
    return self.EntryPoint
276
def __get_size_and_entry_point(self):
    "Get the size and entry point of the module using the Win32 API."
    # Without a parent process there is no handle to query with, so
    # SizeOfImage and EntryPoint are left untouched.
    process = self.get_process()
    if process:
        try:
            handle = process.get_handle( win32.PROCESS_VM_READ |
                                         win32.PROCESS_QUERY_INFORMATION )
            base   = self.get_base()
            mi     = win32.GetModuleInformation(handle, base)
            self.SizeOfImage = mi.SizeOfImage
            self.EntryPoint  = mi.EntryPoint
        except WindowsError, e:
            # A failure is reported as a RuntimeWarning instead of being
            # propagated to the caller.
            warnings.warn(
                "Cannot get size and entry point of module %s, reason: %s"\
                % (self.get_name(), e.strerror), RuntimeWarning)
292
def get_filename(self):
    """
    @rtype:  str or None
    @return: Module filename.
        Returns C{None} if unknown.
    """
    # Known filename: return it right away.
    if self.fileName is not None:
        return self.fileName

    # Otherwise try to get it from the file handle, if there is one,
    # and remember the converted pathname for next time.
    hFile = self.hFile
    if hFile not in (None, win32.INVALID_HANDLE_VALUE):
        nativeName = hFile.get_filename()
        if nativeName:
            self.fileName = \
                    PathOperations.native_to_win32_pathname(nativeName)
    return self.fileName
306
def __filename_to_modname(self, pathname):
    """
    Converts a pathname into a module name, as used in labels.

    @type  pathname: str
    @param pathname: Pathname to a module.

    @rtype:  str
    @return: Module name.
    """
    filename = PathOperations.pathname_to_filename(pathname)

    # No filename component: the pathname itself is the module name.
    if not filename:
        return pathname

    # Lowercase filename, minus the extension when both parts exist.
    filename = filename.lower()
    filepart, extpart = PathOperations.split_extension(filename)
    if filepart and extpart:
        return filepart
    return filename
326
def get_name(self):
    """
    @rtype:  str
    @return: Module name, as used in labels.

    @warning: Names are B{NOT} guaranteed to be unique.

        If you need unique identification for a loaded module,
        use the base address instead.

    @see: L{get_label}
    """
    pathname = self.get_filename()
    if pathname:
        modName = self.__filename_to_modname(pathname)
        # Unicode names are converted to cp1252 byte strings. If the
        # conversion fails a warning is issued and the unicode name
        # is returned unchanged.
        if isinstance(modName, unicode):
            try:
                modName = modName.encode('cp1252')
            except UnicodeEncodeError, e:
                warnings.warn(str(e))
    else:
        # Unknown filename: fall back to the base address in hexadecimal.
        modName = "0x%x" % self.get_base()
    return modName
350
def match_name(self, name):
    """
    Tells if a given name could refer to this module.

    @type  name: str
    @param name: Module name, base address as text, or module filename.

    @rtype:  bool
    @return:
        C{True} if the given name could refer to this module.
        It may not be exactly the same returned by L{get_name}.
    """

    # Exact match against our own name (case insensitive).
    my_name = self.get_name().lower()
    if my_name == name.lower():
        return True

    # The name may be a base address: compare it with ours.
    try:
        address = HexInput.integer(name)
    except ValueError:
        address = None
    if address is not None and address == self.get_base():
        return True

    # The name may be a filename: convert it to a module name and
    # compare that with ours (case insensitive).
    return self.__filename_to_modname(name).lower() == my_name
381 382 #------------------------------------------------------------------------------ 383
def open_handle(self):
    """
    Opens a new handle to the module.

    The new handle is stored in the L{hFile} property.

    @raise Exception: The module filename could not be retrieved.
    """

    # The file is opened by name, so the filename is required.
    if not self.get_filename():
        msg = "Cannot retrieve filename for module at %s"
        msg = msg % HexDump.address( self.get_base() )
        raise Exception(msg)

    # The new handle is opened first; the old one is only dealt with
    # once this call has succeeded.
    hFile = win32.CreateFile(self.get_filename(),
                                       dwShareMode = win32.FILE_SHARE_READ,
                             dwCreationDisposition = win32.OPEN_EXISTING)

    # In case hFile was set to an actual handle value instead of a Handle
    # object. This shouldn't happen unless the user tinkered with hFile.
    if not hasattr(self.hFile, '__del__'):
        self.close_handle()

    self.hFile = hFile
406
def close_handle(self):
    """
    Closes the handle to the module.

    @note: Normally you don't need to call this method. All handles
        created by I{WinAppDbg} are automatically closed when the garbage
        collector claims them. So unless you've been tinkering with it,
        setting L{hFile} to C{None} should be enough.
    """
    try:
        handle = self.hFile
        if hasattr(handle, 'close'):
            # Handle object: let it close itself.
            handle.close()
        elif handle not in (None, win32.INVALID_HANDLE_VALUE):
            # Raw handle value: close it through the Win32 API.
            win32.CloseHandle(handle)
    finally:
        # The handle is always forgotten, even if closing it failed.
        self.hFile = None
423
def get_handle(self):
    """
    Returns the handle to the module file, opening it first when there
    is no usable handle stored.

    @rtype:  L{FileHandle}
    @return: Handle to the module file.
    """
    noHandle = self.hFile in (None, win32.INVALID_HANDLE_VALUE)
    if noHandle:
        self.open_handle()
    return self.hFile
432
def clear(self):
    """
    Clears the resources held by this object: the reference to the
    parent process and the handle to the module file.
    """
    try:
        self.set_process(None)
    finally:
        # The handle is closed even if clearing the process failed.
        self.close_handle()
441 442 #------------------------------------------------------------------------------ 443 444 # XXX FIXME 445 # I've been told sometimes the debugging symbols APIs don't correctly 446 # handle redirected exports (for example ws2_32!recv). 447 # I haven't been able to reproduce the bug yet.
def load_symbols(self):
    """
    Loads the debugging symbols for a module.
    Automatically called by L{get_symbols}.

    On failure a L{DebugSymbolsWarning} is issued and whatever symbols
    were collected so far (possibly none) are stored.
    """
    # Pick the access mask to request for the process handle.
    if win32.PROCESS_ALL_ACCESS == win32.PROCESS_ALL_ACCESS_VISTA:
        dwAccess = win32.PROCESS_QUERY_LIMITED_INFORMATION
    else:
        dwAccess = win32.PROCESS_QUERY_INFORMATION
    hProcess    = self.get_process().get_handle(dwAccess)
    hFile       = self.hFile
    BaseOfDll   = self.get_base()
    SizeOfDll   = self.get_size()
    Enumerator  = self._SymbolEnumerator()
    try:
        win32.SymInitialize(hProcess)
        SymOptions = win32.SymGetOptions()
        SymOptions |= (
            win32.SYMOPT_ALLOW_ZERO_ADDRESS     |
            win32.SYMOPT_CASE_INSENSITIVE       |
            win32.SYMOPT_FAVOR_COMPRESSED       |
            win32.SYMOPT_INCLUDE_32BIT_MODULES  |
            win32.SYMOPT_UNDNAME
        )
        SymOptions &= ~(
            win32.SYMOPT_LOAD_LINES         |
            win32.SYMOPT_NO_IMAGE_SEARCH    |
            win32.SYMOPT_NO_CPP             |
            win32.SYMOPT_IGNORE_NT_SYMPATH
        )
        win32.SymSetOptions(SymOptions)

        # This extra option is set on a best effort basis: if it's
        # rejected the options set above remain in place.
        try:
            win32.SymSetOptions(
                SymOptions | win32.SYMOPT_ALLOW_ABSOLUTE_SYMBOLS)
        except WindowsError:
            pass
        try:
            # First try loading the symbols using the file handle...
            try:
                success = win32.SymLoadModule64(
                    hProcess, hFile, None, None, BaseOfDll, SizeOfDll)
            except WindowsError:
                success = 0

            # ...and if that didn't work, retry using the filename.
            if not success:
                ImageName = self.get_filename()
                success = win32.SymLoadModule64(
                    hProcess, None, ImageName, None, BaseOfDll, SizeOfDll)
            if success:
                try:
                    win32.SymEnumerateSymbols64(
                        hProcess, BaseOfDll, Enumerator)
                finally:
                    win32.SymUnloadModule64(hProcess, BaseOfDll)
        finally:
            win32.SymCleanup(hProcess)
    except WindowsError:
        # traceback.format_exc() takes an optional "limit" argument, not
        # the exception: it formats the exception currently being handled.
        msg = "Cannot load debug symbols for process ID %d, reason:\n%s"
        msg = msg % (self.get_pid(), traceback.format_exc())
        warnings.warn(msg, DebugSymbolsWarning)
    self.__symbols = Enumerator.symbols
507
def unload_symbols(self):
    """
    Unloads the debugging symbols for a module, by replacing the stored
    list of symbols with a new, empty one.
    """
    self.__symbols = []
513
def get_symbols(self):
    """
    Returns the debugging symbols for a module.
    The symbols are automatically loaded when needed.

    @rtype:  list of tuple( str, int, int )
    @return: A new list of symbols.
        Each symbol is represented by a tuple that contains:
            - Symbol name
            - Symbol memory address
            - Symbol size in bytes
    """
    # An empty list means the symbols haven't been loaded (or none
    # were found), so try loading them.
    if len(self.__symbols) == 0:
        self.load_symbols()

    # Return a copy, so the caller can't alter our own list.
    return self.__symbols[:]
529
def iter_symbols(self):
    """
    Returns an iterator for the debugging symbols in a module,
    in no particular order.
    The symbols are automatically loaded when needed.

    @rtype:  iterator of tuple( str, int, int )
    @return: Iterator of symbols.
        Each symbol is represented by a tuple that contains:
            - Symbol name
            - Symbol memory address
            - Symbol size in bytes
    """
    # An empty list means the symbols haven't been loaded (or none
    # were found), so try loading them.
    if len(self.__symbols) == 0:
        self.load_symbols()
    return iter(self.__symbols)
546
def resolve_symbol(self, symbol, bCaseSensitive = False):
    """
    Resolves a debugging symbol's address.

    The symbol names are compared as they are first. If no match is
    found, they're compared again after undecorating them.

    @type  symbol: str
    @param symbol: Name of the symbol to resolve.

    @type  bCaseSensitive: bool
    @param bCaseSensitive: C{True} for case sensitive matches,
        C{False} for case insensitive.

    @rtype:  int or None
    @return: Memory address of symbol. C{None} if not found.
    """
    if bCaseSensitive:
        wanted = symbol
    else:
        wanted = symbol.lower()

    # First pass: compare against the symbol names as they are.
    for (name, address, size) in self.iter_symbols():
        candidate = name
        if not bCaseSensitive:
            candidate = candidate.lower()
        if wanted == candidate:
            return address

    # Second pass: compare against the undecorated symbol names.
    # Symbols that can't be undecorated are skipped.
    for (name, address, size) in self.iter_symbols():
        try:
            candidate = win32.UnDecorateSymbolName(name)
        except Exception:
            continue
        if not bCaseSensitive:
            candidate = candidate.lower()
        if wanted == candidate:
            return address

    # Not found.
    return None
584
def get_symbol_at_address(self, address):
    """
    Tries to find the closest matching symbol for the given address.

    Only symbols whose range C{[address, address + size)} contains the
    given address are considered. Among those, the one that begins at
    the highest address is returned.

    @type  address: int
    @param address: Memory address to query.

    @rtype: None or tuple( str, int, int )
    @return: Returns a tuple consisting of:
         - Name
         - Address
         - Size (in bytes)
        Returns C{None} if no symbol could be matched.
    """
    matches = [ (name, begin, size)
                for (name, begin, size) in self.iter_symbols()
                if begin <= address < begin + size ]
    if not matches:
        return None

    # On ties max() keeps the first match, in enumeration order.
    return max(matches, key = lambda match: match[1])
607 608 #------------------------------------------------------------------------------ 609
def get_label(self, function = None, offset = None):
    """
    Retrieves the label for the given function of this module or the module
    base address if no function name is given.

    @type  function: str
    @param function: (Optional) Exported function name.

    @type  offset: int
    @param offset: (Optional) Offset from the module base address.

    @rtype:  str
    @return: Label for the module base address, plus the offset if given.
    """
    modName = self.get_name()
    return _ModuleContainer.parse_label(modName, function, offset)
625
def get_label_at_address(self, address, offset = None):
    """
    Creates a label from the given memory address.

    If the address belongs to the module, the label is made relative to
    its base address.

    @type  address: int
    @param address: Memory address.

    @type  offset: None or int
    @param offset: (Optional) Offset value.

    @rtype:  str
    @return: Label pointing to the given address.
    """

    # The target is the address plus the offset, if any.
    if offset:
        address = address + offset

    # Default label: the module name plus the distance to its base.
    modName  = self.get_name()
    procName = None
    distance = address - self.get_base()

    # Better: the distance to the entry point, when the entry point
    # is known and isn't located past the target address.
    entryPoint = self.get_entry_point()
    if entryPoint and entryPoint <= address:
        procName = "start"
        distance = address - entryPoint

    # Best: the debug symbol at the target address, provided it's not
    # farther away than what we already have.
    try:
        symbol = self.get_symbol_at_address(address)
        if symbol:
            (SymbolName, SymbolAddress, SymbolSize) = symbol
            symbolDistance = address - SymbolAddress
            if symbolDistance <= distance:
                procName = SymbolName
                distance = symbolDistance
    except WindowsError:
        pass

    # Build the label and return it.
    return _ModuleContainer.parse_label(modName, procName, distance)
674
def is_address_here(self, address):
    """
    Tries to determine if the given address belongs to this module.

    @type  address: int
    @param address: Memory address.

    @rtype:  bool or None
    @return: C{True} if the address belongs to the module,
        C{False} if it doesn't,
        and C{None} if it can't be determined.
    """
    base = self.get_base()
    size = self.get_size()

    # Both the base address and the size are needed to tell.
    if not (base and size):
        return None
    return base <= address < (base + size)
692
def resolve(self, function):
    """
    Resolves a function exported by this module.

    @type  function: str or int
    @param function:
        str: Name of the function.
        int: Ordinal of the function.

    @rtype:  int
    @return: Memory address of the exported function in the process.
        Returns None on error.
    """

    # Unknown DLL filename, there's nothing we can do.
    filename = self.get_filename()
    if not filename:
        return None

    # If the DLL is already mapped locally, resolve the function.
    try:
        hlib    = win32.GetModuleHandle(filename)
        address = win32.GetProcAddress(hlib, function)
    except WindowsError, e:

        # Load the DLL locally, resolve the function and unload it.
        # Note hlib is still used below after FreeLibrary, but only
        # as a number in the address arithmetic.
        try:
            hlib = win32.LoadLibraryEx(filename,
                                       win32.DONT_RESOLVE_DLL_REFERENCES)
            try:
                address = win32.GetProcAddress(hlib, function)
            finally:
                win32.FreeLibrary(hlib)
        except WindowsError, e:
            return None

    # A NULL pointer means the function was not found.
    if address in (None, 0):
        return None

    # Compensate for DLL base relocations locally and remotely.
    # (address - hlib) is the offset of the function from the local
    # base, which is then added to the base of this module.
    return address - hlib + self.lpBaseOfDll
735
def resolve_label(self, label):
    """
    Resolves a label for this module only. If the label refers to another
    module, an exception is raised.

    @type  label: str
    @param label: Label to resolve.

    @rtype:  int
    @return: Memory address pointed to by the label.

    @raise ValueError: The label is malformed or impossible to resolve.
    @raise RuntimeError: Cannot resolve the module or function.
    """

    # Split the label into its components.
    # Use the parent process to do it whenever there is one.
    aProcess = self.get_process()
    if aProcess is None:
        split = _ModuleContainer.split_label
    else:
        split = aProcess.split_label
    (module, procedure, offset) = split(label)

    # A module name that doesn't match ours is an error.
    if module and not self.match_name(module):
        raise RuntimeError("Label does not belong to this module")

    if not procedure:

        # No procedure given: use the base address of the module.
        address = self.get_base()

    else:

        # Try, in this order: the exported functions, the debug symbols,
        # and the keyword "start" for the entry point.
        address = self.resolve(procedure)
        if address is None:
            address = self.resolve_symbol(procedure)
            if address is None and procedure == "start":
                address = self.get_entry_point()

            # The procedure was not found.
            if address is None:
                if not module:
                    module = self.get_name()
                msg = "Can't find procedure %s in module %s"
                raise RuntimeError(msg % (procedure, module))

    # Add the offset if given and return the resolved address.
    if offset:
        address = address + offset
    return address
791

#==============================================================================

# TODO
# An alternative approach to the toolhelp32 snapshots: parsing the PEB and
# fetching the list of loaded modules from there. That would solve the problem
# of toolhelp32 not working when the process hasn't finished initializing.
# See: http://pferrie.host22.com/misc/lowlevel3.htm

class _ModuleContainer (object):
    """
    Encapsulates the capability to contain Module objects.

    @note: Labels are an approximated way of referencing memory locations
        across different executions of the same process, or different processes
        with common modules. They are not meant to be perfectly unique, and
        some errors may occur when multiple modules with the same name are
        loaded, or when module filenames can't be retrieved.

    @group Modules snapshot:
        scan_modules,
        get_module, get_module_bases, get_module_count,
        get_module_at_address, get_module_by_name,
        has_module, iter_modules, iter_module_addresses,
        clear_modules

    @group Labels:
        parse_label, split_label, sanitize_label, resolve_label,
        resolve_label_components, get_label_at_address, split_label_strict,
        split_label_fuzzy

    @group Symbols:
        load_symbols, unload_symbols, get_symbols, iter_symbols,
        resolve_symbol, get_symbol_at_address

    @group Debugging:
        is_system_defined_breakpoint, get_system_breakpoint,
        get_user_breakpoint, get_breakin_breakpoint,
        get_wow64_system_breakpoint, get_wow64_user_breakpoint,
        get_wow64_breakin_breakpoint, get_break_on_error_ptr
    """

def __init__(self):
    # Snapshot of loaded modules, indexed by base address.
    self.__moduleDict = {}

    self.__system_breakpoints = {}

    # Replace split_label with the fuzzy version on object instances.
    self.split_label = self.__use_fuzzy_mode
839
def __initialize_snapshot(self):
    """
    Private method to automatically initialize the snapshot
    when you try to use it without calling any of the scan_*
    methods first. You don't need to call this yourself.
    """
    # Nothing to do if the snapshot already has something in it.
    if self.__moduleDict:
        return

    # Failing to scan is not an error here: the snapshot stays empty.
    try:
        self.scan_modules()
    except WindowsError:
        pass
851
def __contains__(self, anObject):
    """
    @type  anObject: L{Module}, int
    @param anObject:
        - C{Module}: Module object to look for.
        - C{int}: Base address of the DLL to look for.

    @rtype:  bool
    @return: C{True} if the snapshot contains
        a L{Module} object with the same base address.
    """
    # Module objects are looked up by their base address.
    if isinstance(anObject, Module):
        lpBaseOfDll = anObject.lpBaseOfDll
    else:
        lpBaseOfDll = anObject
    return self.has_module(lpBaseOfDll)
866
def __iter__(self):
    """
    Iterating the container is the same as calling L{iter_modules}.

    @see:    L{iter_modules}
    @rtype:  dictionary-valueiterator
    @return: Iterator of L{Module} objects in this snapshot.
    """
    modules = self.iter_modules()
    return modules
874
def __len__(self):
    """
    The length of the container is the value of L{get_module_count}.

    @see:    L{get_module_count}
    @rtype:  int
    @return: Count of L{Module} objects in this snapshot.
    """
    count = self.get_module_count()
    return count
882
def has_module(self, lpBaseOfDll):
    """
    @type  lpBaseOfDll: int
    @param lpBaseOfDll: Base address of the DLL to look for.

    @rtype:  bool
    @return: C{True} if the snapshot contains a
        L{Module} object with the given base address.
    """
    # Make sure there's a snapshot to look into.
    self.__initialize_snapshot()
    found = lpBaseOfDll in self.__moduleDict
    return found
894
def get_module(self, lpBaseOfDll):
    """
    @type  lpBaseOfDll: int
    @param lpBaseOfDll: Base address of the DLL to look for.

    @rtype:  L{Module}
    @return: Module object with the given base address.

    @raise KeyError: No module is known at the given base address.
    """
    # Make sure there's a snapshot to look into.
    self.__initialize_snapshot()
    try:
        return self.__moduleDict[lpBaseOfDll]
    except KeyError:
        # Raise a more descriptive error instead.
        raise KeyError(
            "Unknown DLL base address %s" % HexDump.address(lpBaseOfDll))
909
def iter_module_addresses(self):
    """
    @see:    L{iter_modules}
    @rtype:  dictionary-keyiterator
    @return: Iterator of DLL base addresses in this snapshot.
    """
    # Make sure there's a snapshot to iterate.
    self.__initialize_snapshot()
    snapshot = self.__moduleDict
    return snapshot.iterkeys()
918
def iter_modules(self):
    """
    @see:    L{iter_module_addresses}
    @rtype:  dictionary-valueiterator
    @return: Iterator of L{Module} objects in this snapshot.
    """
    # Make sure there's a snapshot to iterate.
    self.__initialize_snapshot()
    snapshot = self.__moduleDict
    return snapshot.itervalues()
927
def get_module_bases(self):
    """
    @see:    L{iter_module_addresses}
    @rtype:  list( int... )
    @return: List of DLL base addresses in this snapshot.
    """
    # Make sure there's a snapshot to list.
    self.__initialize_snapshot()
    snapshot = self.__moduleDict
    return snapshot.keys()
936
def get_module_count(self):
    """
    @rtype:  int
    @return: Count of L{Module} objects in this snapshot.
    """
    # Make sure there's a snapshot to count.
    self.__initialize_snapshot()
    snapshot = self.__moduleDict
    return len(snapshot)
944 945 #------------------------------------------------------------------------------ 946
def get_module_by_name(self, modName):
    """
    @type  modName: str
    @param modName:
        Name of the module to look for, as returned by L{Module.get_name}.
        If two or more modules with the same name are loaded, only one
        of the matching modules is returned.

        You can also pass a full pathname to the DLL file.
        This works correctly even if two modules with the same name
        are loaded from different paths.

    @rtype:  L{Module}
    @return: C{Module} object that best matches the given name.
        Returns C{None} if no C{Module} can be found.
    """

    # Convert modName to lowercase.
    # This helps make case insensitive string comparisons.
    modName = modName.lower()

    # modName is an absolute pathname.
    if PathOperations.path_is_absolute(modName):
        for lib in self.iter_modules():
            # Modules with an unknown filename return None here,
            # they can never match a pathname so they're skipped.
            fileName = lib.get_filename()
            if fileName and modName == fileName.lower():
                return lib
        return None     # Stop trying to match the name.

    # Get all the module names.
    # This prevents having to iterate through the module list
    #  more than once.
    modDict = [ ( lib.get_name(), lib ) for lib in self.iter_modules() ]
    modDict = dict(modDict)

    # modName is a base filename.
    if modName in modDict:
        return modDict[modName]

    # modName is a base filename without extension.
    filepart, extpart = PathOperations.split_extension(modName)
    if filepart and extpart:
        if filepart in modDict:
            return modDict[filepart]

    # modName is a base address.
    try:
        baseAddress = HexInput.integer(modName)
    except ValueError:
        return None
    if self.has_module(baseAddress):
        return self.get_module(baseAddress)

    # Module not found.
    return None
1001
def get_module_at_address(self, address):
    """
    @type  address: int
    @param address: Memory address to query.

    @rtype:  L{Module}
    @return: C{Module} object that best matches the given address.
        Returns C{None} if no C{Module} can be found.
    """
    # Sorted base addresses, with a sentinel at the end so that
    # every module has a "next base" to compare against.
    bases = sorted(self.get_module_bases())
    bases.append(0x10000000000000000)  # max. 64 bit address + 1

    if address >= bases[0]:
        for index in range(len(bases) - 1):
            begin = bases[index]
            end   = bases[index + 1]
            if begin <= address < end:
                module = self.get_module(begin)

                # Only an explicit False rules the module out.
                # Both True and None are taken as a match.
                if module.is_address_here(address) is False:
                    break
                return module
    return None
1028 1029 # XXX this method mustn't end up calling __initialize_snapshot by accident!
def scan_modules(self):
    """
    Populates the snapshot with loaded modules.

    Modules already in the snapshot are kept (only their missing data is
    filled in), new modules are added, and modules no longer found are
    removed.
    """

    # The module filenames may be spoofed by malware,
    # since this information resides in usermode space.
    # See: http://www.ragestorm.net/blogs/?p=163

    # Ignore special process IDs.
    # PID 0: System Idle Process. Also has a special meaning to the
    #        toolhelp APIs (current process).
    # PID 4: System Integrity Group. See this forum post for more info:
    #        http://tinyurl.com/ycza8jo
    #        (points to social.technet.microsoft.com)
    #        Only on XP and above
    # PID 8: System (?) only in Windows 2000 and below AFAIK.
    #        It's probably the same as PID 4 in XP and above.
    dwProcessId = self.get_pid()
    if dwProcessId in (0, 4, 8):
        return

    # It would seem easier to clear the snapshot first.
    # But then all open handles would be closed.
    found_bases = set()
    with win32.CreateToolhelp32Snapshot(win32.TH32CS_SNAPMODULE,
                                        dwProcessId) as hSnapshot:
        me = win32.Module32First(hSnapshot)
        while me is not None:
            lpBaseAddress = me.modBaseAddr
            fileName      = me.szExePath    # full pathname
            # NOTE(review): the nesting of the "else" below was lost in
            # this listing; it's assumed to pair with the outer "if", so
            # only full pathnames get converted -- confirm against the
            # original file.
            if not fileName:
                fileName  = me.szModule     # filename only
                if not fileName:
                    fileName = None
            else:
                fileName = PathOperations.native_to_win32_pathname(fileName)
            found_bases.add(lpBaseAddress)
##                if not self.has_module(lpBaseAddress): # XXX triggers a scan
            if lpBaseAddress not in self.__moduleDict:
                aModule = Module(lpBaseAddress, fileName = fileName,
                                       SizeOfImage = me.modBaseSize,
                                       process = self)
                self._add_module(aModule)
            else:
                # Known module: only fill in the data that's missing.
                aModule = self.get_module(lpBaseAddress)
                if not aModule.fileName:
                    aModule.fileName    = fileName
                if not aModule.SizeOfImage:
                    aModule.SizeOfImage = me.modBaseSize
                if not aModule.process:
                    aModule.process     = self
            me = win32.Module32Next(hSnapshot)

    # Remove from the snapshot the modules that were not found this time.
##        for base in self.get_module_bases(): # XXX triggers a scan
    for base in self.__moduleDict.keys():
        if base not in found_bases:
            self._del_module(base)
1087
def clear_modules(self):
    """
    Clears the modules snapshot.

    Each module object currently in the snapshot gets its C{clear()}
    method called, then the snapshot is replaced by an empty dictionary.
    """
    # values() exists on every Python version (itervalues() does not), and
    # the list() copy keeps the loop safe if clear() touches the snapshot.
    for aModule in list(self.__moduleDict.values()):
        aModule.clear()
    self.__moduleDict = dict()
1095 1096 #------------------------------------------------------------------------------ 1097 1098 @staticmethod
def parse_label(module = None, function = None, offset = None):
    """
    Builds a label string out of a module name, a function name or
    ordinal, and an offset.

    @warning: Only the text of the label is produced. Nothing here checks
        that the label points to a valid memory location.

    @type  module: None or str
    @param module: (Optional) Module name.

    @type  function: None, str or int
    @param function: (Optional) Function name or ordinal.
        Integers are rendered as C{#0x...} ordinals.

    @type  offset: None or int
    @param offset: (Optional) Offset value.

        If C{function} is given, the offset is relative to the function.

        If C{function} is C{None}, the offset is relative to the module.

    @rtype:  str
    @return: Label for the given function in the given module.

    @raise ValueError:
        The module or function name contains C{!} or C{+}.
    """

    # TODO
    # Invalid characters should be escaped or filtered.

    # Ordinals become "#0x..." strings; anything that cannot be formatted
    # as a number (names, None) is left alone.
    try:
        function = "#0x%x" % function
    except TypeError:
        pass

    # Reject the characters that are used as separators in labels.
    if module is not None and ('!' in module or '+' in module):
        raise ValueError("Invalid module name: %s" % module)
    if function is not None and ('!' in function or '+' in function):
        raise ValueError("Invalid function name: %s" % function)

    # module ! function [+ offset]
    if module and function:
        if offset:
            return "%s!%s+0x%x" % (module, function, offset)
        return "%s!%s" % (module, function)

    # module ! [offset]
    if module:
        if offset:
            return "%s!0x%x" % (module, offset)
        return "%s!" % module

    # ! function [+ offset]
    if function:
        if offset:
            return "!%s+0x%x" % (function, offset)
        return "!%s" % function

    # Bare address.
    if offset:
        return "0x%x" % offset
    return "0x0"
1168 1169 @staticmethod
def split_label_strict(label):
    """
    Splits a label that follows the syntax produced by L{parse_label}.

    For user input with a looser syntax use L{split_label_fuzzy} instead.

    @warning: The label is only parsed. Nothing here checks that it
        points to a valid memory location.

    @type  label: str
    @param label: Label to split. C{None} and blank labels are treated
        as C{"0x0"}.

    @rtype:  tuple( str or None, str or int or None, int or None )
    @return: Tuple containing the C{module} name,
        the C{function} name or ordinal, and the C{offset} value.

        Components the label doesn't specify are returned as C{None};
        this includes a zero offset.

    @raise ValueError: The label is malformed or ambiguous.
    """
    module = function = None
    offset = 0

    # None, empty and all-blank labels mean address zero.
    if not label:
        label = "0x0"
    else:
        for blank in (' ', '\t', '\r', '\n'):
            label = label.replace(blank, '')
        if not label:
            label = "0x0"

    malformed = "Malformed label: %s" % label
    ambiguous = "Ambiguous label: %s" % label

    def split_offset(text):
        # Splits "name+offset" into the name and the parsed offset.
        try:
            name, raw_offset = text.split('+')
        except ValueError:
            raise ValueError(malformed)
        try:
            return name, HexInput.integer(raw_offset)
        except ValueError:
            raise ValueError(malformed)

    if '!' in label:

        # * ! *
        try:
            module, function = label.split('!')
        except ValueError:
            raise ValueError(malformed)

        if function:

            # module ! function
            if '+' in module:
                raise ValueError(malformed)

            if '+' in function:
                # module ! function + offset
                function, offset = split_offset(function)
            else:
                # module ! offset
                # (when the right side doesn't parse as a number it
                # stays as the function name)
                try:
                    offset = HexInput.integer(function)
                    function = None
                except ValueError:
                    pass

        elif '+' in module:

            # module + offset !
            module, offset = split_offset(module)

        else:

            # offset !
            # (when the left side doesn't parse as a number it stays as
            # the module name)
            try:
                offset = HexInput.integer(module)
                module = None
            except ValueError:
                pass

        # Empty components count as missing.
        if not module:
            module = None
        if not function:
            function = None

    else:

        # No exclamation mark: only a plain offset or a "#ordinal" can
        # be told apart from a module or function name.
        try:
            offset = HexInput.integer(label)
        except ValueError:
            if not label.startswith('#'):
                raise ValueError(ambiguous)
            function = label
            try:
                HexInput.integer(function[1:])
            except ValueError:
                raise ValueError(ambiguous)

    # Convert function ordinal strings into integers.
    if function and function.startswith('#'):
        try:
            function = HexInput.integer(function[1:])
        except ValueError:
            pass

    # Convert null offsets to None.
    if not offset:
        offset = None

    return (module, function, offset)
1311
def split_label_fuzzy(self, label):
    """
    Splits a label entered as user input.

    It's more flexible in its syntax parsing than the L{split_label_strict}
    method, as it allows the exclamation mark (B{C{!}}) to be omitted. The
    ambiguity is resolved by searching the modules in the snapshot to guess
    if a label refers to a module or a function. It also tries to rebuild
    labels when they contain hardcoded addresses.

    @warning: This method only parses the label, it doesn't make sure the
        label actually points to a valid memory location.

    @type  label: str
    @param label: Label to split. C{None} and blank labels are treated
        as C{"0x0"}.

    @rtype:  tuple( str or None, str or int or None, int or None )
    @return: Tuple containing the C{module} name,
        the C{function} name or ordinal, and the C{offset} value.

        If the label doesn't specify a module,
        then C{module} is C{None}.

        If the label doesn't specify a function,
        then C{function} is C{None}.

        If the label doesn't specify an offset (or the offset is zero),
        then C{offset} is C{None}.

    @raise ValueError: The label is malformed.
    """
    module = function = None
    offset = 0

    # Special case: None
    if not label:
        label = "0x0"
    else:

        # Remove all blanks.
        label = label.replace(' ', '')
        label = label.replace('\t', '')
        label = label.replace('\r', '')
        label = label.replace('\n', '')

        # Special case: empty label.
        if not label:
            label = "0x0"

    # If an exclamation sign is present, we know we can parse it strictly.
    if '!' in label:
        return self.split_label_strict(label)

    # * + offset
    if '+' in label:
        try:
            prefix, offset = label.split('+')
        except ValueError:
            raise ValueError("Malformed label: %s" % label)
        try:
            offset = HexInput.integer(offset)
        except ValueError:
            raise ValueError("Malformed label: %s" % label)
        label = prefix

    # This parses both filenames and base addresses.
    modobj = self.get_module_by_name(label)
    if modobj:

        # module
        # module + offset
        module = modobj.get_name()

    else:

        # TODO
        # If 0xAAAAAAAA + 0xBBBBBBBB is given,
        # A is interpreted as a module base address,
        # and B as an offset.
        # If that fails, it'd be good to add A+B and try to
        # use the nearest loaded module.

        # offset
        # base address + offset (when no module has that base address)
        try:
            address = HexInput.integer(label)

            if offset:
                # If 0xAAAAAAAA + 0xBBBBBBBB is given,
                # A is interpreted as a module base address,
                # and B as an offset.
                # If that fails, we get here, meaning no module was found
                # at A. Then add up A+B and work with that as a hardcoded
                # address.
                offset = address + offset
            else:
                # If the label is a hardcoded address, we get here.
                offset = address

            # If only a hardcoded address is given,
            # rebuild the label using get_label_at_address.
            # Then parse it again, but this time strictly,
            # both because there is no need for fuzzy syntax and
            # to prevent an infinite recursion if there's a bug here.
            try:
                new_label = self.get_label_at_address(offset)
                module, function, offset = \
                                         self.split_label_strict(new_label)
            except ValueError:
                pass

        # function
        # function + offset
        except ValueError:
            function = label

    # Convert function ordinal strings into integers.
    # The strict parser may already have returned an integer ordinal, in
    # which case there is nothing to convert (and no startswith() to call).
    if function and hasattr(function, 'startswith') \
                 and function.startswith('#'):
        try:
            function = HexInput.integer(function[1:])
        except ValueError:
            pass

    # Convert null offsets to None.
    if not offset:
        offset = None

    return (module, function, offset)
1446 1447 @classmethod
def split_label(cls, label):
    """
Breaks a label into the C{module}, C{function} and C{offset}
components used by L{parse_label}.

Called on the class, the strict syntax is used::

    winappdbg.Process.split_label( "kernel32!CreateFileA" )

Called on an instance, the fuzzy syntax is used::

    aProcessInstance.split_label( "CreateFileA" )

@see: L{split_label_strict}, L{split_label_fuzzy}

@type  label: str
@param label: Label to split.

@rtype:  tuple( str or None, str or int or None, int or None )
@return:
    Tuple containing the C{module} name,
    the C{function} name or ordinal, and the C{offset} value.
    Components the label doesn't specify are C{None}.

@raise ValueError: The label is malformed.
    """

    # XXX
    # The docstring is kept unindented so epydoc doesn't complain
    # when parsing the docs for __use_fuzzy_mode().

    # Only the class-level (strict) behavior lives here; according to the
    # original notes, __init__ replaces this method on instances.
    components = cls.split_label_strict(label)
    return components
1490 1491 # The split_label method is replaced with this function by __init__.
def __use_fuzzy_mode(self, label):
    "@see: L{split_label}"
    # Instance-level flavor of split_label: always the fuzzy parser.
    components = self.split_label_fuzzy(label)
    return components
1495 ## __use_fuzzy_mode.__doc__ = split_label.__doc__ 1496
def sanitize_label(self, label):
    """
    Turns a label taken from user input into a well-formed one, by
    splitting it with L{split_label_fuzzy} and rebuilding it with
    L{parse_label}.

    @type  label: str
    @param label: Label taken from user input.

    @rtype:  str
    @return: Sanitized label.
    """
    components = self.split_label_fuzzy(label)
    (module, function, offset) = components
    return self.parse_label(module, function, offset)
1510
def resolve_label(self, label):
    """
    Resolves the memory address a label points to.

    The label is split with L{split_label_fuzzy} and its components are
    resolved with L{resolve_label_components}.

    @note:
        If multiple modules with the same name are loaded,
        the label may be resolved at any of them. For a more precise
        way to resolve functions use the base address to get the L{Module}
        object (see L{Process.get_module}) and then call L{Module.resolve}.

        If no module name is specified in the label, the function may be
        resolved in any loaded module. To resolve a function name in every
        module, iterate the loaded modules with L{Process.iter_modules}
        and call L{Module.resolve} on each one of them.

    @type  label: str
    @param label: Label to resolve.

    @rtype:  int
    @return: Memory address pointed to by the label.

    @raise ValueError: The label is malformed or impossible to resolve.
    @raise RuntimeError: Cannot resolve the module or function.
    """
    components = self.split_label_fuzzy(label)
    module, function, offset = components
    return self.resolve_label_components(module, function, offset)
1545
def resolve_label_components(self, module   = None,
                                   function = None,
                                   offset   = None):
    """
    Resolve the memory address of the given module, function and/or offset.

    @note:
        If multiple modules with the same name are loaded,
        the label may be resolved at any of them. For a more precise
        way to resolve functions use the base address to get the L{Module}
        object (see L{Process.get_module}) and then call L{Module.resolve}.

        If no module name is specified in the label, the function may be
        resolved in any loaded module. If you want to resolve all functions
        with that name in all modules, call L{Process.iter_modules} to
        iterate through all loaded modules, and then try to resolve the
        function in each one of them using L{Module.resolve}.

    @type  module: None or str
    @param module: (Optional) Module name.
        The special name C{"main"} falls back to the main module when no
        module with that name is found.

    @type  function: None, str or int
    @param function: (Optional) Function name or ordinal.
        The special name C{"start"} falls back to the entry point, and
        (when no module is given) C{"main"} to the main module base.

    @type  offset: None or int
    @param offset: (Optional) Offset value.

        If C{function} is specified, offset from the function.

        If C{function} is C{None}, offset from the module.

    @rtype:  int
    @return: Memory address pointed to by the label.

    @raise ValueError: The label is malformed or impossible to resolve.
    @raise RuntimeError: Cannot resolve the module or function.
    """
    # Default address if no module or function are given.
    # An offset may be added later.
    address = 0

    # Resolve the module.
    # If the module is not found, check for the special symbol "main".
    if module:
        modobj = self.get_module_by_name(module)
        if not modobj:
            if module == "main":
                modobj = self.get_main_module()
            else:
                raise RuntimeError("Module %r not found" % module)

        # Resolve the exported function or debugging symbol.
        # If all else fails, check for the special symbol "start".
        if function:
            address = modobj.resolve(function)
            if address is None:
                address = modobj.resolve_symbol(function)
                if address is None:
                    if function == "start":
                        address = modobj.get_entry_point()
                    if address is None:
                        msg = "Symbol %r not found in module %s"
                        raise RuntimeError(msg % (function, module))

        # No function, use the base address.
        else:
            address = modobj.get_base()

    # Resolve the function in any module.
    # If all else fails, check for the special symbols "main" and "start".
    elif function:
        # Start from "not found": with an empty snapshot the loop below
        # never runs, and the function must not resolve to address zero.
        address = None
        for modobj in self.iter_modules():
            address = modobj.resolve(function)
            if address is not None:
                break
        if address is None:
            if function == "start":
                modobj = self.get_main_module()
                address = modobj.get_entry_point()
            elif function == "main":
                modobj = self.get_main_module()
                address = modobj.get_base()
            else:
                msg = "Function %r not found in any module" % function
                raise RuntimeError(msg)

    # Return the address plus the offset.
    if offset:
        address = address + offset
    return address
1636
def get_label_at_address(self, address, offset = None):
    """
    Builds a label for the given memory address.

    When a module is found at the address the label is built by that
    module; otherwise the label is just the address itself.

    @warning: The label depends on a currently loaded module. If that
        module is unloaded later, the label becomes impossible to resolve.

    @type  address: int
    @param address: Memory address.

    @type  offset: None or int
    @param offset: (Optional) Offset value, added to the address.

    @rtype:  str
    @return: Label pointing to the given address.
    """
    target = address + offset if offset else address
    owner = self.get_module_at_address(target)
    if not owner:
        # No module there: plain address label.
        return self.parse_label(None, None, target)
    return owner.get_label_at_address(target)
1662 1663 #------------------------------------------------------------------------------ 1664 1665 # The memory addresses of system breakpoints are cached, since they're 1666 # all in system libraries it's not likely they'll ever change their address 1667 # during the lifetime of the process... I don't suppose a program could 1668 # happily unload ntdll.dll and survive.
def __get_system_breakpoint(self, label):
    """
    Resolves a label, remembering the result for later calls.

    @type  label: str
    @param label: Label to resolve.

    @rtype:  int or None
    @return: Memory address of the label.
        Returns C{None} if the label can't be resolved; failures are
        not remembered.
    """
    known = self.__system_breakpoints
    if label in known:
        return known[label]

    # Any resolution error just means "not available".
    try:
        resolved = self.resolve_label(label)
    except Exception:
        return None

    self.__system_breakpoints[label] = resolved
    return resolved
1679 1680 # It's in kernel32 in Windows Server 2003, in ntdll since Windows Vista. 1681 # It can only be resolved if we have the debug symbols.
def get_break_on_error_ptr(self):
    """
    Looks for the C{g_dwLastErrorToBreakOn} global variable, first in
    C{ntdll} and then in C{kernel32}.

    @rtype: int
    @return:
        If present, returns the address of the C{g_dwLastErrorToBreakOn}
        global variable for this process. If not, returns C{None}.
    """
    address = self.__get_system_breakpoint("ntdll!g_dwLastErrorToBreakOn")
    if not address:
        address = self.__get_system_breakpoint(
                                        "kernel32!g_dwLastErrorToBreakOn")
        # cheat a little :)
        # Store the kernel32 address under the ntdll label so the next
        # call finds it on the first lookup. A failed lookup must not be
        # stored: a cached None would hide the ntdll symbol if it becomes
        # resolvable later.
        if address:
            self.__system_breakpoints["ntdll!g_dwLastErrorToBreakOn"] = address
    return address
1696
def is_system_defined_breakpoint(self, address):
    """
    Tells if an address belongs to one of the system libraries that
    contain hardcoded breakpoints (C{ntdll} or C{kernel32}).

    @type  address: int
    @param address: Memory address.

    @rtype:  bool
    @return: C{True} if the given address points to a system defined
        breakpoint. System defined breakpoints are hardcoded into
        system libraries.
    """
    if not address:
        return False
    owner = self.get_module_at_address(address)
    if not owner:
        return False
    # kernel32 is only checked when the module isn't ntdll.
    return owner.match_name("ntdll") or owner.match_name("kernel32")
1713 1714 # FIXME 1715 # In Wine, the system breakpoint seems to be somewhere in kernel32.
def get_system_breakpoint(self):
    """
    Resolves the C{ntdll!DbgBreakPoint} label.

    @rtype:  int or None
    @return: Memory address of the system breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll!DbgBreakPoint"
    return self.__get_system_breakpoint(label)
1724 1725 # I don't know when this breakpoint is actually used...
def get_user_breakpoint(self):
    """
    Resolves the C{ntdll!DbgUserBreakPoint} label.

    @rtype:  int or None
    @return: Memory address of the user breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll!DbgUserBreakPoint"
    return self.__get_system_breakpoint(label)
1734 1735 # On some platforms, this breakpoint can only be resolved 1736 # when the debugging symbols for ntdll.dll are loaded.
def get_breakin_breakpoint(self):
    """
    Resolves the C{ntdll!DbgUiRemoteBreakin} label.

    @rtype:  int or None
    @return: Memory address of the remote breakin breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll!DbgUiRemoteBreakin"
    return self.__get_system_breakpoint(label)
1745 1746 # Equivalent of ntdll!DbgBreakPoint in Wow64.
1748 """ 1749 @rtype: int or None 1750 @return: Memory address of the Wow64 system breakpoint 1751 within the process address space. 1752 Returns C{None} on error. 1753 """ 1754 return self.__get_system_breakpoint("ntdll32!DbgBreakPoint")
1755 1756 # Equivalent of ntdll!DbgUserBreakPoint in Wow64.
def get_wow64_user_breakpoint(self):
    """
    Resolves the C{ntdll32!DbgUserBreakPoint} label.

    @rtype:  int or None
    @return: Memory address of the Wow64 user breakpoint
        within the process address space.
        Returns C{None} on error.
    """
    label = "ntdll32!DbgUserBreakPoint"
    return self.__get_system_breakpoint(label)
1765 1766 # Equivalent of ntdll!DbgUiRemoteBreakin in Wow64.
1768 """ 1769 @rtype: int or None 1770 @return: Memory address of the Wow64 remote breakin breakpoint 1771 within the process address space. 1772 Returns C{None} on error. 1773 """ 1774 return self.__get_system_breakpoint("ntdll32!DbgUiRemoteBreakin")
1775 1776 #------------------------------------------------------------------------------ 1777
def load_symbols(self):
    """
    Loads the debugging symbols of every module in this snapshot, by
    calling C{load_symbols()} on each module.
    """
    modules = self.iter_modules()
    for module in modules:
        module.load_symbols()
1785
def unload_symbols(self):
    """
    Unloads the debugging symbols of every module in this snapshot, by
    calling C{unload_symbols()} on each module.
    """
    modules = self.iter_modules()
    for module in modules:
        module.unload_symbols()
1792
def get_symbols(self):
    """
    Collects the debugging symbols of all modules in this snapshot,
    module by module, into a single list.

    @rtype:  list of tuple( str, int, int )
    @return: List of symbols.
        Each symbol is represented by a tuple that contains:
            - Symbol name
            - Symbol memory address
            - Symbol size in bytes
    """
    return [entry
            for module in self.iter_modules()
            for entry in module.iter_symbols()]
1810
def iter_symbols(self):
    """
    Iterates the debugging symbols of all modules in this snapshot,
    one module after another.

    @rtype:  iterator of tuple( str, int, int )
    @return: Iterator of symbols.
        Each symbol is represented by a tuple that contains:
            - Symbol name
            - Symbol memory address
            - Symbol size in bytes
    """
    for module in self.iter_modules():
        module_symbols = module.iter_symbols()
        for entry in module_symbols:
            yield entry
1827
def resolve_symbol(self, symbol, bCaseSensitive = False):
    """
    Looks up the address of a debugging symbol by name.
    The first symbol whose name matches is used.

    @type  symbol: str
    @param symbol: Name of the symbol to resolve.

    @type  bCaseSensitive: bool
    @param bCaseSensitive: C{True} for case sensitive matches,
        C{False} for case insensitive.

    @rtype:  int or None
    @return: Memory address of symbol. C{None} if not found.
    """
    if bCaseSensitive:
        def matches(name):
            return symbol == name
    else:
        # Lowercase the wanted name once, before walking the symbols.
        symbol = symbol.lower()
        def matches(name):
            return symbol == name.lower()

    for (name, address, _size) in self.iter_symbols():
        if matches(name):
            return address
    return None
1851
def get_symbol_at_address(self, address):
    """
    Finds the symbol that contains the given address. When several
    symbols contain it, the one starting at the highest address wins.

    @type  address: int
    @param address: Memory address to query.

    @rtype: None or tuple( str, int, int )
    @return: Returns a tuple consisting of:
         - Name
         - Address
         - Size (in bytes)
        Returns C{None} if no symbol could be matched.
    """
    # Any module may have symbols pointing anywhere in memory, so every
    # symbol has to be checked.
    best = None
    for (name, start, size) in self.iter_symbols():
        if not (start <= address):
            continue
        if not (start + size > address):
            continue
        if not best or best[1] < start:
            best = (name, start, size)
    return best
1875 1876 #------------------------------------------------------------------------------ 1877 1878 # XXX _notify_* methods should not trigger a scan 1879
def _add_module(self, aModule):
    """
    Private method to add a module object to the snapshot.

    The module is tied to this process and stored under its base address.
    No type checking is done, and an existing entry with the same base
    address is replaced.

    @type  aModule: L{Module}
    @param aModule: Module object.
    """
    base = aModule.get_base()
    aModule.set_process(self)
    self.__moduleDict[base] = aModule
1900
def _del_module(self, lpBaseOfDll):
    """
    Private method to remove a module object from the snapshot.

    If no module is known at the given base address a C{RuntimeWarning}
    is issued and nothing else happens.

    @type  lpBaseOfDll: int
    @param lpBaseOfDll: Module base address.
    """
    try:
        aModule = self.__moduleDict[lpBaseOfDll]
        del self.__moduleDict[lpBaseOfDll]
    except KeyError:
        aModule = None
        # Use %s rather than %d: HexDump.address() presumably returns the
        # address already formatted as hexadecimal text (TODO confirm),
        # and %d would then raise TypeError instead of warning. %s works
        # whatever type is returned.
        msg = "Unknown base address %s" % HexDump.address(lpBaseOfDll)
        warnings.warn(msg, RuntimeWarning)
    if aModule:
        aModule.clear()     # remove circular references
1917
def __add_loaded_module(self, event):
    """
    Private method to automatically add new module objects from debug events.

    A new L{Module} is created when the base address reported by the event
    isn't in the snapshot yet. Otherwise the known module only gets the
    attributes it's still missing (file handle, process, entry point and
    filename) filled in from the event.

    @type  event: L{Event}
    @param event: Event object.
    """
    base = event.get_module_base()
    handle = event.get_file_handle()

    # Not using has_module() here because it would trigger a scan.
    if base in self.__moduleDict:

        # Known module: complete it, never overwrite what it has.
        known = self.get_module(base)
        if not known.hFile and handle not in (None, 0,
                                              win32.INVALID_HANDLE_VALUE):
            known.hFile = handle
        if not known.process:
            known.process = self
        if known.EntryPoint is None and \
                                  hasattr(event, 'get_start_address'):
            known.EntryPoint = event.get_start_address()
        if not known.fileName:
            path = event.get_filename()
            if path:
                known.fileName = path
        return

    # New module: gather what the event can tell about it.
    path = event.get_filename() or None
    entry = None
    if hasattr(event, 'get_start_address'):
        # Not every event type provides a start address.
        entry = event.get_start_address()
    created = Module(base, handle, fileName = path,
                     EntryPoint = entry,
                     process = self)
    self._add_module(created)
1954
def _notify_create_process(self, event):
    """
    Notify the load of the main module.

    The L{Debug} class calls this automatically, there should be no need
    to call it by hand.

    @type  event: L{CreateProcessEvent}
    @param event: Create process event.

    @rtype:  bool
    @return: C{True} to call the user-defined handle, C{False} otherwise.
        This implementation always returns C{True}.
    """
    call_user_handler = True
    self.__add_loaded_module(event)
    return call_user_handler
1970
def _notify_load_dll(self, event):
    """
    Notify the load of a new module.

    The L{Debug} class calls this automatically, there should be no need
    to call it by hand.

    @type  event: L{LoadDLLEvent}
    @param event: Load DLL event.

    @rtype:  bool
    @return: C{True} to call the user-defined handle, C{False} otherwise.
        This implementation always returns C{True}.
    """
    call_user_handler = True
    self.__add_loaded_module(event)
    return call_user_handler
1986
def _notify_unload_dll(self, event):
    """
    Notify the release of a loaded module.

    The L{Debug} class calls this automatically, there should be no need
    to call it by hand.

    @type  event: L{UnloadDLLEvent}
    @param event: Unload DLL event.

    @rtype:  bool
    @return: C{True} to call the user-defined handle, C{False} otherwise.
        This implementation always returns C{True}.
    """
    base = event.get_module_base()
    # Not using has_module() here because it would trigger a scan.
    is_known = base in self.__moduleDict
    if is_known:
        self._del_module(base)
    return True
2005