Package winappdbg :: Module disasm
[hide private]
[frames] | no frames]

Source Code for Module winappdbg.disasm

  1  #!~/.wine/drive_c/Python25/python.exe 
  2  # -*- coding: utf-8 -*- 
  3   
  4  # Copyright (c) 2009-2014, Mario Vilas 
  5  # All rights reserved. 
  6  # 
  7  # Redistribution and use in source and binary forms, with or without 
  8  # modification, are permitted provided that the following conditions are met: 
  9  # 
 10  #     * Redistributions of source code must retain the above copyright notice, 
 11  #       this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above copyright 
 13  #       notice, this list of conditions and the following disclaimer in the 
 14  #       documentation and/or other materials provided with the distribution. 
 15  #     * Neither the name of the copyright holder nor the names of its 
 16  #       contributors may be used to endorse or promote products derived from 
 17  #       this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 20  # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 21  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
 22  # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
 23  # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 24  # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 25  # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 26  # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 27  # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 28  # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
 29  # POSSIBILITY OF SUCH DAMAGE. 
 30   
 31  """ 
 32  Binary code disassembly. 
 33   
 34  @group Disassembler loader: 
 35      Disassembler, Engine 
 36   
 37  @group Disassembler engines: 
 38      BeaEngine, CapstoneEngine, DistormEngine, 
 39      LibdisassembleEngine, PyDasmEngine 
 40  """ 
 41   
 42  from __future__ import with_statement 
 43   
 44  __revision__ = "$Id: disasm.py 1307 2013-12-20 16:51:25Z qvasimodo $" 
 45   
 46  __all__ = [ 
 47      'Disassembler', 
 48      'Engine', 
 49      'BeaEngine', 
 50      'CapstoneEngine', 
 51      'DistormEngine', 
 52      'LibdisassembleEngine', 
 53      'PyDasmEngine', 
 54  ] 
 55   
 56  from textio import HexDump 
 57  import win32 
 58   
 59  import ctypes 
 60  import warnings 
 61   
 62  # lazy imports 
 63  BeaEnginePython = None 
 64  distorm3 = None 
 65  pydasm = None 
 66  libdisassemble = None 
 67  capstone = None 
 68   
 69  #============================================================================== 
 70   
class Engine(object):
    """
    Common interface shared by all the disassembler engine adaptors.

    Subclasses override the class variables below, L{_import_dependencies}
    and L{decode}.

    @type name: str
    @cvar name: Engine name to use with the L{Disassembler} class.

    @type desc: str
    @cvar desc: User friendly name of the disassembler engine.

    @type url: str
    @cvar url: Download URL.

    @type supported: set(str)
    @cvar supported: Set of supported processor architectures.
        For more details see L{win32.version._get_arch}.

    @type arch: str
    @ivar arch: Name of the processor architecture.
    """

    name = "<insert engine name here>"
    desc = "<insert engine description here>"
    url = "<insert download url here>"
    supported = set()

    def __init__(self, arch=None):
        """
        Validate the architecture, then load the engine's dependencies.

        @type  arch: str
        @param arch: Name of the processor architecture.
            If not provided the current processor architecture is assumed.
            For more details see L{win32.version._get_arch}.

        @raise NotImplementedError: This disassembler doesn't support the
            requested processor architecture, or its dependencies could not
            be imported.
        """
        # The architecture is stored first: some engines read self.arch
        # while importing their dependencies.
        self.arch = self._validate_arch(arch)
        try:
            self._import_dependencies()
        except ImportError:
            # A missing library is reported the same way as an unsupported
            # architecture, with a hint on where to get it.
            raise NotImplementedError(
                "%s is not installed or can't be found. Download it from: %s"
                % (self.name, self.url))

    def _validate_arch(self, arch=None):
        """
        Resolve the architecture name and check this engine supports it.

        @type  arch: str
        @param arch: Name of the processor architecture.
            If not provided the current processor architecture is assumed.
            For more details see L{win32.version._get_arch}.

        @rtype:  str
        @return: Name of the processor architecture that will be used.

        @raise NotImplementedError: This disassembler doesn't support the
            requested processor architecture.
        """
        # Any false value (None, empty string) selects the default.
        resolved = arch or win32.arch

        if resolved in self.supported:
            return resolved

        raise NotImplementedError(
            "The %s engine cannot decode %s code." % (self.name, resolved))

    def _import_dependencies(self):
        """
        Loads the dependencies for this disassembler.

        The base implementation always fails; every adaptor has to provide
        its own.

        @raise ImportError: This disassembler cannot find or load the
            necessary dependencies to make it work.
        """
        raise SyntaxError("Subclasses MUST implement this method!")

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.

        @raise NotImplementedError: This disassembler could not be loaded.
            This may be due to missing dependencies.
        """
        raise NotImplementedError()
173 174 #============================================================================== 175
class BeaEngine(Engine):
    """
    Integration with the BeaEngine disassembler by Beatrix.

    @see: U{https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/}
    """

    name = "BeaEngine"
    desc = "BeaEngine disassembler by Beatrix"
    url = "https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/"

    supported = set((
        win32.ARCH_I386,
        win32.ARCH_AMD64,
    ))

    def _import_dependencies(self):
        """
        Load the BeaEngine ctypes wrapper into the module level lazy import
        slot, unless it was already loaded.

        @raise ImportError: The BeaEnginePython module could not be imported.
        """
        global BeaEnginePython
        if BeaEnginePython is None:
            import BeaEnginePython

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code using BeaEngine.

        Bytes that cannot be decoded are returned as one-byte C{db}
        pseudo instructions.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """
        addressof = ctypes.addressof
        code_size = len(code)

        # Instance the code buffer.
        # NOTE: create_string_buffer() appends a NUL terminator, so the
        # buffer is one byte longer than the code. Always bound reads by
        # code_size, never by the buffer length.
        buffer = ctypes.create_string_buffer(code)
        buffer_ptr = addressof(buffer)

        # Instance the disassembler structure.
        Instruction = BeaEnginePython.DISASM()
        Instruction.VirtualAddr = address
        Instruction.EIP = buffer_ptr
        # Address right past the last code byte (presumably the limit
        # BeaEngine uses to report OUT_OF_BLOCK -- confirm in its docs).
        Instruction.SecurityBlock = buffer_ptr + code_size
        if self.arch == win32.ARCH_I386:
            Instruction.Archi = 0
        else:
            Instruction.Archi = 0x40
        Instruction.Options = ( BeaEnginePython.Tabulation      +
                                BeaEnginePython.NasmSyntax      +
                                BeaEnginePython.SuffixedNumeral +
                                BeaEnginePython.ShowSegmentRegs )

        # Prepare for looping over each instruction.
        result = []
        Disasm = BeaEnginePython.Disasm
        InstructionPtr = addressof(Instruction)
        hexdump = HexDump.hexadecimal
        append = result.append
        OUT_OF_BLOCK = BeaEnginePython.OUT_OF_BLOCK
        UNKNOWN_OPCODE = BeaEnginePython.UNKNOWN_OPCODE

        # For each decoded instruction...
        while True:

            # Calculate the current offset into the buffer.
            offset = Instruction.EIP - buffer_ptr

            # If we've gone past the buffer, break the loop.
            if offset >= code_size:
                break

            # Decode the current instruction.
            InstrLength = Disasm(InstructionPtr)

            # If BeaEngine detects we've gone past the buffer, break the loop.
            if InstrLength == OUT_OF_BLOCK:
                break

            # The instruction could not be decoded.
            if InstrLength == UNKNOWN_OPCODE:

                # Output a single byte as a "db" instruction.
                char = "%.2X" % ord(buffer[offset])
                append((
                    Instruction.VirtualAddr,
                    1,
                    "db %sh" % char,
                    char,
                ))
                Instruction.VirtualAddr += 1
                Instruction.EIP += 1

            # The instruction was decoded but reading past the buffer's end.
            # This can happen when the last instruction is a prefix without an
            # opcode. For example: decode(0, '\x66')
            elif offset + InstrLength > code_size:

                # Output each remaining byte as a "db" instruction.
                # The bytes are taken from the original code rather than
                # from the ctypes buffer, so the NUL terminator of the
                # buffer is never reported as if it was part of the input.
                for char in code[offset:]:
                    char = "%.2X" % ord(char)
                    append((
                        Instruction.VirtualAddr,
                        1,
                        "db %sh" % char,
                        char,
                    ))
                    Instruction.VirtualAddr += 1
                    Instruction.EIP += 1

            # The instruction was decoded correctly.
            else:

                # Output the decoded instruction.
                append((
                    Instruction.VirtualAddr,
                    InstrLength,
                    Instruction.CompleteInstr.strip(),
                    hexdump(buffer.raw[offset:offset+InstrLength]),
                ))
                Instruction.VirtualAddr += InstrLength
                Instruction.EIP += InstrLength

        # Return the list of decoded instructions.
        return result
292 293 #============================================================================== 294
class DistormEngine(Engine):
    """
    Integration with the diStorm disassembler by Gil Dabah.

    @see: U{https://code.google.com/p/distorm3}
    """

    name = "diStorm"
    desc = "diStorm disassembler by Gil Dabah"
    url = "https://code.google.com/p/distorm3"

    supported = set((
        win32.ARCH_I386,
        win32.ARCH_AMD64,
    ))

    def _import_dependencies(self):
        """
        Load the diStorm bindings and remember the decoder function and the
        decoding flag that matches the architecture of this instance.

        @raise ImportError: Neither C{distorm3} nor C{distorm} could be
            imported.
        """
        global distorm3
        if distorm3 is None:
            try:
                import distorm3
            except ImportError:
                # Fall back to the module called just "distorm".
                import distorm as distorm3

        # Map each supported architecture to its diStorm decoding flag.
        flags_by_arch = {
            win32.ARCH_I386:  distorm3.Decode32Bits,
            win32.ARCH_AMD64: distorm3.Decode64Bits,
        }

        self.__decode = distorm3.Decode
        self.__flag = flags_by_arch[self.arch]

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code using diStorm.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @return: Whatever the diStorm C{Decode} function returns for the
            given address, code and decoding flag.
        """
        decode_fn = self.__decode
        return decode_fn(address, code, self.__flag)
332 333 #============================================================================== 334
class PyDasmEngine(Engine):
    """
    Integration with PyDasm: Python bindings to libdasm.

    @see: U{https://code.google.com/p/libdasm/}
    """

    name = "PyDasm"
    desc = "PyDasm: Python bindings to libdasm"
    url = "https://code.google.com/p/libdasm/"

    supported = set((
        win32.ARCH_I386,
    ))

    def _import_dependencies(self):
        """
        Load the libdasm bindings into the module level lazy import slot,
        unless they were already loaded.

        @raise ImportError: The pydasm module could not be imported.
        """
        global pydasm
        if pydasm is None:
            import pydasm

    def decode(self, address, code):
        """
        Disassemble a buffer of 32 bits machine code using PyDasm.

        Bytes that cannot be decoded, or that start an instruction longer
        than the rest of the buffer, are returned as one-byte C{db} pseudo
        instructions.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """
        decoded = []
        position = 0

        while position < len(code):

            # Hand PyDasm a window of at most 32 bytes to decode from.
            window = code[position:position + 32]
            insn = pydasm.get_instruction(window, pydasm.MODE_32)

            # Memory address of the instruction being decoded.
            va = address + position

            if insn and insn.length + position <= len(code):

                # The instruction was decoded and fits in the buffer.
                text = pydasm.get_instruction_string(insn,
                                                     pydasm.FORMAT_INTEL,
                                                     va)
                size = insn.length
                dump = HexDump.hexadecimal(code[position:position + size])

            else:

                # Illegal opcode or opcode longer than remaining buffer:
                # emit the current byte as data.
                dump = '%.2X' % ord(code[position])
                text = 'db 0x%s' % dump
                size = 1

            decoded.append((va, size, text, dump))

            # Move to the next instruction.
            position += size

        return decoded
398 399 #============================================================================== 400
class LibdisassembleEngine(Engine):
    """
    Integration with Immunity libdisassemble.

    @see: U{http://www.immunitysec.com/resources-freesoftware.shtml}
    """

    name = "Libdisassemble"
    desc = "Immunity libdisassemble"
    url = "http://www.immunitysec.com/resources-freesoftware.shtml"

    supported = set((
        win32.ARCH_I386,
    ))

    def _import_dependencies(self):
        """
        Load the libdisassemble module into the module level lazy import
        slot, unless it was already loaded.

        The library ships without an installer or an C{__init__.py} file,
        so users install it by hand in different ways. Two layouts are
        tried, in order.

        @raise ImportError: The module was not found under either layout.
        """
        global libdisassemble
        if libdisassemble is not None:
            return

        try:
            # Layout 1: installed as a package with an __init__.py file.
            import libdisassemble.disassemble as libdisassemble
        except ImportError:
            # Layout 2: the files were just copied somewhere in the path.
            import disassemble as libdisassemble

    def decode(self, address, code):
        """
        Disassemble a buffer of 32 bits machine code using libdisassemble.

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """
        listing = []
        position = 0

        while position < len(code):

            # Decode one instruction from a window of at most 32 bytes.
            insn = libdisassemble.Opcode(code[position:position + 32])
            size = insn.getSize()
            text = insn.printOpcode('INTEL')
            dump = HexDump.hexadecimal(code[position:position + size])

            listing.append((address + position, size, text, dump))

            # Move to the next instruction.
            position += size

        return listing
461 462 #============================================================================== 463
class CapstoneEngine(Engine):
    """
    Integration with the Capstone disassembler by Nguyen Anh Quynh.

    @see: U{http://www.capstone-engine.org/}
    """

    name = "Capstone"
    desc = "Capstone disassembler by Nguyen Anh Quynh"
    url = "http://www.capstone-engine.org/"

    supported = set((
        win32.ARCH_I386,
        win32.ARCH_AMD64,
        win32.ARCH_THUMB,
        win32.ARCH_ARM,
        win32.ARCH_ARM64,
    ))

    def _import_dependencies(self):
        """
        Load the Capstone bindings, build the table of per-architecture
        constants and probe for the bug found in early versions of the
        bindings.

        @raise ImportError: The capstone module could not be imported.
        """

        # Load the Capstone bindings.
        global capstone
        if capstone is None:
            import capstone

        # Load the constants for the requested architecture.
        self.__constants = {
            win32.ARCH_I386:
                (capstone.CS_ARCH_X86,   capstone.CS_MODE_32),
            win32.ARCH_AMD64:
                (capstone.CS_ARCH_X86,   capstone.CS_MODE_64),
            win32.ARCH_THUMB:
                (capstone.CS_ARCH_ARM,   capstone.CS_MODE_THUMB),
            win32.ARCH_ARM:
                (capstone.CS_ARCH_ARM,   capstone.CS_MODE_ARM),
            win32.ARCH_ARM64:
                (capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM),
        }

        # Test for the bug in early versions of Capstone.
        # If found, warn the user about it.
        # The probe decodes a single NOP and checks the type of the result.
        try:
            self.__bug = not isinstance(
                capstone.cs_disasm_quick(
                    capstone.CS_ARCH_X86, capstone.CS_MODE_32, "\x90", 1)[0],
                capstone.capstone.CsInsn)
        except AttributeError:
            self.__bug = False
        if self.__bug:
            warnings.warn(
                "This version of the Capstone bindings is unstable,"
                " please upgrade to a newer one!",
                RuntimeWarning, stacklevel=4)

    def decode(self, address, code):
        """
        Disassemble a buffer of machine code using Capstone.

        Data that cannot be decoded is returned as a "define constant"
        pseudo instruction: C{db} on Intel architectures (one byte at a
        time) and C{dcb} on the others (up to four bytes at a time).

        @type  address: int
        @param address: Memory address where the code was read from.

        @type  code: str
        @param code: Machine code to disassemble.

        @rtype:  list of tuple( long, int, str, str )
        @return: List of tuples. Each tuple represents an assembly instruction
            and contains:
             - Memory address of instruction.
             - Size of instruction in bytes.
             - Disassembly line of instruction.
             - Hexadecimal dump of instruction.
        """

        # Get the constants for the requested architecture.
        arch, mode = self.__constants[self.arch]

        # Get the decoder function outside the loop.
        decoder = capstone.cs_disasm_quick

        # If the buggy version of the bindings are being used, we need to catch
        # all exceptions broadly. If not, we only need to catch CsError.
        if self.__bug:
            CsError = Exception
        else:
            CsError = capstone.CsError

        # The architecture doesn't change while decoding, so the settings
        # used to output undecodable data are picked once, before the loop.
        # On Intel processors we'll skip one byte, since we can't really
        # know the instruction length, and the directive is "db".
        # On the rest of the architectures we skip four bytes and the
        # directive is "dcb".
        if self.arch in (win32.ARCH_I386, win32.ARCH_AMD64):
            skip_size = 1
            directive = "db "
        else:
            skip_size = 4
            directive = "dcb "

        # Create the variables for the instruction length, mnemonic and
        # operands. That way they won't be created within the loop,
        # minimizing the chances data might be overwritten.
        # (This only makes sense for the buggy version of the bindings,
        # normally memory accesses are safe).
        length = mnemonic = op_str = None

        # For each instruction...
        result = []
        offset = 0
        while offset < len(code):

            # Disassemble a single instruction, because disassembling multiple
            # instructions may cause excessive memory usage (Capstone allocates
            # approximately 1K of metadata per each decoded instruction).
            instr = None
            try:
                instr = decoder(
                    arch, mode, code[offset:offset+16], address+offset, 1)[0]
            except IndexError:
                pass    # No instructions decoded.
            except CsError:
                pass    # Any other error.

            # On success add the decoded instruction.
            if instr is not None:

                # Get the instruction length, mnemonic and operands.
                # Copy the values quickly before someone overwrites them,
                # if using the buggy version of the bindings (otherwise it's
                # irrelevant in which order we access the properties).
                length = instr.size
                mnemonic = instr.mnemonic
                op_str = instr.op_str

                # Concatenate the mnemonic and the operands.
                if op_str:
                    disasm = "%s %s" % (mnemonic, op_str)
                else:
                    disasm = mnemonic

                # Get the instruction bytes as a hexadecimal dump.
                hexdump = HexDump.hexadecimal( code[offset:offset+length] )

            # On error add a "define constant" instruction.
            # The exact instruction depends on the architecture.
            else:

                # Get the skipped bytes as a hexadecimal dump.
                skipped = code[offset:offset+skip_size]
                hexdump = HexDump.hexadecimal(skipped)

                # Report the number of bytes actually skipped. Near the end
                # of the buffer there may be fewer than skip_size bytes left,
                # and the size must agree with the hexadecimal dump.
                # At least one byte is always left here, so the loop always
                # moves forward.
                length = len(skipped)

                # Build the "define constant" instruction.
                mnemonic = directive
                operands = []
                for b in skipped:
                    if b.isalpha():
                        operands.append("'%s'" % b)
                    else:
                        operands.append("0x%x" % ord(b))
                op_str = ", ".join(operands)
                disasm = mnemonic + op_str

            # Add the decoded instruction to the list.
            result.append((
                address + offset,
                length,
                disasm,
                hexdump,
            ))

            # Update the offset.
            offset += length

        # Return the list of decoded instructions.
        return result
625 626 #============================================================================== 627 628 # TODO: use a lock to access __decoder 629 # TODO: look in sys.modules for whichever disassembler is already loaded 630
class Disassembler(object):
    """
    Generic disassembler. Uses a set of adapters to decide which library to
    load for which supported platform.

    @type engines: tuple( L{Engine} )
    @cvar engines: Set of supported engines. If you implement your own adapter
        you can add its class here to make it available to L{Disassembler}.
        Supported disassemblers are:
    """

    engines = (
        DistormEngine,  # diStorm engine goes first for backwards compatibility
        BeaEngine,
        CapstoneEngine,
        LibdisassembleEngine,
        PyDasmEngine,
    )

    # Add the list of supported disassemblers to the docstring.
    __doc__ += "\n"
    for e in engines:
        __doc__ += "         - %s - %s (U{%s})\n" % (e.name, e.desc, e.url)
    del e

    # Cache of already loaded disassemblers.
    # Keys are tuples of (canonical engine name, architecture name).
    __decoder = {}

    def __new__(cls, arch = None, engine = None):
        """
        Factory class. You can't really instance a L{Disassembler} object,
        instead one of the adapter L{Engine} subclasses is returned.

        Engine instances are cached per engine and architecture, so asking
        twice for the same combination returns the same object. Engine names
        are matched case insensitively, and every spelling of a name shares
        a single cache entry.

        @type  arch: str
        @param arch: (Optional) Name of the processor architecture.
            If not provided the current processor architecture is assumed.
            For more details see L{win32.version._get_arch}.

        @type  engine: str
        @param engine: (Optional) Name of the disassembler engine.
            If not provided a compatible one is loaded automatically.
            See: L{Engine.name}

        @raise NotImplementedError: No compatible disassembler was found that
            could decode machine code for the requested architecture. This may
            be due to missing dependencies.

        @raise ValueError: An unknown engine name was supplied.
        """

        # Use the default architecture if none specified.
        if not arch:
            arch = win32.arch

        # Return a compatible engine if none specified.
        if not engine:
            for clazz in cls.engines:
                if arch not in clazz.supported:
                    continue
                selected = (clazz.name, arch)
                try:
                    return cls.__decoder[selected]
                except KeyError:
                    pass
                try:
                    decoder = clazz(arch)
                except NotImplementedError:
                    # This engine can't be loaded, try the next one.
                    continue
                cls.__decoder[selected] = decoder
                return decoder
            msg = "No disassembler engine available for %s code." % arch
            raise NotImplementedError(msg)

        # Find the class of the specified engine, ignoring the case.
        engineLower = engine.lower()
        for clazz in cls.engines:
            if clazz.name.lower() == engineLower:
                break
        else:
            msg = "Unsupported disassembler engine: %s" % engine
            raise ValueError(msg)

        # Make sure the engine can decode the requested architecture.
        if arch not in clazz.supported:
            msg = "The %s engine cannot decode %s code." % (engine, arch)
            raise NotImplementedError(msg)

        # Return the specified engine.
        # The cache key uses the canonical engine name rather than the name
        # as typed by the caller. That way "distorm" and "diStorm" share one
        # instance, which is also the one cached by the automatic selection.
        selected = (clazz.name, arch)
        try:
            decoder = cls.__decoder[selected]
        except KeyError:
            decoder = clazz(arch)
            cls.__decoder[selected] = decoder
        return decoder
723