1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """
32 Binary code disassembly.
33
34 @group Disassembler loader:
35 Disassembler, Engine
36
37 @group Disassembler engines:
38 BeaEngine, CapstoneEngine, DistormEngine,
39 LibdisassembleEngine, PyDasmEngine
40 """
41
42 from __future__ import with_statement
43
44 __revision__ = "$Id: disasm.py 1307 2013-12-20 16:51:25Z qvasimodo $"
45
46 __all__ = [
47 'Disassembler',
48 'Engine',
49 'BeaEngine',
50 'CapstoneEngine',
51 'DistormEngine',
52 'LibdisassembleEngine',
53 'PyDasmEngine',
54 ]
55
56 from textio import HexDump
57 import win32
58
59 import ctypes
60 import warnings
61
62
63 BeaEnginePython = None
64 distorm3 = None
65 pydasm = None
66 libdisassemble = None
67 capstone = None
68
69
70
72 """
73 Base class for disassembly engine adaptors.
74
75 @type name: str
76 @cvar name: Engine name to use with the L{Disassembler} class.
77
78 @type desc: str
79 @cvar desc: User friendly name of the disassembler engine.
80
81 @type url: str
82 @cvar url: Download URL.
83
84 @type supported: set(str)
85 @cvar supported: Set of supported processor architectures.
86 For more details see L{win32.version._get_arch}.
87
88 @type arch: str
89 @ivar arch: Name of the processor architecture.
90 """
91
92 name = "<insert engine name here>"
93 desc = "<insert engine description here>"
94 url = "<insert download url here>"
95 supported = set()
96
98 """
99 @type arch: str
100 @param arch: Name of the processor architecture.
101 If not provided the current processor architecture is assumed.
102 For more details see L{win32.version._get_arch}.
103
104 @raise NotImplementedError: This disassembler doesn't support the
105 requested processor architecture.
106 """
107 self.arch = self._validate_arch(arch)
108 try:
109 self._import_dependencies()
110 except ImportError:
111 msg = "%s is not installed or can't be found. Download it from: %s"
112 msg = msg % (self.name, self.url)
113 raise NotImplementedError(msg)
114
116 """
117 @type arch: str
118 @param arch: Name of the processor architecture.
119 If not provided the current processor architecture is assumed.
120 For more details see L{win32.version._get_arch}.
121
122 @rtype: str
123 @return: Name of the processor architecture.
124 If not provided the current processor architecture is assumed.
125 For more details see L{win32.version._get_arch}.
126
127 @raise NotImplementedError: This disassembler doesn't support the
128 requested processor architecture.
129 """
130
131
132 if not arch:
133 arch = win32.arch
134
135
136 if arch not in self.supported:
137 msg = "The %s engine cannot decode %s code."
138 msg = msg % (self.name, arch)
139 raise NotImplementedError(msg)
140
141
142 return arch
143
145 """
146 Loads the dependencies for this disassembler.
147
148 @raise ImportError: This disassembler cannot find or load the
149 necessary dependencies to make it work.
150 """
151 raise SyntaxError("Subclasses MUST implement this method!")
152
def decode(self, address, code):
    """
    Disassemble a block of machine code.

    This base implementation decodes nothing: it unconditionally raises
    C{NotImplementedError}.

    @type  address: int
    @param address: Memory address where the code was read from.

    @type  code: str
    @param code: Machine code to disassemble.

    @rtype:  list of tuple( long, int, str, str )
    @return: List of tuples. Each tuple represents an assembly instruction
        and contains:
         - Memory address of instruction.
         - Size of instruction in bytes.
         - Disassembly line of instruction.
         - Hexadecimal dump of instruction.

    @raise NotImplementedError: This disassembler could not be loaded.
        This may be due to missing dependencies. It is always raised by
        this base implementation.
    """
    raise NotImplementedError("Subclasses MUST implement this method!")
173
174
175
177 """
178 Integration with the BeaEngine disassembler by Beatrix.
179
180 @see: U{https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/}
181 """
182
183 name = "BeaEngine"
184 desc = "BeaEngine disassembler by Beatrix"
185 url = "https://sourceforge.net/projects/winappdbg/files/additional%20packages/BeaEngine/"
186
187 supported = set((
188 win32.ARCH_I386,
189 win32.ARCH_AMD64,
190 ))
191
198
def decode(self, address, code):
    """
    Disassemble machine code using the BeaEngine bindings.

    Bytes that cannot be decoded (an unknown opcode, or a trailing
    instruction that would run past the end of C{code}) are reported one
    byte at a time as C{db XXh} pseudo instructions.

    @type  address: int
    @param address: Memory address where the code was read from.

    @type  code: str
    @param code: Machine code to disassemble.

    @rtype:  list of tuple( long, int, str, str )
    @return: List of tuples. Each tuple represents an assembly instruction
        and contains:
         - Memory address of instruction.
         - Size of instruction in bytes.
         - Disassembly line of instruction.
         - Hexadecimal dump of instruction.
    """
    code_size = len(code)

    # Integer view of the input, so a single byte formats the same way
    # whether indexing C{code} yields characters or integers.
    raw = bytearray(code)

    # The engine is handed raw addresses, so the code must live in a
    # ctypes buffer. create_string_buffer() appends a NUL terminator, which
    # makes the buffer one byte LONGER than the code: always bound reads by
    # code_size, never by the buffer length.
    code_buffer = ctypes.create_string_buffer(code)
    buffer_ptr = ctypes.addressof(code_buffer)

    # Fill in the structure that drives the disassembler.
    instruction = BeaEnginePython.DISASM()
    instruction.VirtualAddr = address
    instruction.EIP = buffer_ptr
    instruction.SecurityBlock = buffer_ptr + code_size
    if self.arch == win32.ARCH_I386:
        instruction.Archi = 0
    else:
        instruction.Archi = 0x40
    instruction.Options = ( BeaEnginePython.Tabulation +
                            BeaEnginePython.NasmSyntax +
                            BeaEnginePython.SuffixedNumeral +
                            BeaEnginePython.ShowSegmentRegs )

    # Bind everything the loop needs to local names.
    result = []
    append = result.append
    disasm = BeaEnginePython.Disasm
    instruction_ptr = ctypes.addressof(instruction)
    hexdump = HexDump.hexadecimal
    OUT_OF_BLOCK = BeaEnginePython.OUT_OF_BLOCK
    UNKNOWN_OPCODE = BeaEnginePython.UNKNOWN_OPCODE

    while True:

        # The current offset is wherever the engine's pointer stands.
        offset = instruction.EIP - buffer_ptr
        if offset >= code_size:
            break

        size = disasm(instruction_ptr)

        # The engine refused to read any further: stop here.
        if size == OUT_OF_BLOCK:
            break

        if size == UNKNOWN_OPCODE:
            # Skip exactly one byte and try again at the next one.
            undecoded = raw[offset:offset + 1]

        elif offset + size > code_size:
            # The instruction claims bytes beyond the input. Dump only
            # what is really left; the NUL terminator is not code.
            undecoded = raw[offset:code_size]

        else:
            # Regular instruction. Slice the hexdump from the input
            # rather than copying the whole ctypes buffer every time.
            append((
                instruction.VirtualAddr,
                size,
                instruction.CompleteInstr.strip(),
                hexdump(code[offset:offset + size]),
            ))
            instruction.VirtualAddr += size
            instruction.EIP += size
            continue

        # Emit each undecodable byte as its own "db" line.
        for value in undecoded:
            char = "%.2X" % value
            append((
                instruction.VirtualAddr,
                1,
                "db %sh" % char,
                char,
            ))
            instruction.VirtualAddr += 1
            instruction.EIP += 1

    return result
292
293
294
296 """
297 Integration with the diStorm disassembler by Gil Dabah.
298
299 @see: U{https://code.google.com/p/distorm3}
300 """
301
302 name = "diStorm"
303 desc = "diStorm disassembler by Gil Dabah"
304 url = "https://code.google.com/p/distorm3"
305
306 supported = set((
307 win32.ARCH_I386,
308 win32.ARCH_AMD64,
309 ))
310
329
def decode(self, address, code):
    """
    Disassemble machine code using diStorm.

    The work is delegated to the decoder callable stored in the private
    C{__decode} attribute, called with the private C{__flag} attribute as
    its third argument. NOTE(review): both attributes are set outside this
    method, presumably when the dependencies are loaded - confirm.

    @type  address: int
    @param address: Memory address where the code was read from.

    @type  code: str
    @param code: Machine code to disassemble.

    @return: Whatever the underlying decoder returns, unmodified.
    """
    return self.__decode(address, code, self.__flag)
332
333
334
336 """
337 Integration with PyDasm: Python bindings to libdasm.
338
339 @see: U{https://code.google.com/p/libdasm/}
340 """
341
342 name = "PyDasm"
343 desc = "PyDasm: Python bindings to libdasm"
344 url = "https://code.google.com/p/libdasm/"
345
346 supported = set((
347 win32.ARCH_I386,
348 ))
349
356
def decode(self, address, code):
    """
    Disassemble 32 bit machine code using PyDasm.

    Any byte PyDasm cannot decode, and any instruction that would extend
    past the end of C{code}, is reported as a one byte C{db 0xXX} pseudo
    instruction.

    @type  address: int
    @param address: Memory address where the code was read from.

    @type  code: str
    @param code: Machine code to disassemble.

    @rtype:  list of tuple( long, int, str, str )
    @return: List of tuples. Each tuple represents an assembly instruction
        and contains:
         - Memory address of instruction.
         - Size of instruction in bytes.
         - Disassembly line of instruction.
         - Hexadecimal dump of instruction.
    """
    result = []
    code_size = len(code)
    offset = 0
    while offset < code_size:

        # Try to decode a single instruction from a 32 byte window.
        instruction = pydasm.get_instruction(code[offset:offset + 32],
                                             pydasm.MODE_32)

        # Address of the instruction being decoded.
        current = address + offset

        size = 0
        if instruction:
            size = instruction.length

        if size < 1 or offset + size > code_size:
            # Undecodable, truncated, or a non-positive length that would
            # keep the loop from advancing: emit the byte as data.
            value = bytearray(code[offset:offset + 1])[0]
            hexdump = '%.2X' % value
            disasm = 'db 0x%s' % hexdump
            size = 1

        else:
            disasm = pydasm.get_instruction_string(instruction,
                                                   pydasm.FORMAT_INTEL,
                                                   current)
            hexdump = HexDump.hexadecimal(code[offset:offset + size])

        result.append((
            current,
            size,
            disasm,
            hexdump,
        ))

        # Move on to the next instruction.
        offset += size

    return result
398
399
400
402 """
403 Integration with Immunity libdisassemble.
404
405 @see: U{http://www.immunitysec.com/resources-freesoftware.shtml}
406 """
407
408 name = "Libdisassemble"
409 desc = "Immunity libdisassemble"
410 url = "http://www.immunitysec.com/resources-freesoftware.shtml"
411
412 supported = set((
413 win32.ARCH_I386,
414 ))
415
434
def decode(self, address, code):
    """
    Disassemble machine code using Immunity libdisassemble.

    If the library reports a non-positive instruction size, or a size that
    runs past the end of C{code}, the current byte is reported as a one
    byte C{db 0xXX} pseudo instruction instead. This guarantees the loop
    always advances and that the reported size never exceeds the bytes
    actually available.

    @type  address: int
    @param address: Memory address where the code was read from.

    @type  code: str
    @param code: Machine code to disassemble.

    @rtype:  list of tuple( long, int, str, str )
    @return: List of tuples. Each tuple represents an assembly instruction
        and contains:
         - Memory address of instruction.
         - Size of instruction in bytes.
         - Disassembly line of instruction.
         - Hexadecimal dump of instruction.
    """
    result = []
    code_size = len(code)
    offset = 0
    while offset < code_size:

        # Decode a single instruction from a 32 byte window.
        opcode = libdisassemble.Opcode( code[offset:offset + 32] )
        length = opcode.getSize()

        if length < 1 or offset + length > code_size:
            # Same fallback as the other x86 adaptors: one data byte.
            value = bytearray(code[offset:offset + 1])[0]
            hexdump = '%.2X' % value
            disasm = 'db 0x%s' % hexdump
            length = 1

        else:
            disasm = opcode.printOpcode('INTEL')
            hexdump = HexDump.hexadecimal( code[offset:offset + length] )

        result.append((
            address + offset,
            length,
            disasm,
            hexdump,
        ))

        # Move on to the next instruction.
        offset += length

    return result
461
462
463
465 """
466 Integration with the Capstone disassembler by Nguyen Anh Quynh.
467
468 @see: U{http://www.capstone-engine.org/}
469 """
470
471 name = "Capstone"
472 desc = "Capstone disassembler by Nguyen Anh Quynh"
473 url = "http://www.capstone-engine.org/"
474
475 supported = set((
476 win32.ARCH_I386,
477 win32.ARCH_AMD64,
478 win32.ARCH_THUMB,
479 win32.ARCH_ARM,
480 win32.ARCH_ARM64,
481 ))
482
484
485
486 global capstone
487 if capstone is None:
488 import capstone
489
490
491 self.__constants = {
492 win32.ARCH_I386:
493 (capstone.CS_ARCH_X86, capstone.CS_MODE_32),
494 win32.ARCH_AMD64:
495 (capstone.CS_ARCH_X86, capstone.CS_MODE_64),
496 win32.ARCH_THUMB:
497 (capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB),
498 win32.ARCH_ARM:
499 (capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM),
500 win32.ARCH_ARM64:
501 (capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM),
502 }
503
504
505
506 try:
507 self.__bug = not isinstance(
508 capstone.cs_disasm_quick(
509 capstone.CS_ARCH_X86, capstone.CS_MODE_32, "\x90", 1)[0],
510 capstone.capstone.CsInsn)
511 except AttributeError:
512 self.__bug = False
513 if self.__bug:
514 warnings.warn(
515 "This version of the Capstone bindings is unstable,"
516 " please upgrade to a newer one!",
517 RuntimeWarning, stacklevel=4)
518
519
def decode(self, address, code):
    """
    Disassemble machine code using Capstone.

    Instructions are decoded one at a time from a 16 byte window. When
    Capstone cannot decode at the current offset, the bytes are emitted as
    data instead: one byte with a C{db} mnemonic on I386/AMD64, up to four
    bytes with a C{dcb} mnemonic on every other architecture.

    @type  address: int
    @param address: Memory address where the code was read from.

    @type  code: str
    @param code: Machine code to disassemble.

    @rtype:  list of tuple( long, int, str, str )
    @return: List of tuples. Each tuple represents an assembly instruction
        and contains:
         - Memory address of instruction.
         - Size of instruction in bytes.
         - Disassembly line of instruction.
         - Hexadecimal dump of instruction.
    """

    # Capstone constants for the requested architecture.
    arch, mode = self.__constants[self.arch]

    decoder = capstone.cs_disasm_quick

    # With the bindings flagged as unstable every exception raised by the
    # decoder is treated as a decoding failure; otherwise only CsError is.
    if self.__bug:
        CsError = Exception
    else:
        CsError = capstone.CsError

    # The fallback encoding depends only on the architecture.
    if self.arch in (win32.ARCH_I386, win32.ARCH_AMD64):
        skip_size = 1
        data_mnemonic = "db "
    else:
        skip_size = 4
        data_mnemonic = "dcb "

    # Bound before the loop, as in the original code.
    # NOTE(review): presumably part of the workaround for the unstable
    # bindings flagged by __bug - confirm before removing.
    length = mnemonic = op_str = None

    result = []
    code_size = len(code)
    offset = 0
    while offset < code_size:

        # Decode at most one instruction. An empty result (IndexError on
        # [0]) or a decoder error both mean "not decodable here".
        instr = None
        try:
            instr = decoder(
                arch, mode, code[offset:offset + 16], address + offset, 1)[0]
        except (IndexError, CsError):
            pass

        if instr is not None:

            # Copy the fields out of the instruction object right away.
            length = instr.size
            mnemonic = instr.mnemonic
            op_str = instr.op_str

            if op_str:
                disasm = "%s %s" % (mnemonic, op_str)
            else:
                disasm = mnemonic

            hexdump = HexDump.hexadecimal( code[offset:offset + length] )

        else:

            # Fewer than skip_size bytes may be left at the end of the
            # code: report the number of bytes actually skipped.
            skipped = code[offset:offset + skip_size]
            length = len(skipped)
            hexdump = HexDump.hexadecimal(skipped)

            # ASCII letters are shown quoted, everything else in hex.
            operands = []
            for value in bytearray(skipped):
                if 65 <= value <= 90 or 97 <= value <= 122:
                    operands.append("'%s'" % chr(value))
                else:
                    operands.append("0x%x" % value)
            mnemonic = data_mnemonic
            op_str = ", ".join(operands)
            disasm = mnemonic + op_str

        result.append((
            address + offset,
            length,
            disasm,
            hexdump,
        ))

        # Move on to the next instruction.
        offset += length

    return result
625
626
627
628
629
630
632 """
633 Generic disassembler. Uses a set of adapters to decide which library to
634 load for which supported platform.
635
636 @type engines: tuple( L{Engine} )
637 @cvar engines: Set of supported engines. If you implement your own adapter
638 you can add its class here to make it available to L{Disassembler}.
639 Supported disassemblers are:
640 """
641
642 engines = (
643 DistormEngine,
644 BeaEngine,
645 CapstoneEngine,
646 LibdisassembleEngine,
647 PyDasmEngine,
648 )
649
650
651 __doc__ += "\n"
652 for e in engines:
653 __doc__ += " - %s - %s (U{%s})\n" % (e.name, e.desc, e.url)
654 del e
655
656
657 __decoder = {}
658
def __new__(cls, arch = None, engine = None):
    """
    Factory class. You can't really instantiate a L{Disassembler} object,
    instead one of the adapter L{Engine} subclasses is returned.

    Engine instances are cached per (engine name, architecture) pair, so
    repeated requests return the same object. The cache key always uses
    the engine class' own C{name}, so the spelling of the C{engine}
    argument does not matter.

    @type  arch: str
    @param arch: (Optional) Name of the processor architecture.
        If not provided the current processor architecture is assumed.
        For more details see L{win32.version._get_arch}.

    @type  engine: str
    @param engine: (Optional) Name of the disassembler engine.
        If not provided a compatible one is loaded automatically.
        The name is matched case insensitively.
        See: L{Engine.name}

    @rtype:  L{Engine}
    @return: Instance of one of the classes listed in C{engines}.

    @raise NotImplementedError: No compatible disassembler was found that
        could decode machine code for the requested architecture. This may
        be due to missing dependencies.

    @raise ValueError: An unknown engine name was supplied.
    """

    # Use the default architecture if none was requested.
    if not arch:
        arch = win32.arch

    # No engine requested: return the first one, in order of preference,
    # that supports the architecture and can actually be loaded.
    if not engine:
        for clazz in cls.engines:
            if arch not in clazz.supported:
                continue
            selected = (clazz.name, arch)
            try:
                return cls.__decoder[selected]
            except KeyError:
                pass
            try:
                decoder = clazz(arch)
            except NotImplementedError:
                # This engine is unavailable, try the next one.
                continue
            cls.__decoder[selected] = decoder
            return decoder
        msg = "No disassembler engine available for %s code." % arch
        raise NotImplementedError(msg)

    # A specific engine was requested: find its class by name.
    wanted = engine.lower()
    for clazz in cls.engines:
        if clazz.name.lower() == wanted:
            break
    else:
        msg = "Unsupported disassembler engine: %s" % engine
        raise ValueError(msg)

    # Return the cached instance, or create and cache a new one.
    selected = (clazz.name, arch)
    try:
        return cls.__decoder[selected]
    except KeyError:
        pass
    if arch not in clazz.supported:
        msg = "The %s engine cannot decode %s code." % (engine, arch)
        raise NotImplementedError(msg)
    decoder = clazz(arch)
    cls.__decoder[selected] = decoder
    return decoder
723