1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 """
33 Process memory search.
34
35 @group Memory search:
36 Search,
37 Pattern,
38 BytePattern,
39 TextPattern,
40 RegExpPattern,
41 HexPattern
42 """
43
# Version control keyword string identifying the revision of this module.
__revision__ = "$Id: search.py 1299 2013-12-20 09:30:55Z qvasimodo $"

# Public names of this module: the Search static class plus the Pattern base
# class and its four stock subclasses.
__all__ = [
    'Search',
    'Pattern',
    'BytePattern',
    'TextPattern',
    'RegExpPattern',
    'HexPattern',
    ]
54
55 from textio import HexInput
56 from util import StaticClass, MemoryAddresses
57 import win32
58
59 import warnings
60
61 try:
62
63 import regex as re
64 except ImportError:
65 import re
70 """
71 Base class for search patterns.
72
73 The following L{Pattern} subclasses are provided by WinAppDbg:
74 - L{BytePattern}
75 - L{TextPattern}
76 - L{RegExpPattern}
77 - L{HexPattern}
78
79 @see: L{Search.search_process}
80 """
81
83 """
84 Class constructor.
85
86 The only mandatory argument should be the pattern string.
87
88 This method B{MUST} be reimplemented by subclasses of L{Pattern}.
89 """
90 raise NotImplementedError()
91
93 """
94 Returns the maximum expected length of the strings matched by this
95 pattern. Exact behavior is implementation dependent.
96
97 Ideally it should be an exact value, but in some cases it's not
98 possible to calculate so an upper limit should be returned instead.
99
100 If that's not possible either an exception must be raised.
101
102 This value will be used to calculate the required buffer size when
103 doing buffered searches.
104
105 This method B{MUST} be reimplemented by subclasses of L{Pattern}.
106 """
107 raise NotImplementedError()
108
def read(self, process, address, size):
    """
    Fetch a chunk of memory from the target process.

    The default implementation simply delegates to the C{read} method of
    the given process object and hands back whatever it returns.

    Subclasses of L{Pattern} typically don't need to reimplement this
    method.

    @param process: Object whose C{read(address, size)} method is called.

    @param address: Memory address to start reading from.

    @param size: Number of bytes to request.

    @return: The value returned by C{process.read(address, size)}.
    """
    data = process.read(address, size)
    return data
118
def find(self, buffer, pos = None):
    """
    Look for the pattern inside a buffer, optionally skipping everything
    before a given offset.

    This is an abstract method: it B{MUST} be reimplemented by subclasses
    of L{Pattern}. The base implementation always raises an exception.

    @type  buffer: str
    @param buffer: Buffer to search on.

    @type  pos: int
    @param pos:
        (Optional) Position within the buffer to start searching from.

    @rtype:  tuple( int, int )
    @return: Tuple containing the following:
         - Position within the buffer where a match is found, or C{-1} if
           no match was found.
         - Length of the matched data if a match is found, or undefined if
           no match was found.

    @raise NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError()
141
def found(self, address, size, data):
    """
    Hook invoked for every match of the pattern.

    Subclasses of L{Pattern} may override it to filter out unwanted
    results, or to modify the results before they are handed to the caller
    of L{Search.search_process}. Returning C{None} causes the result to be
    skipped.

    The default implementation performs no filtering: it returns its three
    arguments, unchanged, packed in a tuple.

    @type  address: int
    @param address: The memory address where the pattern was found.

    @type  size: int
    @param size: The size of the data that matches the pattern.

    @type  data: str
    @param data: The data that matches the pattern.

    @rtype:  tuple( int, int, str )
    @return: Tuple containing the following:
         * The memory address where the pattern was found.
         * The size of the data that matches the pattern.
         * The data that matches the pattern.
    """
    match = (address, size, data)
    return match
171
175 """
176 Fixed byte pattern.
177
178 @type pattern: str
179 @ivar pattern: Byte string to search for.
180
181 @type length: int
182 @ivar length: Length of the byte pattern.
183 """
184
192
194 """
195 Returns the exact length of the pattern.
196
197 @see: L{Pattern.__len__}
198 """
199 return self.length
200
201 - def find(self, buffer, pos = None):
203
204
205
206
207
class TextPattern (BytePattern):
    """
    Text pattern.

    @type isUnicode: bool
    @ivar isUnicode: C{True} if the text to search for is a unicode string,
        C{False} otherwise.

    @type encoding: str
    @ivar encoding: Encoding for the text parameter.
        Only used when the text to search for is a Unicode string.
        Don't change unless you know what you're doing!

    @type caseSensitive: bool
    @ivar caseSensitive: C{True} if the search is case sensitive,
        C{False} otherwise.
    """

    def __init__(self, text, encoding = "utf-16le", caseSensitive = False):
        """
        @type  text: str or unicode
        @param text: Text to search for.

        @type  encoding: str
        @param encoding: (Optional) Encoding for the text parameter.
            Only used when the text to search for is a Unicode string.
            Don't change unless you know what you're doing!

        @type  caseSensitive: bool
        @param caseSensitive: C{True} if the search is case sensitive,
            C{False} otherwise.
        """
        self.isUnicode = isinstance(text, unicode)
        self.encoding = encoding
        self.caseSensitive = caseSensitive

        # Always start from the text itself so the pattern is defined for
        # every combination of arguments, then apply each transformation
        # on top of the previous one: fold the case first, encode last.
        pattern = text
        if not caseSensitive:
            pattern = pattern.lower()
        if self.isUnicode:
            pattern = pattern.encode(encoding)
        super(TextPattern, self).__init__(pattern)

    def read(self, process, address, size):
        """
        Reads the requested number of bytes from the process memory at the
        given address. For case insensitive searches the data is converted
        to lowercase, so it can be compared against the lowercase pattern.

        @param process: Process object to read from.

        @param address: Memory address to start reading from.

        @param size: Number of bytes to read.

        @return: The data read, lowercased unless the search is case
            sensitive. The lowercased data always has the same length as
            the data that was read.
        """
        data = super(TextPattern, self).read(process, address, size)
        if self.caseSensitive:
            return data
        if not self.isUnicode:
            return data.lower()

        # Best effort Unicode aware case folding. Arbitrary memory is
        # rarely valid encoded text, so any failure falls back to folding
        # the raw bytes instead of aborting the search.
        try:
            encoding = self.encoding
            text = data.decode(encoding, "replace").lower()
            new_data = text.encode(encoding, "replace")
        except Exception:
            return data.lower()

        # The folded data is only usable if every byte is still at the same
        # offset, otherwise the match positions would be wrong.
        if len(data) == len(new_data):
            return new_data
        return data.lower()

    def found(self, address, size, data):
        """
        Decodes the matched data when searching for Unicode text.

        @type  address: int
        @param address: The memory address where the pattern was found.

        @type  size: int
        @param size: The size of the data that matches the pattern.

        @type  data: str
        @param data: The data that matches the pattern.

        @rtype:  tuple( int, int, str or unicode )
        @return: Tuple with the address, the size and the matched text, or
            C{None} if the matched data can't be decoded using the
            configured encoding.
        """
        if self.isUnicode:
            try:
                data = unicode(data, self.encoding)
            except Exception:
                # Matches that can't be decoded are dropped from the
                # results rather than aborting the whole search.
                return None
        return (address, size, data)
276
280 """
281 Regular expression pattern.
282
283 @type pattern: str
284 @ivar pattern: Regular expression in text form.
285
286 @type flags: int
287 @ivar flags: Regular expression flags.
288
289 @type regexp: re.compile
290 @ivar regexp: Regular expression in compiled form.
291
292 @type maxLength: int
293 @ivar maxLength:
294 Maximum expected length of the strings matched by this regular
295 expression.
296
297 This value will be used to calculate the required buffer size when
298 doing buffered searches.
299
300 Ideally it should be an exact value, but in some cases it's not
301 possible to calculate so an upper limit should be given instead.
302
303 If that's not possible either, C{None} should be used. That will
304 cause an exception to be raised if this pattern is used in a
305 buffered search.
306 """
307
308 - def __init__(self, regexp, flags = 0, maxLength = None):
309 """
310 @type regexp: str
311 @param regexp: Regular expression string.
312
313 @type flags: int
314 @param flags: Regular expression flags.
315
316 @type maxLength: int
317 @param maxLength: Maximum expected length of the strings matched by
318 this regular expression.
319
320 This value will be used to calculate the required buffer size when
321 doing buffered searches.
322
323 Ideally it should be an exact value, but in some cases it's not
324 possible to calculate so an upper limit should be given instead.
325
326 If that's not possible either, C{None} should be used. That will
327 cause an exception to be raised if this pattern is used in a
328 buffered search.
329 """
330 self.pattern = regexp
331 self.flags = flags
332 self.regexp = re.compile(regexp, flags)
333 self.maxLength = maxLength
334
336 """
337 Returns the maximum expected length of the strings matched by this
338 pattern. This value is taken from the C{maxLength} argument of the
339 constructor of this class.
340
341 Ideally it should be an exact value, but in some cases it's not
342 possible to calculate so an upper limit should be returned instead.
343
344 If that's not possible either an exception must be raised.
345
346 This value will be used to calculate the required buffer size when
347 doing buffered searches.
348 """
349 if self.maxLength is None:
350 raise NotImplementedError()
351 return self.maxLength
352
353 - def find(self, buffer, pos = None):
361
365 """
366 Hexadecimal pattern.
367
368 Hex patterns must be in this form::
369 "68 65 6c 6c 6f 20 77 6f 72 6c 64" # "hello world"
370
371 Spaces are optional. Capitalization of hex digits doesn't matter.
372 This is exactly equivalent to the previous example::
373 "68656C6C6F20776F726C64" # "hello world"
374
375 Wildcards are allowed, in the form of a C{?} sign in any hex digit::
376 "5? 5? c3" # pop register / pop register / ret
377 "b8 ?? ?? ?? ??" # mov eax, immediate value
378
379 @type pattern: str
380 @ivar pattern: Hexadecimal pattern.
381 """
382
384 """
385 If the pattern is completely static (no wildcards are present) a
386 L{BytePattern} is created instead. That's because searching for a
387 fixed byte pattern is faster than searching for a regular expression.
388 """
389 if '?' not in pattern:
390 return BytePattern( HexInput.hexadecimal(pattern) )
391 return object.__new__(cls, pattern)
392
394 """
395 Hex patterns must be in this form::
396 "68 65 6c 6c 6f 20 77 6f 72 6c 64" # "hello world"
397
398 Spaces are optional. Capitalization of hex digits doesn't matter.
399 This is exactly equivalent to the previous example::
400 "68656C6C6F20776F726C64" # "hello world"
401
402 Wildcards are allowed, in the form of a C{?} sign in any hex digit::
403 "5? 5? c3" # pop register / pop register / ret
404 "b8 ?? ?? ?? ??" # mov eax, immediate value
405
406 @type hexa: str
407 @param hexa: Pattern to search for.
408 """
409 maxLength = len([x for x in hexa
410 if x in "?0123456789ABCDEFabcdef"]) / 2
411 super(HexPattern, self).__init__(HexInput.pattern(hexa),
412 maxLength = maxLength)
413
414
415
416 -class Search (StaticClass):
417 """
418 Static class to group the search functionality.
419
420 Do not instance this class! Use its static methods instead.
421 """
422
423
424
425
426
427
@staticmethod
def search_process(process, pattern, minAddr = None,
                                     maxAddr = None,
                                     bufferPages = None,
                                     overlapping = False):
    """
    Search for the given pattern within the process memory.

    @type  process: L{Process}
    @param process: Process to search.

    @type  pattern: L{Pattern}
    @param pattern: Pattern to search for.
        It must be an instance of a subclass of L{Pattern}.

        The following L{Pattern} subclasses are provided by WinAppDbg:
         - L{BytePattern}
         - L{TextPattern}
         - L{RegExpPattern}
         - L{HexPattern}

        You can also write your own subclass of L{Pattern} for customized
        searches.

    @type  minAddr: int
    @param minAddr: (Optional) Start the search at this memory address.

    @type  maxAddr: int
    @param maxAddr: (Optional) Stop the search at this memory address.

    @type  bufferPages: int
    @param bufferPages: (Optional) Number of memory pages to buffer when
        performing the search. Valid values are:
         - C{0} or C{None}:
           Automatically determine the required buffer size. May not give
           complete results for regular expressions that match variable
           sized strings. If the pattern can't tell its own length the
           search is not buffered.
         - C{> 0}: Set the buffer size, in memory pages.
         - C{< 0}: Disable buffering entirely. This may give you a little
           speed gain at the cost of an increased memory usage. If the
           target process has very large contiguous memory regions it may
           actually be slower or even fail. It's also the only way to
           guarantee complete results for regular expressions that match
           variable sized strings.

    @type  overlapping: bool
    @param overlapping: C{True} to allow overlapping results, C{False}
        otherwise.

        Overlapping results yield the maximum possible number of results.

        For example, if searching for "AAAA" within "AAAAAAAA" at address
        C{0x10000}, when overlapping is turned off the following matches
        are yielded::
            (0x10000, 4, "AAAA")
            (0x10004, 4, "AAAA")

        If overlapping is turned on, the following matches are yielded::
            (0x10000, 4, "AAAA")
            (0x10001, 4, "AAAA")
            (0x10002, 4, "AAAA")
            (0x10003, 4, "AAAA")
            (0x10004, 4, "AAAA")

        As you can see, the middle results are overlapping the last two.

    @rtype:  iterator of tuple( int, int, str )
    @return: An iterator of tuples. Each tuple contains the following:
         - The memory address where the pattern was found.
         - The size of the data that matches the pattern.
         - The data that matches the pattern.

    @raise WindowsError: An error occurred when querying or reading the
        process memory.
    """

    # Bind the frequently used callables to local names.
    page  = MemoryAddresses.pageSize
    read  = pattern.read
    find  = pattern.find
    found = pattern.found

    def next_start(pos, length):
        # Offset to resume searching from after a match at pos. Always
        # move forward at least one byte, so a zero length match can't
        # stall the search.
        if overlapping:
            return pos + 1
        return pos + max(length, 1)

    # Default search limits.
    if minAddr is None:
        minAddr = 0
    if maxAddr is None:
        # NOTE(review): presumably the highest address a pointer can
        # hold -- confirm against the win32 module.
        maxAddr = win32.LPVOID(-1).value

    # Calculate the size in bytes of the search window.
    # A size of None means the search is not buffered.
    if not bufferPages:
        try:
            size = MemoryAddresses.align_address_to_page_end(
                                                    len(pattern)) + page
        except NotImplementedError:
            # The pattern can't tell how long its matches are.
            size = None
    elif bufferPages > 0:
        size = page * (bufferPages + 1)
    else:
        size = None

    # Get the memory map of the process.
    memory_map = process.iter_memory_map(minAddr, maxAddr)

    # Buffered search: slide a window over each run of contiguous regions,
    # reading at most one page at a time.
    if size:

        keep      = size - page # bytes of history kept between reads
        buffer    = ""          # data in the search window
        base      = 0           # memory address of buffer[0]
        next_addr = None        # address right after the buffered data
        last      = 0           # offset in buffer to resume searching at

        for mbi in memory_map:

            # Skip regions with no readable content.
            if not mbi.has_content():
                continue

            # Get the limits of the region.
            address = mbi.BaseAddress
            if address >= maxAddr:
                break
            end = address + mbi.RegionSize

            # Keep the window only if this region begins exactly where the
            # buffered data ends. That way matches straddling two
            # contiguous regions are still found.
            if address != next_addr:
                buffer = ""
                base   = address
                last   = 0

            while address < end:

                # Append the next chunk of the region to the window.
                # This relies on the read returning exactly the number of
                # bytes requested.
                chunk   = min(page, end - address)
                buffer  = buffer + read(process, address, chunk)
                address = address + chunk

                # Yield each match of the pattern in the window.
                pos, length = find(buffer, last)
                while pos >= last:
                    match_addr = base + pos
                    if minAddr <= match_addr < maxAddr:
                        result = found(match_addr, length,
                                       buffer [ pos : pos + length ])
                        if result is not None:
                            yield result
                    last = next_start(pos, length)
                    pos, length = find(buffer, last)

                # Drop the oldest data, keeping just enough history to
                # catch matches that continue in the next chunk. Every
                # match reported so far begins before "last", so it can't
                # be reported twice.
                excess = len(buffer) - keep
                if excess > 0:
                    buffer = buffer[ excess : ]
                    base   = base + excess
                    last   = max(last - excess, 0)

            next_addr = end

    # Unbuffered search: read each region in one go.
    else:

        for mbi in memory_map:

            # Skip regions with no readable content.
            if not mbi.has_content():
                continue

            # Get the limits of the region.
            address = mbi.BaseAddress
            if address >= maxAddr:
                break

            # Read through the pattern, like the buffered search does, so
            # patterns that preprocess the data work in both modes.
            buffer = read(process, address, mbi.RegionSize)

            # Yield each match of the pattern in the region.
            last = 0
            pos, length = find(buffer)
            while pos >= last:
                match_addr = address + pos
                if minAddr <= match_addr < maxAddr:
                    result = found(match_addr, length,
                                   buffer [ pos : pos + length ])
                    if result is not None:
                        yield result
                last = next_start(pos, length)
                pos, length = find(buffer, last)
641
642 @classmethod
644 """
645 Extract ASCII strings from the process memory.
646
647 @type process: L{Process}
648 @param process: Process to search.
649
650 @type minSize: int
651 @param minSize: (Optional) Minimum size of the strings to search for.
652
653 @type maxSize: int
654 @param maxSize: (Optional) Maximum size of the strings to search for.
655
656 @rtype: iterator of tuple(int, int, str)
657 @return: Iterator of strings extracted from the process memory.
658 Each tuple contains the following:
659 - The memory address where the string was found.
660 - The size of the string.
661 - The string.
662 """
663 regexp = r"[\s\w\!\@\#\$\%%\^\&\*\(\)\{\}\[\]\~\`\'\"\:\;\.\,\\\/\-\+\=\_\<\>]{%d,%d}\0" % (minSize, maxSize)
664 pattern = RegExpPattern(regexp, 0, maxSize)
665 return cls.search_process(process, pattern, overlapping = False)
666