"""Checksumming convenience classes.

Utilities for creating and verifying Checkm and BagIt checksum manifests.
Each checkm manifest line carries up to six whitespace-separated tokens:

[@]SourceFileOrURL  Alg  Digest  Length  ModTime  TargetFileOrURL
TOKEN NUMBER:  1    2    3       4       5        6

A leading '@' on the first token marks a line that references another
checkm manifest (used by multilevel manifests).
"""

from __future__ import with_statement

COLUMNS = {0: "SourceFileOrURL",
           1: "Alg",
           2: "Digest",
           3: "Length",
           4: "ModTime",
           5: "TargetFileOrURL",
           }
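
# An illustrative manifest line (hypothetical values), as produced by
# CheckmScanner.scan_path with columns=5:
#
#   data/a.txt    md5    0cc175b9c0f1b6a831c399e269772661    1    1270586730
#
# A first token of '@data/checkm.txt' would instead reference a nested
# checkm manifest.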

import os, sys
from stat import *

import re

from collections import defaultdict

import hashlib

import codecs

import logging

logging.basicConfig(level=logging.INFO)

logger = logging.getLogger('checkm')
40
42 """The item or directory was either not found, or not accessible."""
44 """
45 FIXME
46 @param *arg:
47 @type *arg:
48 @param **kw:
49 @type **kw:
50 """
51 self.context = (arg, kw)
53 """
54 FIXME
55 """
56 return self.context.__str__()
58 """
59 FIXME
60 """
61 return self.context.__str__()
62
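# Illustrative: NotFound echoes back whatever context it was given, e.g.
# NotFound(item_path='/no/such/file') prints as
# ((), {'item_path': '/no/such/file'}).
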
class CheckmReporter(object):
    COLUMN_NAMES = [u'# [@]SourceFileOrURL', u'Alg', u'Digest', u'Length', u'ModTime']

    def __init__(self):
        # The scanner that produces the raw report lines.
        self.scanner = CheckmScanner()

    def _get_max_len(self, report):
        """Find the widest token in each column of a scan report.

        @param report: list of token lists, as returned by the scanner
        @return: dict mapping column index to maximum token length
        """
        cols = defaultdict(lambda: 0)
        for line in report:
            for index in xrange(len(line)):
                if len(line[index]) > cols[index]:
                    cols[index] = len(line[index])
        return cols

    def _space_line(self, line, col_maxes):
        """Join a line's tokens, padding each to its column's maximum
        width plus four spaces so that columns line up vertically.

        @param line: list of tokens for one manifest line
        @param col_maxes: column-width dict from _get_max_len
        """
        spaced_line = []
        for index in xrange(len(line)):
            spaced_line.append(line[index])
            spaces = col_maxes[index] - len(line[index]) + 4
            spaced_line.append(u" " * spaces)
        return u"".join(spaced_line)

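    # Illustrative: with col_maxes {0: 12, 1: 3},
    # _space_line([u'a.txt', u'md5'], col_maxes) pads u'a.txt' with
    # 12-5+4 = 11 trailing spaces and u'md5' with 3-3+4 = 4.
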
    def create_bagit_manifest(self, scan_directory, algorithm, recursive=False, delimiter=" ", filename=None):
        """Scan a directory and write a BagIt-style manifest of
        "digest<delimiter>path" lines.

        @param scan_directory: directory to scan
        @param algorithm: hashlib algorithm name, e.g. 'md5'
        @param recursive: descend into subdirectories if True
        @param delimiter: string placed between digest and path
        @param filename: target filename or file-like object; defaults to
                         'manifest-<algorithm>.txt'
        @return: the filename or file-like object written to
        """
        if not filename:
            filename = "manifest-%s.txt" % algorithm
        logger.info("Creating bagit manifest file(%s) for dir(%s) with Alg:%s" % (filename,
                                                                                  scan_directory,
                                                                                  algorithm))
        report = self.scanner.scan_directory(scan_directory, algorithm, recursive=recursive, columns=3)
        if hasattr(filename, 'write'):
            # Writing to a file-like object: guess the name the manifest
            # would have had, so a previously written manifest is skipped.
            faked_filename = "manifest-%s.txt" % algorithm
            for line in report:
                if line[2] != "d":    # skip directory entries
                    if os.path.abspath(line[0]) != os.path.abspath(faked_filename):
                        filename.write("%s%s%s\n" % (line[2], delimiter, line[0]))
                    else:
                        logger.info("Manifest file match - scan line ignored")
        else:
            with codecs.open(filename, encoding='utf-8', mode="w") as output:
                for line in report:
                    if line[2] != "d":    # skip directory entries
                        if os.path.abspath(line[0]) != os.path.abspath(filename):
                            output.write("%s%s%s\n" % (line[2], delimiter, line[0]))
                        else:
                            logger.info("Manifest file match - scan line ignored")
                output.write("\n")
        return filename

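    # Illustrative usage (hypothetical directory):
    #
    #   CheckmReporter().create_bagit_manifest("data", "md5", recursive=True)
    #
    # writes manifest-md5.txt, one "digest path" line per file under data/.
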
139 logger.info("Creating multilevel checkm files '(%s)' from top level directory(%s) with Alg:%s and columns:%s" % (checkm_filename, top_directory, algorithm, columns))
140 if not os.path.isdir(top_directory):
141 raise NotFound(top_directory=top_directory)
142
143
144
145 dirs = dict([(root, dirnames) for (root, dirnames, _) in os.walk(top_directory, topdown=False)])
146
147 for dirname in dirs:
148 with codecs.open(os.path.join(dirname, checkm_filename), encoding='utf-8', mode="w") as output:
149 self.create_checkm_file(dirname,
150 algorithm,
151 os.path.join(dirname, checkm_filename),
152 recursive=False,
153 columns=columns,
154 checkm_file=output)
155 subdir_report = []
156 for subdir in dirs[dirname]:
157 try:
158 line = self.scanner.scan_path(os.path.join(subdir, checkm_filename), algorithm, columns)
159 line[0] = '@%s' % (line[0])
160 subdir_report.append(line)
161 except Exception, e:
162 print "Fail! %s" % e
163 col_maxes = self._get_max_len(subdir_report)
164 for line in subdir_report:
165 output.write('%s\n' % (self._space_line(line, col_maxes)))
166 output.write('\n')
167
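    # Illustrative usage (hypothetical tree data/sub/): writes
    # data/sub/checkm.txt first, then data/checkm.txt, which gains an
    # '@data/sub/checkm.txt' line referencing the nested manifest:
    #
    #   CheckmReporter().create_multilevel_checkm("data", "md5", "checkm.txt")
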
    def create_checkm_file(self, scan_directory, algorithm, checkm_filename, recursive=False, columns=3, checkm_file=None):
        """Scan a directory and write a columned checkm manifest.

        @param scan_directory: directory to scan
        @param algorithm: hashlib algorithm name, e.g. 'md5'
        @param checkm_filename: manifest filename (also excluded from the scan)
        @param recursive: descend into subdirectories if True
        @param columns: number of columns to record (3-5)
        @param checkm_file: optional open file-like object to write to
                            instead of creating checkm_filename
        """
        logger.info("Creating checkm file for dir(%s) with Alg:%s and columns: %s" % (
            scan_directory,
            algorithm, columns))
        report = self.scanner.scan_directory(scan_directory, algorithm, recursive=recursive, columns=columns)
        col_maxes = self._get_max_len(report)
        if checkm_file is not None and hasattr(checkm_file, 'write'):
            checkm_file.write("%s \n" % (self._space_line(CheckmReporter.COLUMN_NAMES[:columns], col_maxes)))
            for line in report:
                if os.path.abspath(line[0]) != os.path.abspath(checkm_filename):
                    checkm_file.write("%s\n" % (self._space_line(line, col_maxes)))
                else:
                    logger.info("Manifest file match - scan line ignored")
            return checkm_file
        else:
            with codecs.open(checkm_filename, encoding='utf-8', mode="w") as output:
                output.write("%s \n" % (self._space_line(CheckmReporter.COLUMN_NAMES[:columns], col_maxes)))
                for line in report:
                    if os.path.abspath(line[0]) != os.path.abspath(checkm_filename):
                        output.write("%s\n" % (self._space_line(line, col_maxes)))
                    else:
                        logger.info("Manifest file match - scan line ignored")
                output.write("\n")

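    # Illustrative output (hypothetical values) for columns=3 - directories
    # get 'd' in place of a digest:
    #
    #   # [@]SourceFileOrURL    Alg    Digest
    #   data/a.txt              md5    0cc175b9c0f1b6a831c399e269772661
    #   data/sub                md5    d
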
193 """
194 FIXME
195 @param bagit_filename:
196 @type bagit_filename:
197 @param algorithm=None:
198 @type algorithm=None:
199 """
200 logger.info("Checking files against '%s' bagit manifest" % bagit_filename)
201 if algorithm == None:
202 if hasattr(bagit_filename, 'read'):
203 raise Exception("Need to supply the algorithm when passing a filelike object instead of a filename")
204 m = re.search("manifest-(?P<alg>[^\.]+)\.txt", bagit_filename)
205 if m != None:
206 algorithm = m.groupdict()['alg']
207 parser = BagitParser(bagit_filename)
208 scanner = CheckmScanner()
209 results = {'pass':[], 'fail':{}}
210 for row in parser:
211 if row:
212 try:
213 scan_row = scanner.scan_path(row[1], algorithm, 3)
214 if row[0] != scan_row[2]:
215 logger.info("Failed original: %s" % row)
216 logger.info("Current scan: %s" % scan_row)
217 results['fail'][row[1]] = (row, scan_row)
218 else:
219 results['pass'].append(row[1])
220 except NotFound:
221 scan_row = "File not found"
222 logger.info("Failed original: %s" % row)
223 logger.info("But file not found at this path.")
224 results['fail'][row[1]] = (row, scan_row)
225 return results
226
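    # Illustrative usage (hypothetical manifest file):
    #
    #   results = CheckmReporter().check_bagit_hashes("manifest-md5.txt")
    #   # results['pass'] -> paths whose digests still match
    #   # results['fail'] -> {path: (manifest row, rescanned row)}
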
228 """
229 FIXME
230 @param scan_directory:
231 @type scan_directory:
232 @param checkm_filename:
233 @type checkm_filename:
234 """
235 def _check_files_against_parser(parser):
236 scanner = CheckmScanner()
237 results = {'pass':[], 'fail':{}, 'include':[]}
238 for row in parser:
239 if row:
240 try:
241 if row[0].startswith('@'):
242 row[0] = row[0][1:]
243 results['include'].append(row[0])
244 scan_row = scanner.scan_path(row[0], row[1], len(row))
245 nomatch = False
246 for expected, scanned in zip(row, scan_row):
247 if expected != "-" and expected != scanned:
248 nomatch = True
249 if nomatch:
250 logger.info("Failed original: %s" % row)
251 logger.info("Current scan: %s" % scan_row)
252 results['fail'][row[0]] = (row, scan_row)
253 else:
254 results['pass'].append(row[0])
255 except NotFound:
256 scan_row = "File not found"
257 logger.info("Failed original: %s" % row)
258 logger.info("But file not found at this path.")
259 results['fail'][row[0]] = (row, scan_row)
260 return results
261
262 logger.info("Checking files against %s checkm manifest" % checkm_filename)
263 parser = CheckmParser(checkm_filename)
264 results = _check_files_against_parser(parser)
265 if ignore_multilevel:
266 return results
267 else:
268
269 checkm_list = results['include'][:]
270 while checkm_list:
271 checkm_file = checkm_list.pop()
272 parser = CheckmParser(checkm_file)
273 additional_results = _check_files_against_parser(parser)
274
275 results['pass'].extend(additional_results['pass'])
276
277 results['include'].extend(additional_results['include'])
278 checkm_list.extend(additional_results['include'])
279
280 results['fail'].update(additional_results['fail'])
281 return results
282
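    # Illustrative usage (hypothetical paths): same result shape as
    # check_bagit_hashes, plus results['include'] listing any nested
    # manifests that were followed:
    #
    #   results = CheckmReporter().check_checkm_hashes("data", "data/checkm.txt")
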
285 """
286 FIXME
287 @param bagit_file=None:
288 @type bagit_file=None:
289 """
290 self.status = False
291 self.lines = []
292 if bagit_file:
293 self.parse(bagit_file)
294
296 """
297 FIXME
298 """
299 class Bagit_iter:
300 def __init__(self, lines):
301 """
302 FIXME
303 @param lines:
304 @type lines:
305 """
306 self.lines = lines
307 self.last = 0
308 def __iter__(self):
309 """
310 FIXME
311 """
312 return self
313 def next(self):
314 """
315 FIXME
316 """
317 if self.last >= len(self.lines):
318 raise StopIteration
319 elif len(self.lines) == 0:
320 raise StopIteration
321 else:
322 self.last += 1
323 return self.lines[self.last-1]
324 return Bagit_iter(self.lines)
325
    def parse(self, fileobj):
        """Parse a manifest from a filename or file-like object.

        @param fileobj: filename or object with a read() method
        @return: the parsed rows
        """
        if not hasattr(fileobj, "read"):
            with codecs.open(fileobj, encoding='utf-8', mode="r") as check_fh:
                self._parse_lines(check_fh)
        else:
            self._parse_lines(fileobj)
        return self.lines

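    # Illustrative: iterate the parsed rows (hypothetical manifest file),
    # assuming each row holds the usual [digest, path] pair:
    #
    #   for digest, path in BagitParser("manifest-md5.txt"):
    #       print digest, path
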
340 """
341 FIXME
342 @param fh:
343 @type fh:
344 """
345 self.lines = []
346 line_buffer = ""
347 def _parse_line(line):
348 """
349 FIXME
350 @param line:
351 @type line:
352 """
353 if not line.startswith('#'):
354 tokens = filter(lambda x: x, re.split("\s+", line, 1))
355 logger.info(tokens)
356 if tokens:
357
358 if tokens[1].startswith("*"):
359 tokens[1] = tokens[1][1:].strip()
360 self.lines.append(tokens)
361 for chunk in fh.read(0x1000):
362 line_buffer = line_buffer + chunk
363 while True:
364 if not line_buffer:
365 break
366 fragments = line_buffer.split('\n',1)
367 if len(fragments) == 1:
368 break
369 _parse_line(fragments[0])
370 line_buffer = fragments[1]
371
374 """
375 FIXME
376 @param checkm_file=None:
377 @type checkm_file=None:
378 """
379 self.status = False
380 self.lines = []
381 if checkm_file:
382 self.parse(checkm_file)
383
385 """
386 FIXME
387 """
388 class Checkm_iter:
389 def __init__(self, lines):
390 """
391 FIXME
392 @param lines:
393 @type lines:
394 """
395 self.lines = lines
396 self.last = 0
397 def __iter__(self):
398 """
399 FIXME
400 """
401 return self
402 def next(self):
403 """
404 FIXME
405 """
406 if self.last >= len(self.lines):
407 raise StopIteration
408 elif len(self.lines) == 0:
409 raise StopIteration
410 else:
411 self.last += 1
412 return self.lines[self.last-1]
413 return Checkm_iter(self.lines)
414
    def parse(self, checkm_file):
        """Parse a manifest from a filename or file-like object.

        @param checkm_file: filename or object with a read() method
        @return: the parsed rows
        """
        if not hasattr(checkm_file, "read"):
            if os.path.isfile(checkm_file):
                with codecs.open(checkm_file, encoding='utf-8', mode="r") as check_fh:
                    self._parse_lines(check_fh)
            else:
                raise NotFound(checkm_file=checkm_file)
        else:
            self._parse_lines(checkm_file)
        return self.lines

432 """
433 FIXME
434 @param fh:
435 @type fh:
436 """
437 self.lines = []
438 line_buffer = ""
439 def _parse_line(line):
440 """
441 FIXME
442 @param line:
443 @type line:
444 """
445 if not line.startswith('#'):
446 tokens = filter(lambda x: x, re.split("\s+", line, 5))
447 logger.info(tokens)
448 if tokens:
449 self.lines.append(tokens)
450
451 for chunk in fh.read(0x1000):
452 line_buffer = line_buffer + chunk
453 while True:
454 if not line_buffer:
455 break
456 fragments = line_buffer.split('\n',1)
457 if len(fragments) == 1:
458 break
459 _parse_line(fragments[0])
460 line_buffer = fragments[1]
461

class CheckmScanner(object):
    HASHTYPES = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']

    def scan_local(self, directory_path, algorithm, columns=3):
        """Scan the immediate contents of a directory (non-recursive).

        @param directory_path: directory to scan
        @param algorithm: hashlib algorithm name, e.g. 'md5'
        @param columns: number of columns per line (3-5)
        @return: list of scan_path lines
        """
        report = []
        for item in os.listdir(directory_path):
            item_path = os.path.join(directory_path, item)
            report.append(self.scan_path(item_path, algorithm, columns))
        return report

    def scan_tree(self, directory_path, algorithm, columns):
        """Scan a directory and everything beneath it.

        @param directory_path: root of the tree to scan
        @param algorithm: hashlib algorithm name, e.g. 'md5'
        @param columns: number of columns per line (3-5)
        @return: list of scan_path lines for every directory and file
        """
        report = []
        if os.path.exists(directory_path):
            for (dirpath, dirnames, filenames) in os.walk(directory_path):
                for item_path in [os.path.join(dirpath, x) for x in dirnames + filenames]:
                    report.append(self.scan_path(item_path, algorithm, columns))
            return report
        else:
            raise NotFound(directory_path=directory_path, recursive=True)

    def scan_path(self, item_path, algorithm, columns):
        """Hash a single file or directory and return one manifest line.

        @param item_path: file or directory to scan
        @param algorithm: hashlib algorithm name, e.g. 'md5' or 'sha1'
        @param columns: number of tokens to produce (3-5)
        @return: [path, algorithm, digest-or-'d'[, size[, mtime]]]
        """
        if columns < 3 or not isinstance(columns, int):
            columns = 3
        try:
            line = []
            # Column 1: the source path
            line.append(unicode(item_path))
            # Column 2: the algorithm used
            line.append(unicode(algorithm))
            # Column 3: the digest, or 'd' for a directory
            if os.path.isdir(item_path):
                line.append(u'd')
            else:
                hash_gen = getattr(hashlib, algorithm)()
                with open(item_path, 'rb') as fh:
                    logger.info("Checking %s with algorithm %s" % (item_path, algorithm))
                    chunk = fh.read(1024 * 8)
                    while chunk:
                        hash_gen.update(chunk)
                        chunk = fh.read(1024 * 8)
                line.append(unicode(hash_gen.hexdigest()))
            if columns > 3:
                # Column 4: size in bytes
                line.append(unicode(os.stat(item_path)[ST_SIZE]))
            if columns > 4:
                # Column 5: modification time
                line.append(unicode(os.stat(item_path)[ST_MTIME]))
            return line
        except OSError:
            raise NotFound(item_path=item_path)
        except IOError:
            raise NotFound(item_path=item_path)
        except AttributeError:
            raise ValueError("This tool cannot perform hashtype %s" % algorithm)

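    # Illustrative result (hypothetical file): scan_path('data/a.txt', 'md5', 5)
    # might return [u'data/a.txt', u'md5',
    # u'0cc175b9c0f1b6a831c399e269772661', u'1', u'1270586730'].
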
    def scan_directory(self, directory_path, algorithm, recursive=False, columns=3):
        """Scan a directory, recursively or not, and return the report.

        @param directory_path: directory to scan
        @param algorithm: hashlib algorithm name, e.g. 'md5'
        @param recursive: scan the whole tree below directory_path if True
        @param columns: number of columns per line (3-5)
        """
        if os.path.exists(directory_path):
            if recursive:
                return self.scan_tree(directory_path, algorithm, columns)
            return self.scan_local(directory_path, algorithm, columns)
        else:
            raise NotFound(directory_path=directory_path, recursive=recursive)

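
# A minimal end-to-end sketch, guarded so importing this module has no side
# effects. The directory name 'example_data' is hypothetical - point it at
# any directory you want to manifest and verify.
if __name__ == '__main__':
    reporter = CheckmReporter()
    # Write a 3-column checkm manifest for the directory...
    reporter.create_checkm_file('example_data', 'md5', 'example_data/checkm.txt')
    # ...then re-verify it and report the tally.
    results = reporter.check_checkm_hashes('example_data', 'example_data/checkm.txt')
    logger.info("passed: %s failed: %s" % (len(results['pass']), len(results['fail'])))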