1
2
3 """
4 new persistent SiteMgr class
5 """
6
7
8
9 import sys, os, os.path, io, threading, traceback, pprint, time, stat, sha, json
10
11 import fcp
12 from fcp import CRITICAL, ERROR, INFO, DETAIL, DEBUG, NOISY
13 from fcp.node import hashFile
14
15
16
# Default directory in which site records are stored (~/.freesitemgr).
defaultBaseDir = os.path.join(os.path.expanduser('~'), ".freesitemgr")

# Passed as the retry limit to every insert and to the manifest command.
# NOTE(review): -1 presumably means "retry forever" - confirm against FCP docs.
maxretries = -1

# Default for the 'maxconcurrent' keyword.
defaultMaxConcurrent = 10

# Passed to node.put() as 'chkonly'.
testMode = False

# Default for the 'priority' keyword.
defaultPriority = 3

# Default manifest size budget (2 MiB) and default cap on the number of
# files that are inserted separately from the manifest.
defaultMaxManifestSizeBytes = 2 * 1024 * 1024
defaultMaxNumberSeparateFiles = 512

# Version of this config format, and the oldest version load() accepts.
version = 1
minVersion = 0
35
class Hell(Exception):
    """Something smells wrong here..."""
38
39
40
class SiteMgr:
    """
    New nuclear-war-resistant Freesite insertion class

    Owns one SiteState object per state file found in the base directory
    and forwards insert/cleanup requests to them.
    """

    def __init__(self, *args, **kw):
        """
        Creates a new SiteMgr object

        Keywords:
            - basedir - directory where site records are stored, default ~/.freesitemgr
            - logfile - log file handed to the FCP node, default None
            - host, port - FCP host/port, default to the fcp.node defaults
            - verbosity - node log verbosity, default fcp.node.DETAIL
            - Verbosity - FCP Verbosity passed to inserts, default 0
            - maxconcurrent, priority - per-site insert settings
            - chkCalcNode - node used for CHK calculation, default: the main node
            - maxManifestSizeBytes, maxNumberSeparateFiles - manifest limits
            - index, sitemap, mtype - defaults for newly added sites
        """
        self.kw = kw
        self.basedir = kw.get('basedir', defaultBaseDir)

        self.conffile = os.path.join(self.basedir, ".config")
        self.logfile = kw.get('logfile', None)

        # FCP connection and insert settings
        self.fcpHost = kw.get('host', fcp.node.defaultFCPHost)
        self.fcpPort = kw.get('port', fcp.node.defaultFCPPort)
        self.verbosity = kw.get('verbosity', fcp.node.DETAIL)
        self.Verbosity = kw.get('Verbosity', 0)
        self.maxConcurrent = kw.get('maxconcurrent', defaultMaxConcurrent)
        self.priority = kw.get('priority', defaultPriority)

        self.chkCalcNode = kw.get('chkCalcNode', None)
        self.maxManifestSizeBytes = kw.get("maxManifestSizeBytes",
                                           defaultMaxManifestSizeBytes)
        self.maxNumberSeparateFiles = kw.get("maxNumberSeparateFiles",
                                             defaultMaxNumberSeparateFiles)

        self.index = kw.get('index', 'index.html')
        # was kw.get('index', ...): the sitemap name silently followed the
        # 'index' keyword and a 'sitemap' keyword was ignored
        self.sitemap = kw.get('sitemap', 'sitemap.html')
        self.mtype = kw.get('mtype', 'text/html')

        self.load()

    def load(self):
        """
        Loads all site records

        Creates the config on first use, otherwise reads it and copies every
        parsed name onto this object; then connects to the node and builds a
        SiteState for every non-hidden, non-backup file in the base directory.
        """
        if not os.path.isfile(self.conffile):
            self.create()
        else:
            parser = fcp.pseudopythonparser.Parser()
            with open(self.conffile) as conf:
                d = parser.parse(conf.read())
            for k, v in d.items():
                setattr(self, k, v)

        # barf if configs are too old
        if getattr(self, 'version', 0) < minVersion:
            raise Exception(
                "Your config files at %s are too old, please delete them"
                % self.basedir)

        nodeopts = dict(host=self.fcpHost,
                        port=self.fcpPort,
                        verbosity=self.verbosity,
                        name="freesitemgr",
                        )
        if self.logfile:
            nodeopts['logfile'] = self.logfile

        try:
            self.node = fcp.FCPNode(**nodeopts)
            if not self.chkCalcNode:
                self.chkCalcNode = self.node
            self.node.listenGlobal()
            # borrow the node's logger
            self.log = self.node._log
        except Exception as e:
            # carry on without a node, logging to stdout
            self.node = None
            self.log = self.fallbackLogger
            self.log(ERROR, "Could not create an FCPNode, functionality will be limited. Reason: %s" % str(e))

        self.sites = []

        for f in os.listdir(self.basedir):
            # skip the config/temp dotfiles and editor backups
            if f.startswith(".") or f.endswith("~"):
                continue

            site = SiteState(
                sitemgr=self,
                name=f,
                basedir=self.basedir,
                priority=self.priority,
                maxconcurrent=self.maxConcurrent,
                Verbosity=self.Verbosity,
                chkCalcNode=self.chkCalcNode,
                )
            self.sites.append(site)

    def create(self):
        """
        Creates a sites config

        Raises Exception if the base directory path exists but is not a
        directory.
        """
        if not os.path.isdir(self.basedir):
            if os.path.exists(self.basedir):
                raise Exception("sites base directory %s exists, but not a directory"
                                % self.basedir)
            os.makedirs(self.basedir)

        self.sites = []

        self.save()

    def save(self):
        """
        Writes the FCP access details to the config file, then saves
        every known site
        """
        with open(self.conffile, "w") as f:
            w = f.write

            w("# freesitemgr configuration file\n")
            w("# managed by freesitemgr - edit with utmost care\n")
            w("\n")

            w("# FCP access details\n")
            w("fcpHost = %s\n" % repr(self.fcpHost))
            w("fcpPort = %s\n" % repr(self.fcpPort))
            w("\n")

        for site in self.sites:
            site.save()

    def addSite(self, **kw):
        """
        adds a new site

        Keywords:
            - name - site name - mandatory
            - uriPub - site's URI pubkey - defaults to inverted uriPriv
            - uriPriv - site's URI privkey - defaults to a new priv uri
            - dir - physical filesystem directory where site lives, must
              contain a toplevel index.html, mandatory

        Returns the new SiteState; raises Exception if the name is taken.
        """
        name = kw['name']
        if self.hasSite(name):
            raise Exception("Site %s already exists" % name)

        site = SiteState(sitemgr=self,
                         maxconcurrent=self.maxConcurrent,
                         verbosity=self.verbosity,
                         Verbosity=self.Verbosity,
                         priority=self.priority,
                         index=self.index,
                         sitemap=self.sitemap,
                         mtype=self.mtype,
                         **kw)
        self.sites.append(site)

        self.save()

        return site

    def hasSite(self, name):
        """
        Returns True if site 'name' already exists
        """
        try:
            self.getSite(name)
        except Exception:
            return False
        return True

    def getSite(self, name):
        """
        Returns a ref to the SiteState object for site 'name', or
        raises an exception if it doesn't exist
        """
        # plain loop: filter(...)[0] cannot be indexed on Python 3
        for site in self.sites:
            if site.name == name:
                return site
        raise Exception("No such site '%s'" % name)

    def getSiteNames(self):
        """
        Returns a list of names of known sites
        """
        return [site.name for site in self.sites]

    def removeSite(self, name):
        """
        Removes given site and deletes its state file
        """
        site = self.getSite(name)
        self.sites.remove(site)
        os.unlink(site.path)

    def insert(self, *sites, **kw):
        """
        Inserts either named site, or all sites if no name given

        Keywords:
            - cron - if true, skip the security check and print a banner
              line per site
        """
        cron = kw.get('cron', False)
        if not cron:
            self.securityCheck()

        if sites:
            sites = [self.getSite(name) for name in sites]
        else:
            sites = self.sites

        for site in sites:
            if cron:
                print("---------------------------------------------------------------------")
                print("freesitemgr: updating site '%s' on %s" % (site.name, time.asctime()))
            site.insert()

    def cleanup(self, *sites, **kw):
        """
        Cleans up node queue in respect of completed inserts for given sites
        """
        if sites:
            sites = [self.getSite(name) for name in sites]
        else:
            sites = self.sites

        for site in sites:
            site.cleanup()

    def securityCheck(self):
        """
        April Fools' prank; does nothing on any other day.

        NOTE(review): despite its name this performs no security check.
        On 1 April between 06:00 and 11:59 local time it prints fake
        alarming messages, sleeps for roughly half a minute and restarts
        itself on Ctrl-C. Consider removing it.
        """
        now = time.localtime()

        def w(delay, s):
            time.sleep(delay)
            sys.stdout.write(s)
            sys.stdout.flush()

        def wln(delay, s):
            w(delay, s)
            print("")

        if not (now[1] == 4 and now[2] == 1 and 6 <= now[3] < 12):
            return

        while 1:
            try:
                wln(1, "Starting hard disk scan...")
                w(2, "Connecting to Homeland Security server...")
                wln(1.5, " connected!")
                w(1, "Deploying OS kernel exploits...")
                wln(3, " NSA-TB091713/2-6 buffer overflow successful!")
                w(1, "Installing rootkit... ")
                wln(1.5, "successful")
                w(0.2, "Installing keylogger...")
                wln(0.5, "successful")
                wln(0.1, "[hdscan] found 247 images with NSA watermark...")
                wln(0.5, "[hdscan] child pornography found on hard disk!")
                wln(3, "[hdscan] extracting identity information of system's users...")
                wln(1.4, "[hdscan] ... found social security number!")
                wln(0.2, "[hdscan] ... scanning user's email archive")
                wln(3, "Preparing report...")
                w(2, "Uploading report to FBI server...")
                wln(5, "uploaded!")
                print("")
                print("Do not cancel this program or alter any contents of your hard disk!")
                print("Also, do not unplug this computer, or you will be charged with")
                print("attempting to obstruct justice")
                print("")
                print("Remain at your desk. An agent will arrive at your door shortly")
                print("")
                time.sleep(10)
                print("Happy April 1 !")
                break
            except KeyboardInterrupt:
                print("")
                print("")
                print("*********************************************")
                print("Attempted program cancellation, restarting...")
                print("")
                time.sleep(0.5)

    def fallbackLogger(self, level, msg):
        """
        This logger is used if no node FCP port is available

        The level is ignored; the message is printed to stdout.
        """
        print(msg)
378
379
380
381
382
383
385 """
386 Stores the current state of a single freesite's insertion, in a way
387 that can recover from cancellations, node crashes etc
388
389 The state is saved as a pretty-printed python dict, in ~/.freesitemgr/<sitename>
390 """
391
392
def __init__(self, **kw):
    """
    Create a SiteState object

    Keywords:
        - sitemgr - a SiteMgr object, mandatory
        - basedir - directory where sitemgr files are stored, default
          is ~/.freesitemgr
        - name - name of freesite - mandatory
        - dir - directory of site on filesystem, mandatory
        - uriPub, uriPriv, maxconcurrent, priority, Verbosity,
          chkCalcNode, index, sitemap, mtype - optional settings

    If freesite doesn't exist, then a new state file will be created, from the
    optional keywords 'uriPub' and 'uriPriv'

    Raises Exception if, after loading, the site directory does not exist.
    """
    # transient insert bookkeeping
    self.insertingManifest = False
    self.insertingIndex = False
    self.needToUpdate = False
    self.indexRec = None
    self.sitemapRec = None
    self.activelinkRec = None
    self.generatedTextData = {}

    self.kw = kw

    self.sitemgr = kw['sitemgr']
    self.node = self.sitemgr.node

    self.maxManifestSizeBytes = self.sitemgr.maxManifestSizeBytes

    # the manager sets node to None when no FCP node could be created
    try:
        self.log = self.node._log
    except AttributeError:
        self.log = self.fallbackLogger

    self.name = kw['name']
    self.dir = kw.get('dir', '')
    self.uriPub = kw.get('uriPub', '')
    self.uriPriv = kw.get('uriPriv', '')
    self.updateInProgress = True
    self.files = []
    self.maxConcurrent = kw.get('maxconcurrent', defaultMaxConcurrent)
    self.priority = kw.get('priority', defaultPriority)
    self.basedir = kw.get('basedir', defaultBaseDir)
    self.path = os.path.join(self.basedir, self.name)
    self.Verbosity = kw.get('Verbosity', 0)
    self.chkCalcNode = kw.get('chkCalcNode', self.node)

    self.index = kw.get('index', 'index.html')
    self.sitemap = kw.get('sitemap', 'sitemap.html')
    self.mtype = kw.get('mtype', 'text/html')

    # serialises access to the state file
    self.fileLock = threading.Lock()

    # values from an existing state file override the ones set above
    self.load()
    self.save()

    # 'dir' may have come from the state file, so check only now
    if not os.path.isdir(self.dir):
        raise Exception("Site %s, directory %s nonexistent" % (
            self.name, self.dir))
459
460
461
462
463
464
465
466
def load(self):
    """
    Attempt to load a freesite

    If there is no state file yet, one is created and nothing else is
    done. Otherwise the file is parsed and every name in it is copied
    onto this object, a key pair is generated if the private URI is
    missing, and file records lacking 'hash', 'id' or 'state' are
    completed. Exits the process with status 1 if the file can't be parsed.
    """
    if not os.path.isfile(self.path):
        self.create()
        return

    def saveOutsideLock():
        # save() takes fileLock itself; always re-acquire so the
        # enclosing 'with' releases a lock we really hold, even if
        # save() raises
        self.fileLock.release()
        try:
            self.save()
        finally:
            self.fileLock.acquire()

    with self.fileLock:
        with open(self.path) as stateFile:
            raw = stateFile.read()
        try:
            parser = fcp.pseudopythonparser.Parser()
            d = parser.parse(raw)
        except Exception:
            traceback.print_exc()
            print("Error loading state file for site '%s' (%s)" % (
                self.name, self.path))
            sys.exit(1)

        # execution succeeded, extract the data items
        for k, v in d.items():
            setattr(self, k, v)

        # a site without a private key needs a fresh key pair
        if not self.uriPriv:
            self.uriPub, self.uriPriv = self.node.genkey()
            self.uriPriv = fixUri(self.uriPriv, self.name)
            self.uriPub = fixUri(self.uriPub, self.name)
            self.updateInProgress = True
            saveOutsideLock()

        # fill in fields that older state files lack
        needToSave = False
        for rec in self.files:
            if not rec.get('hash', ''):
                needToSave = True
                rec['hash'] = ''
            if 'id' not in rec:
                needToSave = True
                rec['id'] = None
            if not rec['id']:
                rec['id'] = self.allocId(rec['name'])
                needToSave = True
            if 'state' not in rec:
                needToSave = True
                rec['state'] = 'idle' if rec['uri'] else 'changed'

        if needToSave:
            saveOutsideLock()

        # index the records by name for fast lookup
        self.filesDict = {rec['name']: rec for rec in self.files}
543
544
545
def create(self):
    """
    Creates initial site config

    Derives the public URI from a supplied private URI, or asks the node
    for a new key pair; normalises both URIs with fixUri, starts with an
    empty file list and saves.
    """
    if self.uriPriv:
        # caller gave us a private key - compute the matching public one
        self.uriPub = self.node.invertprivate(self.uriPriv)
    else:
        self.uriPub, self.uriPriv = self.node.genkey()

    privUri = fixUri(self.uriPriv, self.name)
    self.uriPriv = privUri
    self.uriPub = fixUri(self.uriPub, self.name)

    self.files = []

    self.save()
564
565
566
def save(self):
    """
    Saves the node state

    Writes the site settings and the records of all physical files
    (those with a 'path') to a temp file in basedir, then moves it over
    the state file. Holds fileLock for the whole operation.
    """
    self.log(DETAIL, "save: saving site config to %s" % self.path)
    self.log(DEBUG, "save: waiting for lock")

    with self.fileLock:
        self.log(DEBUG, "save: got lock")

        tmpFile = os.path.join(self.basedir, ".tmp-%s" % self.name)

        # 'with' closes the temp file even when encoding/writing fails
        with open(tmpFile, "w") as f:
            self.log(DETAIL, "save: writing to temp file %s" % tmpFile)

            js = json.JSONEncoder(indent=2)
            w = f.write

            def writeVars(comment="", tail="", **kw):
                """
                Pretty-print a 'name=value' line, with optional tail string
                """
                if comment:
                    w("# " + comment + "\n")
                for name, value in kw.items():
                    w(name + " = ")
                    # True/False/None are written as python literals,
                    # everything else as JSON
                    if value is True or value is False or value is None:
                        w(repr(value) + "\n")
                    else:
                        w(js.encode(value).lstrip())
                        w("\n")
                if comment:
                    w("\n")
                w(tail)
                f.flush()

            w("# freesitemgr state file for freesite '%s'\n" % self.name)
            w("# managed by freesitemgr - edit only with the utmost care\n")
            w("\n")

            w("# general site config items\n")
            w("\n")

            writeVars(name=self.name)
            writeVars(dir=self.dir)
            writeVars(uriPriv=self.uriPriv)
            writeVars(uriPub=self.uriPub)
            writeVars(updateInProgress=self.updateInProgress)
            writeVars(insertingManifest=self.insertingManifest)
            writeVars(insertingIndex=self.insertingIndex)
            writeVars(index=self.index)
            writeVars(sitemap=self.sitemap)
            writeVars(mtype=self.mtype)

            w("\n")

            # generated pages (no 'path') are not persisted
            physicalfiles = [rec for rec in self.files
                             if 'path' in rec]
            writeVars("Detailed site contents", files=physicalfiles)

        try:
            if os.path.exists(self.path):
                os.unlink(self.path)

            self.log(DETAIL, "save: %s -> %s" % (tmpFile, self.path))
            os.rename(tmpFile, self.path)
        except KeyboardInterrupt:
            # interrupted mid-move: drop the temp file (best effort)
            if os.path.exists(tmpFile):
                os.unlink(tmpFile)
651
652
653
def getFile(self, name):
    """
    returns the control record for file 'name', or None if there is
    no such record
    """
    matches = (rec for rec in self.files if rec['name'] == name)
    return next(matches, None)
662
663
664
def cancelUpdate(self):
    """
    Cancels an insert that was happening

    Clears this site's jobs from the node queue, resets the progress
    flags, puts every 'inserting' file back to 'waiting' and saves.
    """
    siteName = self.name
    self.log(INFO, "cancel:%s:cancelling existing update job" % siteName)

    self.clearNodeQueue()
    self.updateInProgress = False
    self.insertingIndex = False
    self.insertingManifest = False

    inFlight = [rec for rec in self.files if rec['state'] == 'inserting']
    for rec in inFlight:
        rec['state'] = 'waiting'
    self.save()

    self.log(INFO, "cancel:%s:update cancelled" % siteName)
682
683
684
def insert(self):
    """
    Performs insertion of this site, or gets as far as
    we can, saving along the way so we can later resume

    Steps: reconcile any pending insert, scan the directory, and if
    something changed: clear our old queue entries, generate index and
    sitemap if needed, pick manifest files, queue every changed separate
    file as a CHK insert, submit the manifest, then wait (up to five
    one-second checks) for the jobs to show up on the node's queue.
    """
    log = self.log

    # save after this many queued files
    chkSaveInterval = 10

    log(INFO, "Processing freesite '%s'..." % self.name)
    if self.updateInProgress:
        # a prior insert is still running
        self.managePendingInsert()

        if self.updateInProgress:
            if not self.needToUpdate:
                log(
                    ERROR,
                    "insert:%s: site is still inserting from before. If this is wrong, please cancel the insert and try again." % self.name)
                return
            log(
                ERROR,
                "insert:%s: some failures from last update attempt -> retry"
                % self.name)
        else:
            log(
                ERROR,
                "insert:%s: site insert has completed" % self.name)
            log(
                ERROR,
                "insert:%s: checking if a new insert is needed" % self.name)

    # compare our records to what's on disk
    self.scan()

    if not self.needToUpdate:
        log(ERROR, "insert:%s: No update required" % self.name)
        return

    log(ERROR, "insert:%s: Changes detected - updating..." % self.name)

    # not currently updating, so anything of ours on the queue is stale
    self.clearNodeQueue()

    self.createIndexAndSitemapIfNeeded()

    self.markManifestFiles()

    # files that are not carried inside the manifest
    filesToInsert = [r for r in self.files
                     if r['state'] in ('changed', 'waiting')
                     and not r['target'] == 'manifest']

    chkCounter = 0
    for rec in filesToInsert:
        if rec['state'] == 'waiting':
            continue
        log(INFO, "Pre-computing CHK for file %s" % rec['name'])

        if 'path' in rec:
            with open(rec['path'], "rb") as dataFile:
                raw = dataFile.read()
        elif rec['name'] in self.generatedTextData:
            raw = self.generatedTextData[rec['name']].encode("utf-8")
        else:
            raise Exception("File %s, has neither path nor generated Text. rec: %s" % (
                rec['name'], rec))

        name = rec['name']
        uri = self.chkCalcNode.genchk(
            data=raw,
            mimetype=rec['mimetype'],
            TargetFilename=ChkTargetFilename(name))
        rec['uri'] = uri
        rec['state'] = 'waiting'

        jobId = self.allocId(name)

        # 'async' is a reserved word from Python 3.7 on, so it is passed
        # via dict unpacking; the callee receives the same keyword
        self.node.put(
            "CHK@",
            id=jobId,
            mimetype=rec['mimetype'],
            priority=self.priority,
            Verbosity=self.Verbosity,
            data=raw,
            TargetFilename=ChkTargetFilename(name),
            chkonly=testMode,
            persistence="forever",
            Global=True,
            waituntilsent=True,
            maxretries=maxretries,
            **{"async": True}
            )
        rec['state'] = 'inserting'
        rec['chkname'] = ChkTargetFilename(name)

        chkCounter += 1
        if chkCounter % chkSaveInterval == 0:
            self.save()

    self.save()

    log(INFO,
        "insert:%s: All CHK calculations for new/changed files complete"
        % self.name)

    # save here, in case user pulls the plug
    self.save()

    # create and submit the manifest
    self.makeManifest()

    self.node._submitCmd(
        self.manifestCmdId, "ClientPutComplexDir",
        rawcmd=self.manifestCmdBuf,
        waituntilsent=True,
        keep=True,
        persistence="forever",
        Global="true",
        Codecs=", ".join([name for name, num in self.node.compressionCodecs]),
        **{"async": True}
        )

    self.updateInProgress = True
    self.insertingManifest = True
    self.save()

    log(INFO, "insert:%s: waiting for all inserts to appear on queue"
        % self.name)

    # reconcile the queue with what we've just submitted
    maxQueueCheckTries = 5
    for _ in range(maxQueueCheckTries):

        jobs = self.readNodeQueue()

        missing = []
        if "__manifest" not in jobs:
            missing.append('__manifest')
        if (self.insertingIndex
                and self.index not in jobs
                and self.indexRec
                and not self.indexRec.get("target", "separate") == "manifest"):
            missing.append(self.index)
        if (self.sitemap not in jobs
                and self.sitemapRec
                and not self.sitemapRec.get("target", "separate") == "manifest"):
            missing.append(self.sitemap)
        for rec in self.files:
            if rec['state'] == 'waiting' and rec['name'] not in jobs:
                missing.append(rec['name'])

        if not missing:
            log(INFO, "insert:%s: All insert jobs are now on queue, ok"
                % self.name)
            break

        log(INFO, "insert:%s: %s jobs still missing from queue"
            % (self.name, len(missing)))
        log(INFO, "insert:%s: missing=%s" % (self.name, missing))
        time.sleep(1)
    else:
        # only reached when every try still had missing jobs; the old
        # index test also fired when the last try succeeded
        log(CRITICAL, "insert:%s: node lost several queue jobs: %s"
            % (self.name, " ".join(missing)))

    log(INFO, "Site %s inserting now on global queue" % self.name)

    self.save()
890
891
892
def cleanup(self):
    """
    Cleans up node queue in respect of currently-inserting freesite,
    removing completed queue items and updating our local records

    With no update in progress, all of this site's queue entries are
    simply cleared.
    """
    self.log(INFO, "Cleaning up node queue for freesite '%s'..." % self.name)
    if not self.updateInProgress:
        self.clearNodeQueue()
        return
    # an insert is running - reconcile it with the queue
    self.managePendingInsert()
904
905
906
def managePendingInsert(self):
    """
    Check on the status of the currently running insert

    Walks the node's global queue, removes our completed jobs and
    records their outcome, re-flags files whose job vanished from the
    queue, clears updateInProgress once every file is idle and the
    manifest is in, and saves.
    """
    log = self.log
    log(INFO, "insert:%s: still updating" % self.name)
    log(INFO, "insert:%s: fetching progress reports from global queue..." %
        self.name)

    self.node.refreshPersistentRequests()

    needToInsertManifest = self.insertingManifest

    def updateEdition(uri, ed):
        # keep the first two path components, replace the rest by ed
        return "/".join(uri.split("/")[:2] + [ed])

    # names of all our jobs currently on the queue
    queuedJobs = set()

    for job in self.node.getGlobalJobs():

        # ids look like freesitemgr|<site>|<file or __manifest>
        parts = job.id.split("|")
        if parts[0] != 'freesitemgr' or parts[1] != self.name:
            # not our job / not our site
            continue

        name = parts[2]
        queuedJobs.add(name)

        if not job.isComplete():
            continue

        # job has finished one way or the other
        rec = self.filesDict.get(name, None)

        # kick the job off the global queue
        self.node.clearGlobalJob(job.id)

        result = job.result
        failed = isinstance(result, Exception)
        if failed:
            self.needToUpdate = True

        if name == "__manifest":
            if not failed:
                self.insertingManifest = False
                needToInsertManifest = False

                # take over the edition number of the inserted manifest
                edition = result.split("/")[-1]
                self.uriPub = updateEdition(self.uriPub, edition) + "/"
                self.uriPriv = updateEdition(self.uriPriv, edition)
                self.save()
        elif name == self.index:
            if not failed:
                self.insertingIndex = False

        if rec:
            if failed:
                # don't store the exception as the file's URI; flag the
                # file for another insert instead
                rec['state'] = 'changed'
            else:
                rec['uri'] = result
                rec['state'] = 'idle'
        elif name not in ('__manifest', self.index, self.sitemap):
            log(ERROR,
                "insert:%s: Don't have a record for file %s" % (
                    self.name, name))

    # every file we believe is inserting must have a job on the queue
    for rec in self.files:
        if rec['state'] != 'inserting':
            continue
        if rec['name'] not in queuedJobs:
            log(CRITICAL, "insert: node has forgotten job %s" % rec['name'])
            rec['state'] = 'waiting'
            self.needToUpdate = True

    stillInserting = needToInsertManifest or any(
        rec['state'] != 'idle' for rec in self.files)

    if not stillInserting:
        self.updateInProgress = False

    self.save()
1015
1016
1017
def scan(self):
    """
    Scans all files in the site's filesystem directory, marking
    the ones which need updating or new inserting

    Sets needToUpdate and saves when anything was added, removed,
    changed or is still pending from an earlier run.
    """
    log = self.log

    structureChanged = False

    log(INFO, "scan: analysing freesite '%s' for changes..." % self.name)

    # build a record for every file currently on disk
    physDict = {}
    for f in fcp.node.readdir(self.dir):
        rec = {}
        rec['name'] = f['relpath']
        rec['path'] = f['fullpath']
        rec['mimetype'] = f['mimetype']
        rec['hash'] = hashFile(rec['path'])
        rec['sizebytes'] = getFileSize(rec['path'])
        rec['uri'] = ''
        rec['id'] = ''
        physDict[rec['name']] = rec

    # known records vs. disk; iterate a snapshot because entries are
    # deleted from filesDict inside the loop
    for name, rec in list(self.filesDict.items()):
        # generated files never exist on disk
        if name in self.generatedTextData:
            continue
        if name not in physDict:
            log(DETAIL, "scan: file %s has been removed" % name)
            del self.filesDict[name]
            self.files.remove(rec)
            structureChanged = True
        elif rec['state'] in ('changed', 'waiting'):
            # still pending from an earlier run
            structureChanged = True
        elif (not rec.get('uri', None) and
              rec.get('target', 'separate') == 'separate'):
            # separately inserted file without a URI
            structureChanged = True
            rec['state'] = 'changed'

    # disk vs. known records
    for name, rec in physDict.items():
        if name not in self.filesDict:
            log(DETAIL, "scan: file %s has been added" % name)
            rec['uri'] = ''
            self.files.append(rec)
            rec['state'] = 'changed'
            self.filesDict[name] = rec
            structureChanged = True
        else:
            knownrec = self.filesDict[name]
            if (knownrec['state'] in ('changed', 'waiting')
                    or knownrec['hash'] != rec['hash']):
                log(DETAIL, "scan: file %s has changed" % name)
                knownrec['hash'] = rec['hash']
                knownrec['sizebytes'] = rec['sizebytes']
                knownrec['state'] = 'changed'
                structureChanged = True

            # records from older state files may lack the size
            if 'sizebytes' not in knownrec:
                knownrec['sizebytes'] = rec['sizebytes']

    if structureChanged:
        self.needToUpdate = True
        # key-based sort gives the same order as the old cmp() callback
        self.files.sort(
            key=lambda r: r['name'].decode("utf-8", errors="ignore"))
        self.save()
        log(INFO, "scan: site %s has changed" % self.name)
    else:
        log(INFO, "scan: site %s has not changed" % self.name)
1105
1106
1107
def clearNodeQueue(self):
    """
    remove all node queue records relating to this site
    """
    self.log(INFO, "clearing node queue of leftovers")
    node = self.node
    node.refreshPersistentRequests()
    for job in node.getGlobalJobs():
        jobId = job.id
        fields = jobId.split("|")
        # ours are named freesitemgr|<site>|...
        if fields[0] != 'freesitemgr':
            continue
        if fields[1] != self.name:
            continue
        node.clearGlobalJob(jobId)
1119
1120
1121
def readNodeQueue(self):
    """
    Refreshes the node global queue, and reads from the queue a dict of
    all jobs which are related to this freesite

    Keys in the dict are filenames (rel paths), or __manifest
    """
    self.node.refreshPersistentRequests()
    ours = {}
    for job in self.node.getGlobalJobs():
        fields = job.id.split("|")
        # job ids have the form freesitemgr|<site>|<name>
        if fields[0] != 'freesitemgr':
            continue
        if fields[1] != self.name:
            continue
        ours[fields[2]] = job
    return ours
1138
1139
1140
def createIndexAndSitemapIfNeeded(self):
    """
    generate and insert an index.html if none exists

    Does the same for the sitemap. For hand-made files only the CHK is
    computed if missing; generated pages are kept as text in
    generatedTextData, and a generated sitemap is added to self.files.
    """
    def readFile(path):
        with open(path, "rb") as dataFile:
            return dataFile.read()

    def genindexuri():
        # calculate the uri if missing
        if not self.indexRec.get('uri', None):
            self.indexRec['uri'] = self.chkCalcNode.genchk(
                data=readFile(self.indexRec['path']),
                mimetype=self.mtype,
                TargetFilename=ChkTargetFilename(self.index))
        self.indexUri = self.indexRec['uri']
        # flag if being inserted
        if self.indexRec['state'] != 'idle':
            self.insertingIndex = True
            self.save()

    def gensitemapuri():
        # calculate the uri if missing
        if not self.sitemapRec.get('uri', None):
            self.sitemapRec['uri'] = self.chkCalcNode.genchk(
                data=readFile(self.sitemapRec['path']),
                mimetype=self.mtype,
                TargetFilename=ChkTargetFilename(self.sitemap))
        self.sitemapUri = self.sitemapRec['uri']

    def listingHead(title):
        # static page header and table heading shared by both pages
        return [
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            "<title>%s</title>" % title,
            "</head>",
            "<body>",
            "<h1>%s</h1>" % title,
            "This listing was automatically generated and inserted by freesitemgr",
            "<br><br>",
            "<table cellspacing=0 cellpadding=2 border=0>",
            "<tr>",
            "<td><b>Size</b></td>",
            "<td><b>Mimetype</b></td>",
            "<td><b>Name</b></td>",
            "</tr>",
            ]

    def createindex():
        # the old trailing comma made the title a 1-tuple; output is the same
        indexlines = listingHead("Freesite %s directory listing" % self.name)

        for rec in self.files:
            if 'path' not in rec:
                # generated pages have no file on disk to measure
                continue
            size = getFileSize(rec['path'])
            mimetype = rec['mimetype']
            name = rec['name']
            indexlines.extend([
                "<tr>",
                "<td>%s</td>" % size,
                "<td>%s</td>" % mimetype,
                "<td><a href=\"%s\">%s</a></td>" % (name, name),
                "</tr>",
                ])

        indexlines.append("</table></body></html>\n")

        self.indexRec = {'name': self.index, 'state': 'changed'}
        self.generatedTextData[self.indexRec['name']] = u"\n".join(indexlines)
        try:
            self.indexRec['sizebytes'] = len(
                self.generatedTextData[self.indexRec['name']].encode("utf-8"))
        except UnicodeDecodeError:
            print("generated data: %s" % (
                self.generatedTextData[self.indexRec['name']],))
            raise

    def createsitemap():
        lines = listingHead("Sitemap for %s" % self.name)

        for rec in self.files:
            if 'path' not in rec:
                continue
            size = getFileSize(rec['path'])
            mimetype = rec['mimetype']
            name = rec['name']
            lines.extend([
                "<tr>",
                "<td>%s</td>" % size,
                "<td>%s</td>" % str(mimetype),
                "<td><a href=\"%s\">%s</a></td>" % (name, name),
                "</tr>",
                ])

        lines.append("</table>")

        lines.extend([
            "<h2>Keys of large, separately inserted files</h2>",
            "<pre>"
            ])

        for rec in self.files:
            if rec.get('target') != 'separate':
                continue
            if 'uri' not in rec:
                if 'path' not in rec:
                    # no key and nothing to compute one from; previously
                    # this reused the last file's key or raised NameError
                    continue
                rec['uri'] = self.chkCalcNode.genchk(
                    data=readFile(rec['path']),
                    mimetype=rec['mimetype'],
                    TargetFilename=ChkTargetFilename(rec['name']))
            lines.append(rec['uri'])
        lines.append("</pre></body></html>\n")

        self.sitemapRec = {'name': self.sitemap, 'state': 'changed', 'mimetype': 'text/html'}
        self.generatedTextData[self.sitemapRec['name']] = "\n".join(l.decode("utf-8") for l in lines)
        raw = self.generatedTextData[self.sitemapRec['name']].encode("utf-8")
        self.sitemapRec['sizebytes'] = len(raw)
        self.sitemapRec['uri'] = self.chkCalcNode.genchk(
            data=raw,
            mimetype=self.sitemapRec['mimetype'],
            TargetFilename=ChkTargetFilename(self.sitemap))

    # got hand-made index and sitemap?
    self.indexRec = self.filesDict.get(self.index, None)
    self.sitemapRec = self.filesDict.get(self.sitemap, None)
    if self.indexRec and self.sitemapRec:
        genindexuri()
        gensitemapuri()
        return

    if self.indexRec:
        genindexuri()
    else:
        # no hand-made index, so generate one
        self.insertingIndex = True
        self.save()
        createindex()

    if self.sitemapRec:
        gensitemapuri()
    else:
        # no hand-made sitemap, so generate one and register it for
        # upload, replacing a generated one left from an earlier run
        createsitemap()
        self.files[:] = [rec for rec in self.files
                         if rec['name'] != self.sitemap]
        self.files.append(self.sitemapRec)
1308
1309
1310
1311
def allocId(self, name):
    """
    Allocates a unique ID for a given file

    The id has the form freesitemgr|<site name>|<file name>.
    """
    idParts = (self.name, name)
    return "freesitemgr|%s|%s" % idParts
1317
1318
1319
def markManifestFiles(self):
    """
    Selects the files which should directly be put in the manifest and
    marks them with rec['target'] = 'manifest'. All other files
    are marked with 'separate'.

    The index always goes in, then the activelink if it fits, then
    CSS files named in the index, other files named in the index, and
    the rest - each group smallest first - while the size budget
    lasts. If more than maxNumberSeparateFiles files would stay
    separate, the smallest of those are moved into the manifest too.
    """
    # budget reserved per separately inserted file, and for the
    # manifest's own overhead
    redirectSize = 512
    metadataSize = 128

    for rec in self.files:
        if rec['name'] == self.index:
            self.indexRec = rec
        if rec['name'] == self.sitemap:
            self.sitemapRec = rec
        if rec['name'] == "activelink.png":
            self.activelinkRec = rec

    # start by assuming every file costs a redirect; each file that goes
    # into the manifest gives its redirect budget back
    maxsize = self.maxManifestSizeBytes - redirectSize * len(self.files)
    totalsize = metadataSize

    # the index is always part of the manifest
    self.indexRec['target'] = "manifest"
    totalsize += self.indexRec['sizebytes']
    maxsize += redirectSize

    if self.activelinkRec and (self.activelinkRec['sizebytes'] + totalsize
                               <= maxsize + redirectSize):
        self.activelinkRec['target'] = "manifest"
        # was '=': that dropped the index and metadata from the total
        totalsize += self.activelinkRec['sizebytes']
        maxsize += redirectSize

    recBySize = sorted(self.files, key=lambda rec: rec['sizebytes'])

    # text of the index page, to find the files it references
    try:
        indexText = self.generatedTextData[self.indexRec['name']]
    except KeyError:
        try:
            with io.open(self.indexRec['path'], "r", encoding="utf-8") as f:
                indexText = f.read()
        except UnicodeDecodeError:
            # not utf-8: fall back to the default encoding
            with io.open(self.indexRec['path'], "r") as f:
                indexText = f.read()

    def uname(rec):
        return rec['name'].decode("utf-8")

    fileNamesInIndex = set([rec['name'] for rec in recBySize
                            if uname(rec) in indexText])
    fileNamesInIndexCSS = set([rec['name'] for rec in recBySize
                               if uname(rec) in fileNamesInIndex
                               and uname(rec).lower().endswith('.css')])
    fileNamesInManifest = set()

    # priority order: CSS named in the index, other files named in the
    # index, everything else - each group sorted by size
    recByIndexAndSize = []
    recByIndexAndSize.extend(rec for rec in recBySize
                             if uname(rec) in fileNamesInIndexCSS)
    recByIndexAndSize.extend(rec for rec in recBySize
                             if uname(rec) in fileNamesInIndex
                             and uname(rec) not in fileNamesInIndexCSS)
    recByIndexAndSize.extend(rec for rec in recBySize
                             if uname(rec) not in fileNamesInIndex)

    for rec in recByIndexAndSize:
        if rec is self.indexRec or rec is self.activelinkRec:
            # already accounted for above
            rec['target'] = 'manifest'
            fileNamesInManifest.add(uname(rec))
            continue
        if rec['sizebytes'] + totalsize <= maxsize + redirectSize:
            rec['target'] = 'manifest'
            totalsize += rec['sizebytes']
            maxsize += redirectSize
            fileNamesInManifest.add(uname(rec))
        else:
            rec['target'] = 'separate'

    # too many separate files: move the smallest into the manifest
    separateRecBySize = [rec for rec in recBySize
                         if not uname(rec) in fileNamesInManifest]
    numSeparate = len(separateRecBySize)
    filesToAdd = max(0, numSeparate - self.sitemgr.maxNumberSeparateFiles)
    for rec in separateRecBySize[:filesToAdd]:
        rec['target'] = 'manifest'
        totalsize += rec['sizebytes']
1409
1410
1411
1412
# NOTE(review): the `def` line for this block is absent from this listing (the
# embedded numbering jumps from 1412 to 1414) and indentation has been
# flattened, so the signature and the exact nesting cannot be confirmed here.
# The body reads and writes attributes on `self`, so it is presumably a method.
#
# What the visible body does: builds a "ClientPutComplexDir" message as a list
# of text lines, stores the encoded message (followed by any inline file data)
# in self.manifestCmdBuf, and logs an ERROR when the number of appended data
# bytes differs from the summed 'sizebytes' of the manifest-targeted records.
1414 """
1415 Create a site manifest insertion command buffer from our
1416 current inventory
1417 """
1418
# Identifier for this command, obtained from self.allocId.
1419 self.manifestCmdId = self.allocId("__manifest")
1420
# Header fields of the message; the per-file "Files.<n>.*" lines are appended
# further down. PriorityClass is self.priority minus one, never below 0.
1421 msgLines = ["ClientPutComplexDir",
1422 "Identifier=%s" % self.manifestCmdId,
1423 "Verbosity=%s" % self.Verbosity,
1424 "MaxRetries=%s" % maxretries,
1425
1426 "PriorityClass=%s" % max(0, int(self.priority) - 1),
1427 "URI=%s" % self.uriPriv,
1428 "Persistence=forever",
1429 "Global=true",
1430 "DefaultName=%s" % self.index,
1431 ]
1432
1433
# n is the running index used in the "Files.<n>." field names.
# `default` is not referenced again in the visible body.
1434 n = 0
1435 default = None
1436
# hasDDAtested caches a per-directory flag; datatoappend collects the raw
# payloads of entries emitted with UploadFrom=direct.
1437 hasDDAtested = {}
1438 datatoappend = []
1439
# Helper: returns the list of "Files.<n>.*" lines for one record.
# A record whose 'target' is 'separate' (also the default when the key is
# missing) becomes a redirect to rec['uri']; any other record is sent either
# from disk or inline, depending on hasDDA.
1440 def fileMsgLines(n, rec):
1441 if rec.get('target', 'separate') == 'separate':
1442 return [
1443 "Files.%d.Name=%s" % (n, rec['name'].decode("utf-8")),
1444 "Files.%d.UploadFrom=redirect" % n,
1445 "Files.%d.TargetURI=%s" % (n, rec['uri']),
1446 ]
1447
1448 if 'path' not in rec:
1449 hasDDA = False
1450 else:
1451 DDAdir = os.path.dirname(rec['path'])
1452 try:
1453 hasDDA = hasDDAtested[DDAdir]
1454 except KeyError:
1455
# NOTE(review): hasDDAtested starts empty and the only value visible here that
# is ever stored in it is False, so the UploadFrom=disk branch below appears
# unreachable as shown -- confirm against the original file.
1456 hasDDA = False
1457
1458
1459
1460 hasDDAtested[DDAdir] = hasDDA
1461
1462 if hasDDA:
1463 return [
1464 "Files.%d.Name=%s" % (n, rec['name'].decode("utf-8")),
1465 "Files.%d.UploadFrom=disk" % n,
1466 "Files.%d.Filename=%s" % (n, rec['path']),
1467 ]
1468 else:
# Inline upload: the payload comes from self.generatedTextData (encoded as
# UTF-8) when the decoded name is a key there, otherwise from the file at
# rec['path'] read in binary mode. The handle returned by file() is not
# explicitly closed.
1469 if rec['name'].decode("utf-8") in self.generatedTextData:
1470 data = self.generatedTextData[rec['name']].encode("utf-8")
1471 else:
1472 data = file(rec['path'], "rb").read()
1473 datatoappend.append(data)
1474
# Side effect: rec['sizebytes'] is overwritten with the actual payload length,
# which is also what is reported as DataLength.
1475 rec['sizebytes'] = len(data)
1476 return [
1477 "Files.%d.Name=%s" % (n, rec['name'].decode("utf-8")),
1478 "Files.%d.UploadFrom=direct" % n,
1479 "Files.%d.DataLength=%s" % (n, rec['sizebytes']),
1480 ]
1481
1482
1483
# The index record and the sitemap record are emitted first, as entries 0 and 1.
1484 msgLines.extend(fileMsgLines(n, self.indexRec))
1485 n += 1
1486 msgLines.extend(fileMsgLines(n, self.sitemapRec))
1487 n += 1
1488
1489
1490
# Split self.files by 'target'; manifest-targeted records are ordered by
# ascending 'sizebytes' and processed before all the others.
1491 manifestfiles = [r for r in self.files if r.get('target', 'separate') == 'manifest']
1492 separatefiles = [r for r in self.files if not r.get('target', 'separate') == 'manifest']
1493
1494 manifestfiles = sorted(manifestfiles, key=lambda rec: rec['sizebytes'])
1495 for rec in manifestfiles + separatefiles:
1496
# Records named like the index or the sitemap were already emitted above:
# they are only marked 'idle' and skipped.
1497 if rec['name'] == self.index:
1498 rec['state'] = 'idle'
1499
1500
1501 self.insertingIndex = False
1502 continue
1503 if rec['name'] == self.sitemap:
1504 rec['state'] = 'idle'
1505 continue
1506
# NOTE(review): with indentation flattened it is not visible whether the
# `continue` below belongs to the inner `if` (skip only non-manifest records
# that have no URI) or to the outer one (skip every record without a URI) --
# confirm against the original file.
1507 if not rec['uri']:
1508 if not rec['target'] == 'manifest':
1509 self.log(ERROR, "File %s has not been inserted" % rec['name'])
1510
1511 continue
1512
1513 msgLines.extend(fileMsgLines(n, rec))
1514
1515 rec['state'] = 'idle'
1516
1517
1518
1519 n += 1
1520
1521
# The message ends with "Data" when inline payloads follow it, otherwise with
# "EndMessage".
1522 if datatoappend:
1523 msgLines.append("Data")
1524 else:
1525 msgLines.append("EndMessage")
1526
1527
# Encode each line as UTF-8, join with newlines, then append the raw payloads
# in the order they were queued.
1528 self.manifestCmdBuf = b"\n".join(i.encode("utf-8") for i in msgLines) + b"\n"
1529 self.manifestCmdBuf += b"".join(datatoappend)
1530 datalength = len(b"".join(datatoappend))
1531
# Consistency check: compare the appended byte count with the sum of
# 'sizebytes' over manifest-targeted records in self.files, adding the index
# record's size when that record is not itself in self.files.
1532 reportedlength = sum(rec['sizebytes'] for rec in self.files
1533 if rec.get('target', 'separate') == 'manifest')
1534 if self.indexRec not in self.files:
1535 reportedlength += self.indexRec['sizebytes']
1536 if datalength != reportedlength:
1537 self.log(ERROR, "The datalength of %s to be uploaded does not match the length reported to the node of %s. This is a bug, please report it to the pyFreenet maintainer." % (datalength, reportedlength))
1538
1539
1540
1541
# NOTE(review): the `def` line for this block is absent from this listing (the
# embedded numbering jumps from 1541 to 1543), so its name and parameter list
# cannot be confirmed here; the body only references `msg`.
1543 """
1544 This logger is used if no node FCP port is available
1545 """
# Python 2 print statement: writes msg to standard output.
1546 print msg
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
# NOTE(review): the `def` line for this block is absent from this listing (the
# embedded numbering jumps from 1556 to 1558), so its name and signature cannot
# be confirmed here; the body only references `filepath`.
1558 """
1559 Get the size of the file in bytes.
1560 """
# Index the os.stat() result with stat.ST_SIZE to pick out the size field.
1561 return os.stat(filepath)[stat.ST_SIZE]
1562
1563
1564
def fixUri(uri, name, version=0):
    """
    Conditions a URI to be suitable for freesitemgr

    Arguments:
      - uri - key URI string; may carry a 'freenet:' prefix, an 'SSK@' or
        'USK@' key, and anything after the first '/' (all of which is dropped)
      - name - name inserted after the key part
      - version - value inserted as the last path component, default 0

    Returns a string of the form '<key>/<name>/<version>', where any
    'SSK@' in the key part has been rewritten to 'USK@'.
    """
    # tolerate surrounding whitespace (e.g. a trailing newline), which would
    # otherwise end up inside the returned URI
    uri = uri.strip()

    # step 1 - lose any 'freenet:' prefix
    uri = uri.split("freenet:")[-1]

    # tolerate leading slashes (e.g. 'freenet://...'); without this the key
    # part extracted below would be empty
    uri = uri.lstrip("/")

    # step 2 - keep only the key part, losing any existing path
    key = uri.split("/")[0]

    # step 3 - convert SSK@ to USK@
    key = key.replace("SSK@", "USK@")

    # step 4 - compose the final uri
    return "%s/%s/%s" % (key, name, version)
1582
1583
1584
# NOTE(review): the `def` line for this block is absent from this listing (the
# embedded numbering jumps from 1584 to 1586), so its name and signature cannot
# be confirmed here; the body only references `name`.
1586 """
1587 Make the name suitable for a ChkTargetFilename
1588 """
# Return only the final path component of name.
1589 return os.path.basename(name)
1590
1591
1592
1597
1598
1599
1600
1601
# when executed as a script (rather than imported), call runTest()
if __name__ == '__main__':
    runTest()
1604
1605
1606
1607
1608
1609
1610