Package fcp :: Module sitemgr
[hide private]
[frames] | [no frames]

Source Code for Module fcp.sitemgr

   1  #@+leo-ver=4 
   2  #@+node:@file sitemgr.py 
   3  """ 
   4  new persistent SiteMgr class 
   5  """ 
   6   
   7  #@+others 
   8  #@+node:imports 
   9  import sys, os, os.path, io, threading, traceback, pprint, time, stat, sha, json 
  10   
  11  import fcp 
  12  from fcp import CRITICAL, ERROR, INFO, DETAIL, DEBUG, NOISY 
  13  from fcp.node import hashFile 
  14   
  15  #@-node:imports 
  16  #@+node:globals 
  17  defaultBaseDir = os.path.join(os.path.expanduser('~'), ".freesitemgr") 
  18   
  19  maxretries = -1 
  20   
  21  defaultMaxConcurrent = 10 
  22   
  23  testMode = False 
  24  #testMode = True 
  25   
  26  defaultPriority = 3 
  27   
  28  defaultMaxManifestSizeBytes = 1024*1024*2 # 2.0 MiB: As used by the freenet default dir inserter. Reduced by 512 bytes per redirect. TODO: Add a larger side-container for additional medium-size files like images. Doing this here, because here we know what is linked in the index file. 
  29  defaultMaxNumberSeparateFiles = 512 # ad hoq - my node sometimes dies at 500 simultaneous uploads. This is half the space in the estimated size of the manifest. 
  30   
  31   
  32  version = 1 
  33   
  34  minVersion = 0 
  35   
class Hell(Exception):
    """
    Raised when the site manager runs into a state that should be
    impossible ("something smells wrong here...").
    """
38 39 #@-node:globals 40 #@+node:class SiteMgr
class SiteMgr:
    """
    New nuclear-war-resistant Freesite insertion class

    Keeps a collection of SiteState records (one per freesite) in a base
    directory (default ~/.freesitemgr) and drives their insertion through
    an FCP node connection.
    """
    #@    @+others
    #@+node:__init__
47 - def __init__(self, **kw):
48 """ 49 Creates a new SiteMgr object 50 51 Keywords: 52 - basedir - directory where site records are stored, default ~/.freesitemgr 53 """ 54 self.kw = kw 55 self.basedir = kw.get('basedir', defaultBaseDir) 56 57 self.conffile = os.path.join(self.basedir, ".config") 58 self.logfile = kw.get('logfile', None) 59 60 # set defaults 61 #print "SiteMgr: kw=%s" % kw 62 63 self.fcpHost = kw.get('host', fcp.node.defaultFCPHost) 64 self.fcpPort = kw.get('port', fcp.node.defaultFCPPort) 65 self.verbosity = kw.get('verbosity', fcp.node.DETAIL) 66 self.Verbosity = kw.get('Verbosity', 0) 67 self.maxConcurrent = kw.get('maxconcurrent', defaultMaxConcurrent) 68 self.priority = kw.get('priority', defaultPriority) 69 70 self.chkCalcNode = kw.get('chkCalcNode', None) 71 self.maxManifestSizeBytes = kw.get("maxManifestSizeBytes", 72 defaultMaxManifestSizeBytes) 73 self.maxNumberSeparateFiles = kw.get("maxNumberSeparateFiles", 74 defaultMaxNumberSeparateFiles) 75 76 77 self.index = kw.get('index', 'index.html') 78 self.sitemap = kw.get('index', 'sitemap.html') 79 self.mtype = kw.get('mtype', 'text/html') 80 # To decide whether to upload index and activelink as part of 81 # the manifest, we need to remember their record. 82 83 self.load()
84 85 #@-node:__init__ 86 #@+node:load
    def load(self):
        """
        Loads all site records

        Reads (or creates) the main config file, connects to the FCP
        node (degrading to a no-node fallback logger on failure), then
        builds a SiteState for every record file found in the basedir.
        """
        # ensure directory at least exists
        if not os.path.isfile(self.conffile):
            self.create()
        else:
            # load existing config
            parser = fcp.pseudopythonparser.Parser()
            d = parser.parse(file(self.conffile).read())
            for k,v in d.items():
                setattr(self, k, v)

        # barf if configs are too old
        if getattr(self, 'version', 0) < minVersion:
            raise Exception(
                "Your config files at %s are too old, please delete them" \
                    % self.basedir)

        # get a node object
        #print "load: verbosity=%s" % self.verbosity

        nodeopts = dict(host=self.fcpHost,
                        port=self.fcpPort,
                        verbosity=self.verbosity,
                        name="freesitemgr",
                        )
        if self.logfile:
            nodeopts['logfile'] = self.logfile

        try:
            # create node, if we can
            self.node = fcp.FCPNode(**nodeopts)
            # the CHK-calculation node defaults to the main node
            if not self.chkCalcNode:
                self.chkCalcNode = self.node

            self.node.listenGlobal()

            # borrow the node's logger
            self.log = self.node._log
        except Exception as e:
            # limited functionality - no node
            self.node = None
            self.log = self.fallbackLogger
            self.log(ERROR, "Could not create an FCPNode, functionality will be limited. Reason: %s" % str(e))

        log = self.log

        self.sites = []

        # load up site records
        for f in os.listdir(self.basedir):
            # skip the main config file, or emacs leftovers, or anything starting with '.'
            if f.startswith(".") or f.endswith("~"):
                continue

            # else it's a site, load it
            site = SiteState(
                sitemgr=self,
                name=f,
                basedir=self.basedir,
                priority=self.priority,
                maxconcurrent=self.maxConcurrent,
                Verbosity=self.Verbosity,
                chkCalcNode=self.chkCalcNode,
                )
            self.sites.append(site)
155 156 #@-node:load 157 #@+node:create
158 - def create(self):
159 """ 160 Creates a sites config 161 """ 162 # ensure directory exists 163 if not os.path.isdir(self.basedir): 164 if os.path.exists(self.basedir): 165 raise Exception("sites base directory %s exists, but not a directory" \ 166 % self.basedir) 167 os.makedirs(self.basedir) 168 169 self.sites = [] 170 171 self.save()
172 173 #@-node:create 174 #@+node:save
    def save(self):
        """
        Writes out the main config file, then saves every site record.
        """
        # now write out some boilerplate
        f = file(self.conffile, "w")
        w = f.write

        w("# freesitemgr configuration file\n")
        w("# managed by freesitemgr - edit with utmost care\n")
        w("\n")

        w("# FCP access details\n")
        w("fcpHost = %s\n" % repr(self.fcpHost))
        w("fcpPort = %s\n" % repr(self.fcpPort))
        w("\n")

        #w("# verbosity of FCP commands\n")
        #w("verbosity = %s\n" % repr(self.verbosity))
        #w("\n")

        f.close()

        # each SiteState persists itself to its own record file
        for site in self.sites:
            site.save()
198 199 #@-node:save 200 #@+node:addSite
201 - def addSite(self, **kw):
202 """ 203 adds a new site 204 205 Keywords: 206 - name - site name - mandatory 207 - uriPub - site's URI pubkey - defaults to inverted uriPriv 208 - uriPriv - site's URI privkey - defaults to a new priv uri 209 - dir - physical filesystem directory where site lives, must 210 contain a toplevel index.html, mandatory 211 """ 212 name = kw['name'] 213 if self.hasSite(name): 214 raise Exception("Site %s already exists" % name) 215 216 site = SiteState(sitemgr=self, 217 maxconcurrent=self.maxConcurrent, 218 verbosity=self.verbosity, 219 Verbosity=self.Verbosity, 220 priority=self.priority, 221 index=self.index, 222 sitemap=self.sitemap, 223 mtype=self.mtype, 224 **kw) 225 self.sites.append(site) 226 227 self.save() 228 229 return site
230 231 #@-node:addSite 232 #@+node:hasSite
233 - def hasSite(self, name):
234 """ 235 Returns True if site 'name' already exists 236 """ 237 try: 238 site = self.getSite(name) 239 return True 240 except: 241 return False
242 243 #@-node:hasSite 244 #@+node:getSite
245 - def getSite(self, name):
246 """ 247 Returns a ref to the SiteState object for site 'name', or 248 raises an exception if it doesn't exist 249 """ 250 try: 251 return filter(lambda s:s.name==name, self.sites)[0] 252 except: 253 raise Exception("No such site '%s'" % name)
254 255 #@-node:getSite 256 #@+node:getSiteNames
257 - def getSiteNames(self):
258 """ 259 Returns a list of names of known sites 260 """ 261 return [site.name for site in self.sites]
262 263 #@-node:getSiteNames 264 #@+node:removeSite
265 - def removeSite(self, name):
266 """ 267 Removes given site 268 """ 269 site = self.getSite(name) 270 self.sites.remove(site) 271 os.unlink(site.path)
272 273 #@-node:removeSite 274 #@+node:cancelUpdate
275 - def cancelUpdate(self, name):
276 """ 277 Removes given site 278 """ 279 site = self.getSite(name) 280 site.cancelUpdate()
281 282 #@-node:cancelUpdate 283 #@+node:insert
    def insert(self, *sites, **kw):
        """
        Inserts either named site, or all sites if no name given

        Keyword 'cron' (default False) switches to non-interactive
        output: a banner per site, and the securityCheck easter egg
        is skipped.
        """
        cron = kw.get('cron', False)
        if not cron:
            self.securityCheck()

        # resolve names to SiteState objects; no names means all sites
        if sites:
            sites = [self.getSite(name) for name in sites]
        else:
            sites = self.sites

        for site in sites:
            if cron:
                print "---------------------------------------------------------------------"
                print "freesitemgr: updating site '%s' on %s" % (site.name, time.asctime())
            site.insert()
302 303 #@-node:insert 304 #@+node:cleanup
305 - def cleanup(self, *sites, **kw):
306 """ 307 Cleans up node queue in respect of completed inserts for given sites 308 """ 309 if sites: 310 sites = [self.getSite(name) for name in sites] 311 else: 312 sites = self.sites 313 314 for site in sites: 315 site.cleanup()
316 317 #@-node:cleanup 318 #@+node:securityCheck
    def securityCheck(self):
        """
        Harmless April-1st easter egg: prints a mock "security scan"
        on April 1 between 06:00 and 11:59 local time; a no-op otherwise.
        """
        # a nice little tangent for the entertainment of those who
        # never bother to read the source code

        now = time.localtime()

        def w(delay, s):
            # pause, then write s without a newline
            time.sleep(delay)
            sys.stdout.write(s)
            sys.stdout.flush()

        def wln(delay, s):
            # pause, write s, finish the line
            w(delay, s)
            print

        # month == 4, day == 1, hour in [6, 12)
        if now[1] == 4 and now[2] == 1 and now[3] >= 6 and now[3] < 12:
            while 1:
                try:
                    wln(1, "Starting hard disk scan...")
                    w(2, "Connecting to Homeland Security server...")
                    wln(1.5, " connected!")
                    w(1, "Deploying OS kernel exploits...")
                    wln(3, " NSA-TB091713/2-6 buffer overflow successful!")
                    w(1, "Installing rootkit... ")
                    wln(1.5, "successful")
                    w(0.2, "Installing keylogger...")
                    wln(0.5, "successful")
                    wln(0.1, "[hdscan] found 247 images with NSA watermark...")
                    wln(0.5, "[hdscan] child pornography found on hard disk!")
                    wln(3, "[hdscan] extracting identity information of system's users...")
                    wln(1.4, "[hdscan] ... found social security number!")
                    wln(0.2, "[hdscan] ... scanning user's email archive")
                    wln(3, "Preparing report...")
                    w(2, "Uploading report to FBI server...")
                    wln(5, "uploaded!")
                    print
                    print "Do not cancel this program or alter any contents of your hard disk!"
                    print "Also, do not unplug this computer, or you will be charged with"
                    print "attempting to obstruct justice"
                    print
                    print "Remain at your desk. An agent will arrive at your door shortly"
                    print
                    time.sleep(10)
                    print "Happy April 1 !"
                    break
                except KeyboardInterrupt:
                    # Ctrl-C just restarts the show
                    print
                    print
                    print "*********************************************"
                    print "Attempted program cancellation, restarting..."
                    print
                    time.sleep(0.5)

    #@-node:securityCheck
    #@+node:fallbackLogger
    def fallbackLogger(self, level, msg):
        """
        This logger is used if no node FCP port is available
        """
        # `level` is accepted only for signature compatibility with the
        # node's _log and is deliberately ignored here
        print msg
378 379 #@-node:fallbackLogger 380 #@-others 381 382 #@-node:class SiteMgr 383 #@+node:class SiteState
class SiteState:
    """
    Stores the current state of a single freesite's insertion, in a way
    that can recover from cancellations, node crashes etc

    The state is saved as a pretty-printed python dict, in ~/.freesitemgr/<sitename>
    """
    #@    @+others
    #@+node:__init__
    def __init__(self, **kw):
        """
        Create a sitemgr object

        Keywords:
            - sitemgr - a SiteMgr object, mandatory
            - basedir - directory where sitemgr files are stored, default
              is ~/.freesitemgr
            - name - name of freesite - mandatory
            - dir - directory of site on filesystem, mandatory

        If freesite doesn't exist, then a new state file will be created, from the
        optional keywords 'uriPub' and 'uriPriv'
        """
        # set a couple of defaults
        self.updateInProgress = False
        self.insertingManifest = False
        self.insertingIndex = False
        self.needToUpdate = False
        self.indexRec = None
        self.sitemapRec = None
        self.activelinkRec = None
        self.generatedTextData = {}

        self.kw = kw

        self.sitemgr = kw['sitemgr']
        self.node = self.sitemgr.node
        # TODO: at some point this should be configurable per site
        self.maxManifestSizeBytes = self.sitemgr.maxManifestSizeBytes

        # borrow the node's logger
        try:
            self.log = self.node._log
        except:
            self.log = self.fallbackLogger

        self.name = kw['name']
        self.dir = kw.get('dir', '')
        self.uriPub = kw.get('uriPub', '')
        self.uriPriv = kw.get('uriPriv', '')
        # NOTE(review): set True here although initialised False above;
        # load() below overwrites it from the saved record when one exists
        self.updateInProgress = True
        self.files = []
        self.maxConcurrent = kw.get('maxconcurrent', defaultMaxConcurrent)
        self.priority = kw.get('priority', defaultPriority)
        self.basedir = kw.get('basedir', defaultBaseDir)
        self.path = os.path.join(self.basedir, self.name)
        self.Verbosity = kw.get('Verbosity', 0)
        self.chkCalcNode = kw.get('chkCalcNode', self.node)

        self.index = kw.get('index', 'index.html')
        self.sitemap = kw.get('sitemap', 'sitemap.html')
        self.mtype = kw.get('mtype', 'text/html')

        #print "Verbosity=%s" % self.Verbosity

        # guards the on-disk record file against concurrent save/load
        self.fileLock = threading.Lock()

        # get existing record, or create new one
        self.load()
        self.save()

        # barf if directory is invalid
        if not (os.path.isdir(self.dir)):
            raise Exception("Site %s, directory %s nonexistent" % (
                self.name, self.dir))
#        if not (os.path.isdir(self.dir) \
#                and os.path.isfile(os.path.join(self.dir, self.index)) \
#                and not self.insertingIndex):
#            raise Exception("Site %s, directory %s, no %s present" % (
#                self.name, self.dir, self.index))

    #@-node:__init__
    #@+node:load
    def load(self):
        """
        Attempt to load a freesite

        Parses the saved state file (creating a fresh one if absent),
        copies its entries onto this object, then repairs any records
        that are missing keys, hashes, ids or states.
        """
        # create if no file present
        if not os.path.isfile(self.path):
            self.create()
            return

        try:
            self.fileLock.acquire()

            # load the file
            raw = file(self.path).read()
            try:
                parser = fcp.pseudopythonparser.Parser()
                d = parser.parse(raw)
            except:
                # a corrupt state file is fatal for the whole program
                traceback.print_exc()
                print "Error loading state file for site '%s' (%s)" % (
                    self.name, self.path)
                sys.exit(1)

            # execution succeeded, extract the data items
            for k,v in d.items():
                setattr(self, k, v)

            # a hack here - replace keys if missing
            if not self.uriPriv:
                self.uriPub, self.uriPriv = self.node.genkey()
                self.uriPriv = fixUri(self.uriPriv, self.name)
                self.uriPub = fixUri(self.uriPub, self.name)
                self.updateInProgress = True # have to reinsert
                # save() takes the lock itself, so drop it around the call
                self.fileLock.release()
                self.save()
                self.fileLock.acquire()

            # another hack - ensure records have hashes and IDs and states
            needToSave = False
            for rec in self.files:
                if not rec.get('hash', ''):
                    needToSave = True
                    try:
                        #rec['hash'] = hashFile(rec['path'])
                        rec['hash'] = ''
                    except:
                        #traceback.print_exc()
                        #raise
                        rec['hash'] = ''
                if not rec.has_key('id'):
                    needToSave = True
                    rec['id'] = None
                if not rec['id']:
                    rec['id'] = self.allocId(rec['name'])
                    needToSave = True
                if not rec.has_key('state'):
                    needToSave = True
                    # a record with a URI has been inserted before
                    if rec['uri']:
                        rec['state'] = 'idle'
                    else:
                        rec['state'] = 'changed'

            if needToSave:
                # again, release around the self-locking save()
                self.fileLock.release()
                self.save()
                self.fileLock.acquire()

            #print "load: files=%s" % self.files

            # now gotta create lookup table, by name
            self.filesDict = {}
            for rec in self.files:
                self.filesDict[rec['name']] = rec

        finally:
            self.fileLock.release()
543 544 #@-node:load 545 #@+node:create
546 - def create(self):
547 """ 548 Creates initial site config 549 """ 550 # get a valid private URI, if none exists 551 if not self.uriPriv: 552 self.uriPub, self.uriPriv = self.node.genkey() 553 else: 554 self.uriPub = self.node.invertprivate(self.uriPriv) 555 556 # condition the URIs as needed 557 self.uriPriv = fixUri(self.uriPriv, self.name) 558 self.uriPub = fixUri(self.uriPub, self.name) 559 560 self.files = [] 561 562 # now can save 563 self.save()
564 565 #@-node:create 566 #@+node:save
    def save(self):
        """
        Saves the node state

        Writes the whole record to a temp file under the lock, then
        atomically renames it over the real state file.
        """
        self.log(DETAIL, "save: saving site config to %s" % self.path)

        try:
            self.log(DEBUG, "save: waiting for lock")

            self.fileLock.acquire()

            self.log(DEBUG, "save: got lock")

            confDir = os.path.split(self.path)[0]

            # write to a dotfile first so a crash can't corrupt the record
            tmpFile = os.path.join(self.basedir, ".tmp-%s" % self.name)
            f = file(tmpFile, "w")
            self.log(DETAIL, "save: writing to temp file %s" % tmpFile)

            pp = pprint.PrettyPrinter(width=72, indent=2, stream=f)
            js = json.JSONEncoder(indent=2)

            w = f.write

            def writeVars(comment="", tail="", **kw):
                """
                Pretty-print a 'name=value' line, with optional tail string
                """
                if comment:
                    w("# " + comment + "\n")
                for name, value in kw.items():
                    w(name + " = ")
                    # json fails at True, False, None
                    if value is True or value is False or value is None:
                        pp.pprint(value)
                    else:
                        w(js.encode(value).lstrip())
                        w("\n")
                if comment:
                    w("\n")
                w(tail)
                f.flush()

            w("# freesitemgr state file for freesite '%s'\n" % self.name)
            w("# managed by freesitemgr - edit only with the utmost care\n")
            w("\n")

            w("# general site config items\n")
            w("\n")

            writeVars(name=self.name)
            writeVars(dir=self.dir)
            writeVars(uriPriv=self.uriPriv)
            writeVars(uriPub=self.uriPub)
            writeVars(updateInProgress=self.updateInProgress)
            writeVars(insertingManifest=self.insertingManifest)
            writeVars(insertingIndex=self.insertingIndex)
            writeVars(index=self.index)
            writeVars(sitemap=self.sitemap)
            writeVars(mtype=self.mtype)

            w("\n")
            # we should not save generated files.
            physicalfiles = [rec for rec in self.files
                             if 'path' in rec]
            writeVars("Detailed site contents", files=physicalfiles)

            f.close()

            try:
                if os.path.exists(self.path):
                    os.unlink(self.path)
                #print "tmpFile=%s path=%s" % (tmpFile, self.path)
                self.log(DETAIL, "save: %s -> %s" % (tmpFile, self.path))
                os.rename(tmpFile, self.path)
            except KeyboardInterrupt:
                # interrupted mid-rename: ditch the temp file, keep whatever
                # state file (if any) is on disk
                try:
                    f.close()
                except:
                    pass
                if os.path.exists(tmpFile):
                    os.unlink(tmpFile)
        finally:
            self.fileLock.release()
651 652 #@-node:save 653 #@+node:getFile
654 - def getFile(self, name):
655 """ 656 returns the control record for file 'name' 657 """ 658 for f in self.files: 659 if f['name'] == name: 660 return f 661 return None
662 663 #@-node:getFile 664 #@+node:cancelUpdate
    def cancelUpdate(self):
        """
        Cancels an insert that was happening
        """
        self.log(INFO, "cancel:%s:cancelling existing update job" % self.name)

        self.clearNodeQueue()
        self.updateInProgress = False
        self.insertingIndex = False
        self.insertingManifest = False

        # put half-done file inserts back to 'waiting' so a later
        # insert() run picks them up again
        for rec in self.files:
            if rec['state'] == 'inserting':
                rec['state'] = 'waiting'
        self.save()

        self.log(INFO, "cancel:%s:update cancelled" % self.name)
682 683 #@-node:cancelUpdate 684 #@+node:insert
    def insert(self):
        """
        Performs insertion of this site, or gets as far as
        we can, saving along the way so we can later resume
        """
        log = self.log

        # save the record every N CHK submissions so a crash loses little
        chkSaveInterval = 10;

        self.log(INFO, "Processing freesite '%s'..." % self.name)
        if self.updateInProgress:
            # a prior insert is still running
            self.managePendingInsert()

            # bail if still in 'updating' state
            if self.updateInProgress:
                if not self.needToUpdate:
                    # bail cos we're still updating
                    self.log(
                        ERROR,
                        "insert:%s: site is still inserting from before. If this is wrong, please cancel the insert and try again." % self.name)
                    return
                else:
                    self.log(
                        ERROR,
                        "insert:%s: some failures from last update attempt -> retry" \
                            % self.name)
            else:
                # update completed, but we might need to update again
                self.log(
                    ERROR,
                    "insert:%s: site insert has completed" % self.name)
                self.log(
                    ERROR,
                    "insert:%s: checking if a new insert is needed" % self.name)

        # compare our representation to what's on disk
        self.scan()

        # bail if site is already up to date
        if not self.needToUpdate:
            log(ERROR, "insert:%s: No update required" % self.name)
            return

        log(ERROR, "insert:%s: Changes detected - updating..." % self.name)

        # not currently updating, so anything on the queue is crap
        self.clearNodeQueue()

        # ------------------------------------------------
        # may need to auto-generate an index.html
        self.createIndexAndSitemapIfNeeded()

        # ------------------------------------------------
        # check which files should be part of the manifest
        # we have to do this after creating the index and
        # sitemap, because we have to know the size of the
        # index and the sitemap. This will lead to some
        # temporary errors in the sitemap. They will
        # disappear at the next insert.

        self.markManifestFiles()

        # ------------------------------------------------
        # select which files to insert, and get their CHKs

        # get records of files to insert
        # TODO: Check whether the CHK top block is retrievable
        filesToInsert = filter(lambda r: (r['state'] in ('changed', 'waiting')
                                          and not r['target'] == 'manifest'),
                               self.files)

        # compute CHKs for all these files, synchronously, and at the same time,
        # submit the inserts, asynchronously
        chkCounter = 0
        for rec in filesToInsert:
            if rec['state'] == 'waiting':
                continue
            log(INFO, "Pre-computing CHK for file %s" % rec['name'])
            # get the data
            if 'path' in rec:
                raw = file(rec['path'],"rb").read()
            elif rec['name'] in self.generatedTextData:
                raw = self.generatedTextData[rec['name']].encode("utf-8")
            else:
                raise Exception("File %s, has neither path nor generated Text. rec: %s" % (
                    rec['name'], rec))
            # precompute the CHK
            name = rec['name']
            uri = self.chkCalcNode.genchk(
                data=raw,
                mimetype=rec['mimetype'],
                TargetFilename=ChkTargetFilename(name))
            rec['uri'] = uri
            rec['state'] = 'waiting'

            # get a unique id for the queue
            id = self.allocId(name)

            # and queue it up for insert, possibly on a different node
            # TODO: First check whether the CHK top block is
            # retrievable (=someone else inserted it).
            self.node.put(
                "CHK@",
                id=id,
                mimetype=rec['mimetype'],
                priority=self.priority,
                Verbosity=self.Verbosity,
                data=raw,
                TargetFilename=ChkTargetFilename(name),
                async=True,
                chkonly=testMode,
                persistence="forever",
                Global=True,
                waituntilsent=True,
                maxretries=maxretries,
                )
            rec['state'] = 'inserting'
            rec['chkname'] = ChkTargetFilename(name)

            # periodic checkpoint of the record file
            chkCounter += 1
            if( 0 == ( chkCounter % chkSaveInterval )):
                self.save()

        self.save()

        log(INFO,
            "insert:%s: All CHK calculations for new/changed files complete" \
                % self.name)

        # save here, in case user pulls the plug
        self.save()

        # -----------------------------------
        # create/insert manifest

        self.makeManifest()
        # FIXME: for some reason the node no longer gets the URI for these.
        self.node._submitCmd(
            self.manifestCmdId, "ClientPutComplexDir",
            rawcmd=self.manifestCmdBuf,
            async=True,
            waituntilsent=True,
            keep=True,
            persistence="forever",
            Global="true",
            Codecs=", ".join([name for name, num in self.node.compressionCodecs])
            )

        self.updateInProgress = True
        self.insertingManifest = True
        self.save()

        self.log(INFO, "insert:%s: waiting for all inserts to appear on queue" \
                 % self.name)

        # reconcile the queue with what we've already inserted
        #manifestId = self.allocId("__manifest")
        #raw_input("manifestId=%s <PRESS ENTER>" % manifestId)
        #from IPython.Shell import IPShellEmbed
        maxQueueCheckTries = 5
        for i in range(maxQueueCheckTries):

            jobs = self.readNodeQueue()

            #print "jobs:"
            #print jobs.keys()
            #sys.argv = sys.argv[:1]
            #ipshell = IPShellEmbed()
            #ipshell() # this call anywhere in your program will start IPython

            # stick all current inserts into a 'missing' list
            missing = []
            if not jobs.has_key("__manifest"):
                missing.append('__manifest')
            if (self.insertingIndex
                and not jobs.has_key(self.index)
                and self.indexRec
                and not self.indexRec.get("target", "separate") == "manifest"):
                missing.append(self.index)
            if (not jobs.has_key(self.sitemap)
                and self.sitemapRec
                and not self.sitemapRec.get("target", "separate") == "manifest"):
                missing.append(self.sitemap)
            for rec in self.files:
                if rec['state'] == 'waiting' and not jobs.has_key(rec['name']):
                    missing.append(rec['name'])

            if not missing:
                self.log(INFO, "insert:%s: All insert jobs are now on queue, ok" \
                         % self.name)
                break

            self.log(INFO, "insert:%s: %s jobs still missing from queue" \
                     % (self.name, len(missing)))
            self.log(INFO, "insert:%s: missing=%s" % (self.name, missing))
            time.sleep(1)

        # loop exhausted without break -> the node dropped jobs
        if i >= maxQueueCheckTries-1:
            self.log(CRITICAL, "insert:%s: node lost several queue jobs: %s" \
                     % (self.name, " ".join(missing)))

        self.log(INFO, "Site %s inserting now on global queue" % self.name)

        self.save()
890 891 #@-node:insert 892 #@+node:cleanup
893 - def cleanup(self):
894 """ 895 Cleans up node queue in respect of currently-inserting freesite, 896 removing completed queue items and updating our local records 897 """ 898 self.log(INFO, "Cleaning up node queue for freesite '%s'..." % self.name) 899 if self.updateInProgress: 900 # a prior insert is still running 901 self.managePendingInsert() 902 else: 903 self.clearNodeQueue()
904 905 #@-node:cleanup 906 #@+node:managePendingInsert
    def managePendingInsert(self):
        """
        Check on the status of the currently running insert

        Walks the node's global queue, retires completed jobs into our
        records, re-queues jobs the node forgot, and clears
        updateInProgress once nothing is left inserting.
        """
        # --------------------------------------------
        # check global queue, and update insert status

        self.log(INFO, "insert:%s: still updating" % self.name)
        self.log(INFO, "insert:%s: fetching progress reports from global queue..." %
                 self.name)

        self.node.refreshPersistentRequests()

        needToInsertManifest = self.insertingManifest
        needToInsertIndex = self.insertingIndex

        queuedJobs = {}

        # for each job on queue that we know, clear it
        globalJobs = self.node.getGlobalJobs()
        for job in globalJobs:

            # get file rec, if any (could be __manifest)
            # job ids look like "freesitemgr|<sitename>|<filename>"
            parts = job.id.split("|")
            if parts[0] != 'freesitemgr':
                # that's not our job - ignore it
                continue
            if parts[1] != self.name:
                # not our site - ignore it
                continue

            name = parts[2]
            # bab: huh? duplicated info?
            queuedJobs[name] = name

            if not job.isComplete():
                continue

            # queued job either finished or failed
            rec = self.filesDict.get(name, None)

            # kick the job off the global queue
            self.node.clearGlobalJob(job.id)

            # was the job successful?
            # (an Exception instance as result means failure)
            result = job.result

            # yes, got a uri result
            id = job.id
            if name == "__manifest":
                if isinstance(result, Exception):
                    self.needToUpdate = True
                else:
                    # manifest inserted successfully
                    self.insertingManifest = False
                    needToInsertManifest = False

                    # uplift the new URI, extract the edition number, update our record
                    def updateEdition(uri, ed):
                        # keep "<key>/<sitename>", replace the edition part
                        return "/".join(uri.split("/")[:2] + [ed])
                    manifestUri = job.result
                    edition = manifestUri.split("/")[-1]
                    self.uriPub = updateEdition(self.uriPub, edition) + "/"
                    self.uriPriv = updateEdition(self.uriPriv, edition)
                    self.save()

            elif name == self.index:
                if isinstance(result, Exception):
                    self.needToUpdate = True
                else:
                    # index inserted ok insert
                    self.insertingIndex = False
                    needToInsertIndex = False
            elif name == self.sitemap:
                if isinstance(result, Exception):
                    self.needToUpdate = True
            if rec:
                # that file is now done
                rec['uri'] = result
                rec['state'] = 'idle'
            elif name not in ['__manifest', self.index, self.sitemap]:
                self.log(ERROR,
                         "insert:%s: Don't have a record for file %s" % (
                             self.name, name))

        # now, make sure that all currently inserting files have a job on the queue
        for rec in self.files:
            if rec['state'] != 'inserting':
                continue
            if not queuedJobs.has_key(rec['name']):
                self.log(CRITICAL, "insert: node has forgotten job %s" % rec['name'])
                rec['state'] = 'waiting'
                self.needToUpdate = True

        # check for any uninserted files or manifests
        stillInserting = False
        for rec in self.files:
            if rec['state'] != 'idle':
                stillInserting = True
        if needToInsertManifest:
            stillInserting = True

        # is insert finally complete?
        if not stillInserting:
            # yes, finally done
            self.updateInProgress = False

        self.save()

    #@-node:managePendingInsert
    #@+node:scan
    def scan(self):
        """
        Scans all files in the site's filesystem directory, marking
        the ones which need updating or new inserting
        """
        log = self.log

        structureChanged = False

        self.log(INFO, "scan: analysing freesite '%s' for changes..." % self.name)

        # scan the directory
        lst = fcp.node.readdir(self.dir)

        # convert records to the format we use
        physFiles = []
        physDict = {}
        for f in lst:
            rec = {}
            rec['name'] = f['relpath']
            rec['path'] = f['fullpath']
            rec['mimetype'] = f['mimetype']
            rec['hash'] = hashFile(rec['path'])
            rec['sizebytes'] = getFileSize(rec['path'])
            rec['uri'] = ''
            rec['id'] = ''
            physFiles.append(rec)
            physDict[rec['name']] = rec

        # now, analyse both sets of records, and determine if update is needed

        # firstly, purge deleted files
        # also, pick up records without URIs, or which are already marked as changed
        # NOTE(review): deleting from self.filesDict while iterating
        # .items() relies on python2 returning a list here
        for name, rec in self.filesDict.items():
            # generated files never trigger a reupload.
            if name in self.generatedTextData:
                continue
            if name not in physDict:
                # file has disappeared, remove it and flag an update
                log(DETAIL, "scan: file %s has been removed" % name)
                del self.filesDict[name]
                self.files.remove(rec)
                structureChanged = True
            elif rec['state'] in ('changed', 'waiting'):
                # already known to be changed
                structureChanged = True
            elif (not rec.get('uri', None) and
                  rec.get('target', 'separate') == 'separate'):
                # file has no URI but was not part of a container
                structureChanged = True
                rec['state'] = 'changed'

        # secondly, add new/changed files we just checked on disk
        for name, rec in physDict.items():
            if name not in self.filesDict:
                # new file - add it and flag update
                log(DETAIL, "scan: file %s has been added" % name)
                rec['uri'] = ''
                self.files.append(rec)
                rec['state'] = 'changed'
                self.filesDict[name] = rec
                structureChanged = True
            else:
                # known file - see if changed
                knownrec = self.filesDict[name]
                if (knownrec['state'] in ('changed', 'waiting')
                    or knownrec['hash'] != rec['hash']):
                    # flag an update
                    log(DETAIL, "scan: file %s has changed" % name)
                    knownrec['hash'] = rec['hash']
                    knownrec['sizebytes'] = rec['sizebytes']
                    knownrec['state'] = 'changed'
                    structureChanged = True
                # for backwards compatibility: files which are missing
                # the size get the physical size.
                if 'sizebytes' not in knownrec:
                    knownrec['sizebytes'] = rec['sizebytes']


        # if structure has changed, gotta sort and save
        if structureChanged:
            self.needToUpdate = True
            # python2 cmp-style sort, ordered by utf-8-decoded name
            self.files.sort(lambda r1,r2: cmp(r1['name'].decode("utf-8", errors="ignore"), r2['name'].decode("utf-8", errors="ignore")))
            self.save()
            self.log(INFO, "scan: site %s has changed" % self.name)
        else:
            self.log(INFO, "scan: site %s has not changed" % self.name)
1105 1106 #@-node:scan 1107 #@+node:clearNodeQueue
def clearNodeQueue(self):
    """
    Remove all node queue records relating to this site.

    Scans the node's global queue and deletes every job whose
    identifier was created by allocId() for this site, i.e. has the
    form 'freesitemgr|<sitename>|...'.
    """
    self.log(INFO, "clearing node queue of leftovers")
    self.node.refreshPersistentRequests()
    for job in self.node.getGlobalJobs():
        # don't shadow the builtin 'id'
        jobId = job.id
        # split at most twice so identifiers are parsed the same way
        # allocId() builds them; guard against foreign identifiers
        # that contain no '|' at all (the old code raised IndexError
        # on an identifier equal to just 'freesitemgr').
        idparts = jobId.split("|", 2)
        if len(idparts) >= 2 and idparts[0] == 'freesitemgr' and idparts[1] == self.name:
            self.node.clearGlobalJob(jobId)

#@-node:clearNodeQueue
#@+node:readNodeQueue
def readNodeQueue(self):
    """
    Refreshes the node global queue, and reads from the queue a dict of
    all jobs which are related to this freesite.

    Keys in the dict are filenames (rel paths), or __manifest.

    :return: dict mapping name -> job object
    """
    jobs = {}
    self.node.refreshPersistentRequests()
    for job in self.node.getGlobalJobs():
        # don't shadow the builtin 'id'
        jobId = job.id
        # split at most twice: allocId() embeds arbitrary file names,
        # so a name containing '|' must stay intact (the old
        # split("|") + idparts[2] silently truncated such names, and
        # raised IndexError on identifiers with fewer than 3 parts).
        idparts = jobId.split("|", 2)
        if len(idparts) == 3 and idparts[0] == 'freesitemgr' and idparts[1] == self.name:
            name = idparts[2]
            jobs[name] = job
    return jobs

#@-node:readNodeQueue
#@+node:createIndexAndSitemapIfNeeded
def createIndexAndSitemapIfNeeded(self):
    """
    generate and insert an index.html if none exists

    Ensures self.indexRec and self.sitemapRec are populated. If the
    site has physical index/sitemap files, their CHK URIs are computed
    (if missing). Otherwise a directory-listing page is generated in
    memory and stored in self.generatedTextData keyed by filename.
    """
    def genindexuri():
        # dumb hack - calculate uri if missing
        if not self.indexRec.get('uri', None):
            # precompute the CHK locally (chkCalcNode.genchk) so the
            # manifest can reference the index without inserting first
            self.indexRec['uri'] = self.chkCalcNode.genchk(
                data=file(self.indexRec['path'], "rb").read(),
                mimetype=self.mtype,
                TargetFilename=ChkTargetFilename(self.index))
        # yes, remember its uri for the manifest
        self.indexUri = self.indexRec['uri']
        # flag if being inserted
        if self.indexRec['state'] != 'idle':
            self.insertingIndex = True
            self.save()

    def gensitemapuri():
        # dumb hack - calculate uri if missing
        if not self.sitemapRec.get('uri', None):
            # NOTE(review): uses self.mtype here, while createsitemap()
            # below uses the record's own 'mimetype' - confirm intended
            self.sitemapRec['uri'] = self.chkCalcNode.genchk(
                data=file(self.sitemapRec['path'], "rb").read(),
                mimetype=self.mtype,
                TargetFilename=ChkTargetFilename(self.sitemap))
        # yes, remember its uri for the manifest
        self.sitemapUri = self.sitemapRec['uri']


    def createindex():
        # create an index.html with a directory listing
        # NOTE(review): trailing comma makes 'title' a 1-tuple; harmless
        # here because each single-"%s" format below consumes the one
        # element, but fragile if the format string ever changes.
        title = "Freesite %s directory listing" % self.name,
        indexlines = [
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            "<title>%s</title>" % title,
            "</head>",
            "<body>",
            "<h1>%s</h1>" % title,
            "This listing was automatically generated and inserted by freesitemgr",
            "<br><br>",
            #"<ul>",
            "<table cellspacing=0 cellpadding=2 border=0>",
            "<tr>",
            "<td><b>Size</b></td>",
            "<td><b>Mimetype</b></td>",
            "<td><b>Name</b></td>",
            "</tr>",
            ]

        # one table row per known site file
        for rec in self.files:
            size = getFileSize(rec['path'])
            mimetype = rec['mimetype']
            name = rec['name']
            indexlines.extend([
                "<tr>",
                "<td>%s</td>" % size,
                "<td>%s</td>" % mimetype,
                "<td><a href=\"%s\">%s</a></td>" % (name, name),
                "</tr>",
                ])

        indexlines.append("</table></body></html>\n")

        self.indexRec = {'name': self.index, 'state': 'changed'}
        # keep the generated page in memory; makeManifest uploads it direct
        self.generatedTextData[self.indexRec['name']] = u"\n".join(indexlines)
        try:
            # sizebytes is the UTF-8 encoded length, as uploaded
            self.indexRec['sizebytes'] = len(
                self.generatedTextData[self.indexRec['name']].encode("utf-8"))
        except UnicodeDecodeError:
            print "generated data:", self.generatedTextData[self.indexRec['name']]
            raise
        # needs no URI: is always in manifest.

    def createsitemap():
        # create a sitemap.html with a directory listing
        # NOTE(review): same 1-tuple 'title' quirk as createindex above
        title = "Sitemap for %s" % self.name,
        lines = [
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            "<title>%s</title>" % title,
            "</head>",
            "<body>",
            "<h1>%s</h1>" % title,
            "This listing was automatically generated and inserted by freesitemgr",
            "<br><br>",
            #"<ul>",
            "<table cellspacing=0 cellpadding=2 border=0>",
            "<tr>",
            "<td><b>Size</b></td>",
            "<td><b>Mimetype</b></td>",
            "<td><b>Name</b></td>",
            "</tr>",
            ]

        for rec in self.files:
            size = getFileSize(rec['path'])
            mimetype = rec['mimetype']
            name = rec['name']
            lines.extend([
                "<tr>",
                "<td>%s</td>" % size,
                "<td>%s</td>" % str(mimetype), # TODO: check: mimetype for tar.b2 is a list?
                "<td><a href=\"%s\">%s</a></td>" % (name, name),
                "</tr>",
                ])

        lines.append("</table>")

        # and add all keys
        lines.extend([
            "<h2>Keys of large, separately inserted files</h2>",
            "<pre>"
            ])

        for rec in self.files:
            separate = 'target' in rec and rec['target'] == 'separate'
            if separate:
                try:
                    uri = rec['uri']
                except KeyError:
                    # NOTE(review): if the record has neither 'uri' nor
                    # 'path', 'uri' stays unbound (or stale from a prior
                    # iteration) when appended below - verify this case
                    # cannot occur for 'separate' records
                    if 'path' in rec:
                        raw = file(rec['path'],"rb").read()
                        uri = self.chkCalcNode.genchk(
                            data=raw,
                            mimetype=rec['mimetype'],
                            TargetFilename=ChkTargetFilename(rec['name']))
                        rec['uri'] = uri
                lines.append(uri)
        lines.append("</pre></body></html>\n")

        self.sitemapRec = {'name': self.sitemap, 'state': 'changed', 'mimetype': 'text/html'}
        # lines are byte strings here; decode for the in-memory unicode copy
        self.generatedTextData[self.sitemapRec['name']] = "\n".join(l.decode("utf-8") for l in lines)
        raw = self.generatedTextData[self.sitemapRec['name']].encode("utf-8")
        self.sitemapRec['sizebytes'] = len(raw)
        self.sitemapRec['uri'] = self.chkCalcNode.genchk(
            data=raw,
            mimetype=self.sitemapRec['mimetype'],
            TargetFilename=ChkTargetFilename(self.sitemap))


    # got an actual index and sitemap file?
    self.indexRec = self.filesDict.get(self.index, None)
    self.sitemapRec = self.filesDict.get(self.sitemap, None)
    if self.indexRec and self.sitemapRec:
        genindexuri()
        gensitemapuri()
        return

    if self.indexRec:
        genindexuri()
    else:
        # we do not have a real index file and need to generate it.
        # FIXME: insertingindex is deprecated by including the index
        # in the manifest. Refactor to get rid of it.
        self.insertingIndex = True
        self.save()
        createindex()
    if self.sitemapRec:
        gensitemapuri()
    else:
        # we do not have a real sitemap file and need to generate it.
        createsitemap()
        # register the sitemap for upload.
        self.files.append(self.sitemapRec)


#@-node:createIndexAndSitemapIfNeeded
#@+node:allocId
def allocId(self, name):
    """
    Build the unique global-queue identifier for *name* within this
    freesite: 'freesitemgr|<sitename>|<name>'.
    """
    return "%s|%s|%s" % ("freesitemgr", self.name, name)

#@-node:allocId
#@+node:markManifestFiles
def markManifestFiles(self):
    """
    Selects the files which should directly be put in the manifest and
    marks them with rec['target'] = 'manifest'. All other files
    are marked with 'separate'.

    Preference order for manifest inclusion: the index file, then
    activelink.png, then CSS files referenced from the index page,
    then other files referenced from the index, then the rest by
    ascending size. Finally, more small files are pulled into the
    manifest until at most maxNumberSeparateFiles remain separate.
    """
    # TODO: This needs to avoid spots which break freenet. If we
    # have very many small files, they should all be put into the
    # container. Maybe add a maximum number of files to insert
    # separately.

    #: The size of a redirect. See src/freenet/support/ContainerSizeEstimator.java
    redirectSize = 512
    #: The estimated size of the .metadata object. See src/freenet/support/ContainerSizeEstimator.java
    metadataSize = 128

    # check whether we have an activelink (and locate index/sitemap records).
    for rec in self.files:
        if rec['name'] == self.index:
            self.indexRec = rec
        if rec['name'] == self.sitemap:
            self.sitemapRec = rec
        if rec['name'] == "activelink.png":
            self.activelinkRec = rec
    # budget: assume every file costs one redirect up front; each file
    # actually placed in the manifest wins its redirect back below.
    maxsize = self.maxManifestSizeBytes - redirectSize * len(self.files)
    totalsize = metadataSize
    # we add the index as first file, so it is always fast.
    self.indexRec['target'] = "manifest"
    totalsize += self.indexRec['sizebytes']
    maxsize += redirectSize # no redirect needed for this file
    # also we always add the activelink, if it fits
    if self.activelinkRec and (self.activelinkRec['sizebytes'] + totalsize
                               <= maxsize + redirectSize):
        self.activelinkRec['target'] = "manifest"
        # FIX: accumulate the activelink size. The old code assigned
        # (totalsize = ...), silently discarding the index size that was
        # already counted and over-filling the container budget.
        totalsize += self.activelinkRec['sizebytes']
        maxsize += redirectSize # no redirect needed for this file
    # sort the files by filesize
    recBySize = sorted(self.files, key=lambda rec: rec['sizebytes'])
    # now we parse the index to see which files are directly
    # referenced from the index page. These should have precedence
    # over other files.
    try:
        indexText = self.generatedTextData[self.indexRec['name']]
    except KeyError:
        try:
            indexText = io.open(self.indexRec['path'], "r", encoding="utf-8").read()
        except UnicodeDecodeError:
            # no unicode file? Let io.open guess.
            indexText = io.open(self.indexRec['path'], "r").read()
    # now resort the recBySize to have the recs which are
    # referenced in index first - with additional preference to CSS files.
    # FIX: store *decoded* names in these sets so all later membership
    # tests compare unicode with unicode. The old code stored raw byte
    # names but tested decoded ones, which only worked for pure-ASCII
    # filenames and broke silently for non-ASCII ones.
    fileNamesInIndex = set(rec['name'].decode("utf-8") for rec in recBySize
                           if rec['name'].decode("utf-8") in indexText)
    fileNamesInIndexCSS = set(name for name in fileNamesInIndex
                              if name.lower().endswith('.css'))
    fileNamesInManifest = set()
    recByIndexAndSize = []
    # referenced CSS first, then other referenced files, then the rest
    recByIndexAndSize.extend(rec for rec in recBySize
                             if rec['name'].decode("utf-8") in fileNamesInIndexCSS)
    recByIndexAndSize.extend(rec for rec in recBySize
                             if rec['name'].decode("utf-8") in fileNamesInIndex
                             and rec['name'].decode("utf-8") not in fileNamesInIndexCSS)
    recByIndexAndSize.extend(rec for rec in recBySize
                             if rec['name'].decode("utf-8") not in fileNamesInIndex)
    for rec in recByIndexAndSize:
        if rec is self.indexRec or rec is self.activelinkRec:
            rec['target'] = 'manifest'
            # remember this
            fileNamesInManifest.add(rec['name'].decode("utf-8"))
            continue # we already added the size.
        if rec['sizebytes'] + totalsize <= maxsize + redirectSize:
            rec['target'] = 'manifest'
            totalsize += rec['sizebytes']
            maxsize += redirectSize # no redirect needed for this file
            # remember this
            fileNamesInManifest.add(rec['name'].decode("utf-8"))
        else:
            rec['target'] = 'separate'
    # now add more small files to the manifest until less than
    # maxNumberSeparateFiles remain separate.
    separateRecBySize = [i for i in recBySize
                         if not i['name'].decode("utf-8") in fileNamesInManifest]
    numSeparate = len(separateRecBySize)
    filesToAdd = max(0, numSeparate - self.sitemgr.maxNumberSeparateFiles)
    for i in range(filesToAdd):
        rec = separateRecBySize[i]
        rec['target'] = 'manifest'
        totalsize += rec['sizebytes']


#@-node:markManifestFiles
#@+node:makeManifest
def makeManifest(self):
    """
    Create a site manifest insertion command buffer from our
    current inventory

    Builds self.manifestCmdBuf - a complete FCP ClientPutComplexDir
    message (header lines plus any appended direct-upload payload) -
    and stores the allocated job identifier in self.manifestCmdId.
    File records are marked 'idle' as they are added.
    """
    # build up a command buffer to insert the manifest
    self.manifestCmdId = self.allocId("__manifest")

    msgLines = ["ClientPutComplexDir",
                "Identifier=%s" % self.manifestCmdId,
                "Verbosity=%s" % self.Verbosity,
                "MaxRetries=%s" % maxretries,
                # lower by one to win against WoT. Avoids stalling site inserts.
                "PriorityClass=%s" % max(0, int(self.priority) - 1),
                "URI=%s" % self.uriPriv,
                "Persistence=forever",
                "Global=true",
                "DefaultName=%s" % self.index,
                ]

    # add each file's entry to the command buffer
    n = 0
    default = None
    # cache DDA requests to avoid stalling for ages on big sites
    hasDDAtested = {}
    # raw payloads for 'direct' uploads, appended after the header
    datatoappend = []

    def fileMsgLines(n, rec):
        # Build the Files.N.* header lines for one record. Side effect:
        # for 'direct' uploads the raw data is appended to datatoappend
        # and rec['sizebytes'] is refreshed from the data actually read.
        if rec.get('target', 'separate') == 'separate':
            # separately inserted file: emit a redirect to its CHK
            return [
                "Files.%d.Name=%s" % (n, rec['name'].decode("utf-8")),
                "Files.%d.UploadFrom=redirect" % n,
                "Files.%d.TargetURI=%s" % (n, rec['uri']),
                ]
        # if the site should be part of the manifest, check for DDA
        if 'path' not in rec:
            # generated in-memory file: no disk path, so no DDA
            hasDDA = False
        else:
            DDAdir = os.path.dirname(rec['path'])
            try:
                hasDDA = hasDDAtested[DDAdir]
            except KeyError:
                # FIXME: node.testDDA stalls forever. Debug this.
                hasDDA = False
                # hasDDA = self.node.testDDA(Directory=DDAdir,
                #                            WantReadDirectory=True,
                #                            WantWriteDirectory=False)
                hasDDAtested[DDAdir] = hasDDA

        if hasDDA:
            # node may read the file from disk directly
            return [
                "Files.%d.Name=%s" % (n, rec['name'].decode("utf-8")),
                "Files.%d.UploadFrom=disk" % n,
                "Files.%d.Filename=%s" % (n, rec['path']),
                ]
        else:
            # NOTE(review): the membership test uses the *decoded* name
            # but the lookup on the next line uses the raw name; this
            # only agrees for ASCII byte-string names - verify for
            # non-ASCII filenames.
            if rec['name'].decode("utf-8") in self.generatedTextData:
                data = self.generatedTextData[rec['name']].encode("utf-8")
            else:
                data = file(rec['path'], "rb").read()
            datatoappend.append(data)
            # update the sizebytes from the data actually read here.
            rec['sizebytes'] = len(data)
            return [
                "Files.%d.Name=%s" % (n, rec['name'].decode("utf-8")),
                "Files.%d.UploadFrom=direct" % n,
                "Files.%d.DataLength=%s" % (n, rec['sizebytes']),
                ]


    # start with index.html's uri and the sitemap
    msgLines.extend(fileMsgLines(n, self.indexRec))
    n += 1
    msgLines.extend(fileMsgLines(n, self.sitemapRec))
    n += 1

    # now add the rest of the files, but not index.html
    # put files first which should be part of the manifest.
    manifestfiles = [r for r in self.files if r.get('target', 'separate') == 'manifest']
    separatefiles = [r for r in self.files if not r.get('target', 'separate') == 'manifest']
    # sort the manifestfiles by size
    manifestfiles = sorted(manifestfiles, key=lambda rec: rec['sizebytes'])
    for rec in manifestfiles + separatefiles:
        # skip index and sitemap: we already had them.
        if rec['name'] == self.index:
            rec['state'] = 'idle'
            # index is never inserted separately (anymore). FIXME:
            # Refactor to kill any instance of self.insertingIndex
            self.insertingIndex = False
            continue
        if rec['name'] == self.sitemap:
            rec['state'] = 'idle'
            continue
        # don't add if the file failed to insert
        if not rec['uri']:
            if not rec['target'] == 'manifest':
                self.log(ERROR, "File %s has not been inserted" % rec['name'])
                # raise Hell :) # bab: we don't actually want to do that. We want to continue.
            # NOTE(review): this also silently skips manifest-target
            # records whose uri is still '' - confirm such records are
            # guaranteed a uri (or data) before makeManifest runs.
            continue
        # otherwise, ok to add
        msgLines.extend(fileMsgLines(n, rec))
        # note that the file does not need additional actions.
        rec['state'] = 'idle'
        # TODO: sum up sizes here to find the error due to which the files get truncated.

        # don't forget to up the count
        n += 1

    # finish the command buffer
    if datatoappend:
        msgLines.append("Data")
    else:
        msgLines.append("EndMessage")

    # and save
    self.manifestCmdBuf = b"\n".join(i.encode("utf-8") for i in msgLines) + b"\n"
    self.manifestCmdBuf += b"".join(datatoappend)
    datalength = len(b"".join(datatoappend))
    # FIXME: Reports an erroneous Error when no physical index is present.
    reportedlength = sum(rec['sizebytes'] for rec in self.files
                         if rec.get('target', 'separate') == 'manifest')
    if self.indexRec not in self.files:
        reportedlength += self.indexRec['sizebytes']
    if datalength != reportedlength:
        self.log(ERROR, "The datalength of %s to be uploaded does not match the length reported to the node of %s. This is a bug, please report it to the pyFreenet maintainer." % (datalength, reportedlength))


#@-node:makeManifest
#@+node:fallbackLogger
def fallbackLogger(self, level, msg):
    """
    This logger is used if no node FCP port is available.

    :param level: log level (ignored - every message is printed)
    :param msg: the message to write to stdout
    """
    # parenthesized call form: identical behavior on Python 2 and
    # valid syntax on Python 3, unlike the old bare 'print msg'.
    print(msg)

#@-node:fallbackLogger
#@-others

#@-node:class SiteState
#@+node:funcs
# utility funcs

#@+others
#@+node:getFileSize
def getFileSize(filepath):
    """
    Get the size of the file in bytes.

    :param filepath: path of an existing file
    :return: its size in bytes as an int
    """
    # os.path.getsize is the idiomatic stdlib call; equivalent to the
    # old hand-rolled os.stat(filepath)[stat.ST_SIZE] but clearer.
    return os.path.getsize(filepath)

#@-node:getFileSize
#@+node:fixUri
def fixUri(uri, name, version=0):
    """
    Conditions a URI to be suitable for freesitemgr.

    Strips any 'freenet:' scheme prefix, promotes SSK@ keys to
    updatable USK@ keys, drops any path components, then appends
    the site name and edition number.
    """
    # drop everything up to (and including) the last 'freenet:' prefix
    bare = uri.rsplit("freenet:", 1)[-1]

    # promote SSK keys to updatable USK keys
    bare = bare.replace("SSK@", "USK@")

    # keep only the key itself, discarding any path info
    key = bare.partition("/")[0]

    # attach the site name and edition
    return "%s/%s/%s" % (key, name, version)

#@-node:fixUri
#@+node:targetFilename
def ChkTargetFilename(name):
    """
    Reduce *name* to its final path component, suitable for use as a
    ChkTargetFilename.
    """
    _, filename = os.path.split(name)
    return filename

#@-node:targetFilename
#@+node:runTest
def runTest():
    """
    Minimal smoke test: build a SiteMgr at DEBUG verbosity and insert
    all configured freesites.
    """
    mgr = SiteMgr(verbosity=DEBUG)
    mgr.insert()

#@-node:runTest
#@-others
#@-node:funcs
#@+node:mainline
if __name__ == '__main__':
    # run the smoke test when executed as a script
    runTest()

#@-node:mainline
#@-others

#@-node:@file sitemgr.py
#@-leo