.. _fbf.utils.urlstats:

urlstats
~~~~~~~~

.. automodule:: fbf.utils.urlstats
    :show-inheritance:
    :members:
    :undoc-members:

CODE
----

::

    # fbf/utils/urlstats.py
    #
    #

    """ persist stats of an url. """

.. _fbf.utils.urlstats_fbf_imports:

fbf imports
--------------

::

    from fbf.lib.persist import Persist, PersistCollection
    from fbf.lib.datadir import getdatadir
    from fbf.utils.statdict import StatDict
    from fbf.utils.name import stripname
    from fbf.utils.url import striphtml, Url

.. _fbf.utils.urlstats_basic_imports:

basic imports
----------------

::

    import time
    import logging
    import os

.. _fbf.utils.urlstats_UrlStats_class:

UrlStats class
-----------------

::

    class UrlStats(Persist):

        """ persisted word statistics of a single url. """

        def __init__(self, url):
            """ remember the url and bind the object to its stats file.

            The file name is <datadir>/spider/stats/<stripname(url)>.
            """
            self.scantime = 0
            self.url = Url(url)
            self.fname = getdatadir() + os.sep + 'spider' + os.sep + 'stats' + os.sep + stripname(url)
            Persist.__init__(self, self.fname)

        def get(self):
            """ fetch the url and feed its content to input().

            Returns the StatDict produced by input(), or None when the
            fetch yields nothing.
            """
            # geturl2 was used here without ever being imported, so every call
            # ended in a NameError.
            # NOTE(review): presumably geturl2 lives next to striphtml/Url in
            # fbf.utils.url - confirm the location.
            from fbf.utils.url import geturl2
            content = geturl2(self.url)
            if content: return self.input(content)

        def input(self, html):
            """ count the words in html, persist the counts and return them.

            Records the scan time, stores the url and the word stats on
            self.data, saves the object and logs the number of distinct words.
            """
            self.scantime = time.time()
            text = striphtml(html)
            # split() without arguments already splits on newlines; removing
            # the newlines first glued the last word of a line to the first
            # word of the next one and skewed the counts.
            words = text.split()
            stats = StatDict()
            for w in words: stats.upitem(w)
            self.data.url = self.url.url
            self.data.words = stats
            self.save()
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning("%s words found for %s", len(stats), self.url.url)
            return stats

        def stats(self):
            """ return a StatDict built from the persisted word counts. """
            stats = StatDict(self.data.words)
            return stats