# bot/looper.py
#
#
""" looper module: a link-following Spider and a timer-driven RSS poller. """
## IMPORTS
from bot import Object, kernel
from bot.utils import parse_urls
## basic imports
import threading
import logging
## Spider class
class Spider(Object):
    """ Crawler that fetches a url, saves the links found and queues follow-ups.

    Only links containing the base of the first crawled url are followed.
    """

    def __init__(zelf, sleeptime, *args, **kwargs):
        """ Initialise the crawl bookkeeping.

        :param sleeptime: stored on the instance; not read by this class itself.
        """
        Object.__init__(zelf, *args, **kwargs)
        zelf.sleeptime = sleeptime
        zelf.errors = []      # urls that are never queued as follow-ups
        zelf.urls = []        # urls that have already been fetched
        zelf.url = Object()   # url/basepath/base/root/file of the first crawled url
        zelf.followed = []
        zelf.speed = 0        # seconds slept after a fetch, grows by 0.1 per fetch
        zelf.depth = 5        # max number of "/"-separated parts a url may have

    def crawl(zelf, *args, **kwargs):
        """ Fetch args[0], save the links on that page and queue same-site ones.

        :returns: the urls parsed from the page, an empty list when the url
                  was crawled before, or None when the url exceeds zelf.depth.
        """
        # NOTE(review): parse_url, do_url and need_redirect are not imported in
        # the visible part of this file - confirm where they are defined.
        import time  # local import: the module header does not import time
        url = args[0]
        urls = []
        # NOTE(review): relies on the truthiness of Object - presumably an
        # Object without attributes is falsy, so this runs on the first crawl only.
        if not zelf.url:
            zelf.url.url = url
            zelf.url.basepath, zelf.url.base, zelf.url.root, zelf.url.file = parse_url(zelf.url.url)
        pnr = len(url.split("/"))
        if pnr > zelf.depth:
            # report the configured limit instead of a hard-coded 5
            logging.warning("%s depth > %s", url, zelf.depth)
            return
        if url in zelf.urls:
            return urls
        zelf.urls.append(url)
        content = do_url("GET", url)
        newurl = need_redirect(content)
        if newurl:
            content = do_url("GET", newurl)
            logging.warning("redirecting to %s", newurl)
        newurl2 = need_redirect(content)
        if newurl2:
            # follow the second redirect target (the original url was re-fetched here)
            content = do_url("GET", newurl2)
            logging.warning("redirecting to %s", newurl2)
        # slow down a little more after every fetch
        time.sleep(zelf.speed)
        zelf.speed += 0.1
        urls = parse_urls(url, content.read())
        o = Object()
        o.spider = True
        o.orig_url = url
        o.urls = urls
        o.save()
        for u in urls:
            if u in zelf.urls:
                continue
            if zelf.url.base not in u:
                continue
            if u in zelf.errors:
                continue
            zelf.put(zelf.crawl, u)
        return urls
## RSS class
class RSS(Object):
    """ Poller that checks the saved rss feeds every sleeptime seconds.

    Entries whose link is not stored yet are saved and announced on every
    bot in kernel.fleet.
    """

    def __init__(zelf, sleeptime, *args, **kwargs):
        """ Store the interval and schedule the first poll.

        :param sleeptime: number of seconds between two polls.
        """
        Object.__init__(zelf, *args, **kwargs)
        zelf.sleeptime = sleeptime
        zelf.do_one()

    def do_one(zelf, *args, **kwargs):
        """ Start a one-shot timer that calls poll() after sleeptime seconds. """
        zelf.timer = threading.Timer(zelf.sleeptime, zelf.poll)
        zelf.timer.start()

    def poll(zelf, *args, **kwargs):
        """ Fetch all rss feeds once, announce new entries, then reschedule. """
        import bot.feedparser as fp
        try:
            o = Object()
            logging.warning("polling %s", str(zelf.timer))
            for obj in o.get_all("rss"):
                logging.warning("poll %s", obj.rss)
                data = fp.parse(obj.rss)
                for entry in data["entries"]:
                    if o.has_obj("link", entry["link"]):
                        logging.warning("skip %s", entry["link"])
                        continue
                    oo = Object(**entry)
                    oo.save()
                    result = "%s -=- %s" % (entry["title"], entry["link"])
                    # loop variable renamed so it does not shadow the bot package
                    for member in kernel.fleet:
                        member.announce(result)
        finally:
            # reschedule even when a feed raised, otherwise polling stops for good
            zelf.do_one()