Source code for bot.looper

# bot/looper.py
#
#

""" poller module. """

## IMPORTS

from bot import Object, kernel
from bot.utils import parse_urls

## basic imports

import logging
import threading
import time

## Spider class

class Spider(Object):

    """ crawl a site by following the links found on every fetched page. """

    def __init__(zelf, sleeptime, *args, **kwargs):
        """ set up empty crawl state; sleeptime is stored but not read by this class. """
        Object.__init__(zelf, *args, **kwargs)
        zelf.sleeptime = sleeptime
        zelf.errors = []      # urls skipped when queueing (nothing in this class appends to it)
        zelf.urls = []        # urls already fetched
        zelf.url = Object()   # details of the first url crawled, filled in by crawl()
        zelf.followed = []
        zelf.speed = 0        # seconds slept after a fetch, grows by 0.1 per page
        zelf.depth = 5        # maximum number of "/" separated parts a url may have

    def crawl(zelf, *args, **kwargs):
        """
            fetch the url in args[0], save the links found on it and queue the
            links that belong to the same site for crawling.

            returns the list of urls parsed from the page, an empty list when the
            url was fetched before, or None when the url exceeds zelf.depth.

        """
        # NOTE(review): parse_url, do_url and need_redirect are not imported in
        # the visible part of this module - confirm where they should come from.
        url = args[0]
        urls = []
        # the first url crawled becomes the reference later links are matched against
        # (presumably an Object without attributes is falsy - TODO confirm)
        if not zelf.url:
            zelf.url.url = url
            zelf.url.basepath, zelf.url.base, zelf.url.root, zelf.url.file = parse_url(zelf.url.url)
        pnr = len(url.split("/"))
        if pnr > zelf.depth:
            # report the configured limit instead of a hard-coded 5
            logging.warning("%s depth > %s", url, zelf.depth)
            return
        if url in zelf.urls:
            return urls
        zelf.urls.append(url)
        content = do_url("GET", url)
        # follow at most two redirects
        newurl = need_redirect(content)
        if newurl:
            content = do_url("GET", newurl)
            logging.warning("redirecting to %s", newurl)
            newurl2 = need_redirect(content)
            if newurl2:
                # fetch the second redirect target (the original url was fetched again here)
                content = do_url("GET", newurl2)
                logging.warning("redirecting to %s", newurl2)
        # throttle: every fetched page makes the next pause 0.1 second longer
        time.sleep(zelf.speed)
        zelf.speed += 0.1
        urls = parse_urls(url, content.read())
        # keep a record of which links were found on which page
        o = Object()
        o.spider = True
        o.orig_url = url
        o.urls = urls
        o.save()
        for u in urls:
            # skip links already fetched, links to other sites and links marked as errors
            if u in zelf.urls or zelf.url.base not in u or u in zelf.errors:
                continue
            # put() is not defined in this class - presumably inherited from Object
            zelf.put(zelf.crawl, u)
        return urls

## RSS class

class RSS(Object):

    """ poll the stored rss feeds on a timer and announce entries not seen before. """

    def __init__(zelf, sleeptime, *args, **kwargs):
        """ store the poll interval (seconds) and schedule the first poll. """
        Object.__init__(zelf, *args, **kwargs)
        zelf.sleeptime = sleeptime
        zelf.do_one()

    def do_one(zelf, *args, **kwargs):
        """ start a one-shot timer that runs poll() after zelf.sleeptime seconds. """
        zelf.timer = threading.Timer(zelf.sleeptime, zelf.poll)
        zelf.timer.start()

    def poll(zelf, *args, **kwargs):
        """
            parse every stored rss feed, save the entries whose link is not yet
            stored and announce them on all bots in kernel.fleet.

            the next poll is always scheduled, even when this one raises.

        """
        try:
            import bot.feedparser as fp
            o = Object()
            logging.warning("polling %s", zelf.timer)
            for obj in o.get_all("rss"):
                logging.warning("poll %s", obj.rss)
                data = fp.parse(obj.rss)
                for entry in data["entries"]:
                    # an entry whose link is already stored has been announced before
                    if o.has_obj("link", entry["link"]):
                        logging.warning("skip %s", entry["link"])
                        continue
                    oo = Object(**entry)
                    oo.save()
                    result = "%s -=- %s" % (entry["title"], entry["link"])
                    for b in kernel.fleet:
                        b.announce(result)
        finally:
            # re-arm the timer no matter what, so one failing feed does not stop polling for good
            zelf.do_one()