# Source code for mini.looper

# mini/looper.py
#
#

""" poller module. """

## IMPORTS

from mini import Object, kernel
from mini.utils import parse_urls

## basic imports

import logging
import threading
import time

## Spider class

class Spider(Object):

    """Breadth-first site crawler.

    Starting from a seed URL handed to crawl(), it fetches pages, follows
    same-base links up to ``depth`` path components, throttles itself by an
    ever-growing per-request delay (``speed``), and persists each page's
    extracted link list as an Object.
    """

    def __init__(zelf, sleeptime, *args, **kwargs):
        Object.__init__(zelf, *args, **kwargs)
        zelf.sleeptime = sleeptime  # kept for callers; not read in crawl()
        zelf.errors = []            # URLs that previously failed — skipped
        zelf.urls = []              # URLs already visited
        zelf.url = Object()         # seed URL parts, set on first crawl()
        zelf.followed = []
        zelf.speed = 0              # inter-request sleep, grows 0.1s per fetch
        zelf.depth = 5              # max number of "/"-separated path parts

    def crawl(zelf, *args, **kwargs):
        """Fetch ``args[0]``, record its links, and queue same-base links.

        Returns the list of URLs parsed from the page, or None when the
        URL exceeds the depth limit.
        """
        url = args[0]
        urls = []
        if not zelf.url:
            # First call: remember the seed and its parsed components so
            # later calls can restrict crawling to the same base.
            # NOTE(review): parse_url, do_url and need_redirect are not
            # imported in this chunk — presumably they live in mini.utils
            # next to parse_urls; confirm against the full file.
            zelf.url.url = url
            zelf.url.basepath, zelf.url.base, zelf.url.root, zelf.url.file = parse_url(zelf.url.url)
        pnr = len(url.split("/"))
        if pnr > zelf.depth:
            # was hardcoded "depth > 5" even though zelf.depth is an attribute
            logging.warning("%s depth > %s", url, zelf.depth)
            return
        if url not in zelf.urls:
            zelf.urls.append(url)
        content = do_url("GET", url)
        newurl = need_redirect(content)
        if newurl:
            content = do_url("GET", newurl)
            logging.warning("redirecting to %s", newurl)
            newurl2 = need_redirect(content)
            if newurl2:
                # BUG FIX: the original re-fetched the old `url` here instead
                # of following the second redirect target.
                content = do_url("GET", newurl2)
                logging.warning("redirecting to %s", newurl2)
        # Politeness throttle: each request sleeps a little longer than the last.
        time.sleep(zelf.speed)
        zelf.speed += 0.1
        urls = parse_urls(url, content.read())
        # Persist what was found on this page.
        o = Object()
        o.spider = True
        o.orig_url = url
        o.urls = urls
        o.save()
        # Queue unvisited, same-base, non-erroring links for crawling.
        for u in urls:
            if u in zelf.urls:
                continue
            if zelf.url.base not in u:
                continue
            if u in zelf.errors:
                continue
            zelf.put(zelf.crawl, u)
        return urls

## RSS class
class RSS(Object):

    """RSS poller.

    Arms a one-shot timer that, after ``sleeptime`` seconds, fetches every
    stored "rss" object's feed and announces each entry to all bots in the
    kernel fleet.
    """

    def __init__(zelf, sleeptime, *args, **kwargs):
        Object.__init__(zelf, *args, **kwargs)
        zelf.sleeptime = sleeptime
        # NOTE(review): threading.Timer fires exactly once and poll() does
        # not re-arm it, so each RSS instance polls a single time — confirm
        # that is intended (a repeating poller would reschedule in poll()).
        zelf.timer = threading.Timer(sleeptime, zelf.poll)
        zelf.timer.start()

    def poll(zelf, *args, **kwargs):
        """Fetch all stored feeds and announce "title -=- link" per entry."""
        # Imported lazily so the module loads even without feedparser present.
        import mini.feedparser
        o = Object()
        for obj in o.get_all("rss"):
            # logging.warn is a deprecated alias; use warning with lazy args.
            logging.warning("poll %s", obj.rss)
            data = mini.feedparser.parse(obj.rss)
            for entry in data["entries"]:
                result = "%s -=- %s" % (entry["title"], entry["link"])
                for bot in kernel.fleet:
                    bot.announce(result)