# Source code for meds.rss
# meds/rss.py
#
#
""" rss module. """
from meds.utils.url import strip_html, get_feed
from meds.utils.tijd import to_time, file_time
from meds.clock import Repeater
from meds.errors import ENODATE
from meds.object import Object
from meds.utils.join import sj
from meds.cfg import Config, rss
from meds.core import launcher, objs, storage, fleet
import logging
import time
class RSS(Object):
    """ poll rss feeds, store unseen items and announce them to the fleet. """

    def __init__(self, *args, **kwargs):
        """ load the rss config and the persisted list of already seen links.

        The config starts from ``Config(rss)`` and is updated with the last
        stored "cfg"/"rss" object, if any.  ``objs.seen`` is reset to a fresh
        object and then replaced by the last stored "seen"/"rss" object when
        one exists.
        """
        super().__init__(*args, **kwargs)
        self._cfg = Config(rss)
        config = storage.last("cfg", "rss")
        if config:
            self._cfg.update(config)
        # start from an empty seen record so the attributes always exist.
        objs.seen = Object()
        objs.seen.list = []
        objs.seen.seen = "rss"
        objs.seen.prefix = "seen"
        obj = storage.last("seen", "rss")
        if obj:
            objs.seen = obj
        # logging.warn is a deprecated alias; logging.warning is the supported name.
        logging.warning("# seen %s", len(objs.seen.list))

    def start(self):
        """ launch a Repeater that calls fetcher() and return launch()'s result. """
        # 600 is presumably the interval in seconds -- confirm against meds.clock.Repeater.
        repeater = Repeater(600, self.fetcher)
        return launcher.launch(repeater.start)

    def fetcher(self):
        """ launch one fetch() per stored rss object, wait for them and sync the seen list. """
        thrs = []
        for obj in storage.find("rss"):
            # skip stored objects that carry no feed url.
            if not obj.rss:
                continue
            thrs.append(launcher.launch(self.fetch, obj))
        result = launcher.waiter(thrs)
        if result:
            # only truthy per-feed counters are shown; fall back to 0 when none are.
            logging.info("! fetched %s", sj(*[str(x) for x in result if x]) or 0)
        objs.seen.sync()

    def fetch(self, obj):
        """ fetch the feed at obj.rss, save and announce unseen items.

        Returns the number of items that were not seen before.
        """
        nr = 0
        for o in get_feed(obj.rss):
            if o.link in objs.seen.list:
                continue
            objs.seen.list.append(o.link)
            o.prefix = "feeds"
            o.services = "rss"
            if "published" in o:
                try:
                    date = file_time(to_time(o.published))
                    o.save(date)
                except ENODATE as ex:
                    # the item is still marked as seen and announced, it is just not saved.
                    logging.warning("EDATE %s", ex)
            else:
                o.save()
            # build the announcement once per item instead of once per bot.
            txt = self.display(o)
            if txt:
                for bot in fleet:
                    bot.announce(txt)
            nr += 1
        return nr

    def display(self, obj):
        """ return a " - " separated line describing obj, or None when there is nothing to show.

        The values of the keys in the config's display_list come first (html
        stripped); the summary is added once for every entry of the config's
        descriptions that occurs in obj.link.
        """
        parts = []
        for key in self._cfg.display_list:
            data = obj.get(key, None)
            if data:
                parts.append(str(strip_html(data.rstrip())))
        for check in self._cfg.descriptions:
            if check in obj.link:
                summary = obj.get("summary", None)
                if summary:
                    parts.append(str(summary))
        if not parts:
            return None
        return " - ".join(parts).rstrip()