query wikipedia .. use countrycode to select a country specific wikipedia.
fetch wiki data
arguments: <searchtxt> ["-"<countrycode>] - search wikipedia, you can provide an optional country code.
parse wiki data.
# fbf/plugs/common/wikipedia.py # # """ query wikipedia .. use countrycode to select a country specific wikipedia. """
from fbf.utils.url import geturl, striphtml from fbf.utils.generic import splittxt, handle_exception, fromenc from fbf.lib.commands import cmnds from fbf.lib.examples import examples from fbf.utils.rsslist import rsslist
from urllib.parse import quote import re import logging
# Captures whatever sits between the literal markers "start content" and
# "end content".  Only MULTILINE is set (not DOTALL), so "." does not cross
# newlines and both markers have to be on the same line for a match.
wikire = re.compile(r'start content(.*?)end content', flags=re.MULTILINE)
# Matches a redirect marker followed by its target.  getwikidata() rewrites
# "[[Target]]" into "<b>Target</b>" before we see the text, so both spellings
# are accepted.
_REDIRECT_RE = re.compile(
    r'#redirect\s*:?\s*(?:<b>(.*?)</b>|\[\[(.*?)\]\])', re.I)


def _wiki_urls(lang, title):
    """ return (export url, article url) for title on the lang wikipedia. """
    quoted = quote(title.encode('utf-8'))
    return ('http://%s.wikipedia.org/wiki/Special:Export/%s' % (lang, quoted),
            'http://%s.wikipedia.org/wiki/%s' % (lang, quoted))


def _redirect_target(txt):
    """ return the page a redirect stub points at, or "" if txt is no redirect. """
    match = _REDIRECT_RE.search(txt)
    if match:
        target = match.group(1) or match.group(2) or ""
    elif '#REDIRECT' in txt or '#redirect' in txt:
        # No recognisable link markup: fall back to "everything after the
        # first word", which is what this function historically did.
        target = ' '.join(txt.split()[1:])
    else:
        return ""
    # A "#section" fragment would be percent-encoded into the page title and
    # make the lookup fail, so only the page name is kept.
    return target.split('#', 1)[0].strip()


def searchwiki(txt, lang='en'):
    """ parse wiki data.

        txt is the raw search string.  Every whitespace separated word is
        capitalized and the words are joined with "_" to form the page title.
        A word of the form "-xx" (dash plus two ASCII letters) selects the
        xx.wikipedia.org site instead of lang; any other word starting with
        "-" is ignored.

        Returns a (text, url) tuple: text is the converted article text from
        getwikidata() ("" when nothing could be fetched) and url is the
        human readable article url.

    """
    words = []
    for token in txt.split():
        if token.startswith('-'):
            code = token[1:]
            # The code ends up in the hostname, so only accept two letters.
            if re.fullmatch(r'[A-Za-z]{2}', code):
                lang = code
            continue
        words.append(token.capitalize())
    what = "_".join(words)
    url, url2 = _wiki_urls(lang, what)
    txt = getwikidata(url)
    if not txt:
        return ("", url2)
    if 'from other capitalisation' in txt:
        # Retry with every word part title-cased.
        what = what.title()
        url, url2 = _wiki_urls(lang, what)
        txt = getwikidata(url)
        if not txt:
            # The retry can fail too; without this guard the membership
            # tests below would raise TypeError on None.
            return ("", url2)
    redir = _redirect_target(txt)
    if redir:
        url, url2 = _wiki_urls(lang, redir)
        txt = getwikidata(url)
    return (txt or "", url2)
# Ordered (pattern, replacement) pairs turning wiki markup into simple HTML.
# The order matters: "==x==" must be rewritten before "=x=", and list items
# and blank lines must be handled before whitespace is collapsed.
_WIKI_SUBSTITUTIONS = (
    (re.compile(r'\[\[(.*?)\]\]'), r'<b>\g<1></b>'),
    (re.compile(r'{{(.*?)}}'), r'<i>\g<1></i>'),
    (re.compile(r'==(.*?)=='), r'<h3>\g<1></h3>'),
    (re.compile(r'=(.*?)='), r'<h2>\g<1></h2>'),
    (re.compile(r'\*(.*?)\n'), r'<li>\g<1></li>'),
    (re.compile(r'\n\n'), r'<br><br>'),
    (re.compile(r'\s+'), r' '),
)


def _wikimarkup_to_html(txt):
    """ apply the wiki markup substitutions to txt and return the result. """
    for pattern, replacement in _WIKI_SUBSTITUTIONS:
        txt = pattern.sub(replacement, txt)
    return txt.replace('|', ' - ')


def getwikidata(url):
    """ fetch wiki data

        Fetches url with geturl(), feeds the response to rsslist() and takes
        the 'text' entry of the first item that has one.  That text is then
        converted from wiki markup to simple HTML tags.

        Returns the converted text ("" when no item carries a 'text' entry)
        or None when the fetch raised IOError or returned nothing.

    """
    try:
        result = geturl(url)
    except IOError as ex:
        logging.error("error fetching %s: %s", url, ex)
        return None
    if not result:
        return None
    txt = ""
    for item in rsslist(result):
        logging.debug("%s", item)
        try:
            txt = item['text']
        except Exception:
            # Best effort: items without a usable 'text' entry are skipped.
            # Unlike a bare except this lets KeyboardInterrupt/SystemExit
            # propagate.
            continue
        break
    return _wikimarkup_to_html(txt)
# Extract the list items / level-2 headings that getwikidata() produced.
resultre1 = re.compile("(<li>.*?</li>)")
resultre2 = re.compile("(<h2>.*?</h2>)")


def handle_wikipedia(bot, ievent):
    """ arguments: <searchtxt> ["-"<countrycode>] - search wikipedia, you can provide an optional country code. """
    # NOTE(review): the docstring above is kept verbatim because it reads like
    # user-facing usage text and may be surfaced as command help - confirm.
    if not ievent.rest:
        ievent.missing('<searchtxt>')
        return
    txt, url = searchwiki(ievent.rest)
    if not txt:
        ievent.reply('no result found')
        return
    prefix = '%s ===> ' % url
    # Prefer list items; fall back to headings when the page has none.
    for pattern in (resultre1, resultre2):
        found = pattern.findall(txt)
        if found:
            # Only private sxmpp conversations get the full, unpaged result.
            showall = bot.type == "sxmpp" and not ievent.groupchat
            ievent.reply(prefix, found, dot="<br>", showall=showall)
            return
    # Was "event.rest": an undefined name that raised NameError whenever a
    # page had neither list items nor headings.
    ievent.reply("no data found on %s" % ievent.rest)


cmnds.add('wikipedia', handle_wikipedia, ['USER', 'GUEST'])
examples.add('wikipedia', 'search wikipedia for <what>','1) wikipedia bot 2) wikipedia -nl bot')