.. _fbf.plugs.extra.wikipedia:

wikipedia
~~~~~~~~~

.. automodule:: fbf.plugs.extra.wikipedia
    :show-inheritance:
    :members:
    :undoc-members:

CODE
----

::

    # fbf/plugs/common/wikipedia.py
    #
    #

    """ query wikipedia .. use countrycode to select a country specific wikipedia. """

.. _fbf.plugs.extra.wikipedia_fbf_imports:

fbf imports
--------------

::

    from fbf.utils.url import geturl, striphtml
    from fbf.utils.generic import splittxt, handle_exception, fromenc
    from fbf.lib.commands import cmnds
    from fbf.lib.examples import examples
    from fbf.utils.rsslist import rsslist

.. _fbf.plugs.extra.wikipedia_generic_imports:

generic imports
------------------

::

    from urllib.parse import quote
    import re
    import logging

.. _fbf.plugs.extra.wikipedia_defines_:

defines
----------

::

    wikire = re.compile('start content(.*?)end content', re.M)

.. _fbf.plugs.extra.wikipedia_searchwiki_function:

searchwiki function
----------------------

::

    def searchwiki(txt, lang='en'):
        """ Search wikipedia for the query in *txt*.

            Words of the form "-xx" (a dash plus a two letter code, e.g. "-nl")
            select a language specific wikipedia; all other words form the
            query. Returns a (wikitext, article_url) tuple; wikitext is ""
            when the article can not be fetched.
        """
        words = []
        for word in txt.split():
            if word.startswith('-'):
                # "-xx" switches the target language; malformed flags are ignored
                if len(word) != 3:
                    continue
                lang = word[1:]
                continue
            words.append(word.strip().capitalize())
        what = "_".join(words)
        url = 'http://%s.wikipedia.org/wiki/Special:Export/%s' % (lang, quote(what.encode('utf-8')))
        url2 = 'http://%s.wikipedia.org/wiki/%s' % (lang, quote(what.encode('utf-8')))
        txt = getwikidata(url)
        if not txt:
            return ("", url2)
        if 'from other capitalisation' in txt:
            # retry with a title-cased query
            what = what.title()
            url = 'http://%s.wikipedia.org/wiki/Special:Export/%s' % (lang, quote(what.encode('utf-8')))
            url2 = 'http://%s.wikipedia.org/wiki/%s' % (lang, quote(what.encode('utf-8')))
            txt = getwikidata(url)
            # BUGFIX: the retry can also fail; without this guard the
            # '#REDIRECT' membership test below raises TypeError on None
            if not txt:
                return ("", url2)
        if '#REDIRECT' in txt or '#redirect' in txt:
            # follow a wiki redirect to its target page
            redir = ' '.join(txt.split()[1:])
            url = 'http://%s.wikipedia.org/wiki/Special:Export/%s' % (lang, quote(redir.encode('utf-8')))
            url2 = 'http://%s.wikipedia.org/wiki/%s' % (lang, quote(redir.encode('utf-8')))
            txt = getwikidata(url)
            # BUGFIX: keep the documented ("", url) shape instead of (None, url)
            if not txt:
                return ("", url2)
        return (txt, url2)

..
_fbf.plugs.extra.wikipedia_getwikidata_function: getwikidata function ----------------------- :: def getwikidata(url): """ fetch wiki data """ try: result = geturl(url) except IOError as ex: logging.error("error fetching %s: %s" % (url, str(ex))) ; return if not result: return res = rsslist(result) txt = "" for i in res: try: logging.debug(str(i)) txt = i['text'] break except: pass txt = re.sub('\[\[(.*?)\]\]', '\g<1>', txt) txt = re.sub('{{(.*?)}}', '\g<1>', txt) txt = re.sub('==(.*?)==', '