class fbf.plugs.extra.markov.BeginMarker

Bases: fbf.plugs.extra.markov.Marker

class fbf.plugs.extra.markov.EndMarker

Bases: fbf.plugs.extra.markov.Marker

class fbf.plugs.extra.markov.Marker

Bases: builtins.object

class fbf.plugs.extra.markov.NickMarker

Bases: fbf.plugs.extra.markov.Marker

fbf.plugs.extra.markov.cb_markovjoin(bot, ievent)

callback to run on JOIN

fbf.plugs.extra.markov.cb_markovtalk(bot, ievent)

learn from everything that is being spoken to the bot

fbf.plugs.extra.markov.cb_markovtalk_test(bot, ievent)

callback precondition

fbf.plugs.extra.markov.dummycb(bot, event)
fbf.plugs.extra.markov.enabled(botname, channel)

get line from markovchains

fbf.plugs.extra.markov.getreply(bot, ievent, text_line)

get 20 replies and choose the largest one

fbf.plugs.extra.markov.handle_markov(bot, ievent)

this is the command to make the bot reply a markov response

fbf.plugs.extra.markov.handle_markovdisable(bot, ievent)

disable markov in a channel

fbf.plugs.extra.markov.handle_markovenable(bot, ievent)

enable markov in a channel .. learn the log of that channel

fbf.plugs.extra.markov.handle_markovlearn(bot, ievent)

command to let the bot learn a log or an url .. learned data is not persisted

fbf.plugs.extra.markov.handle_markovlearnadd(bot, ievent)

add log or url to be learned at startup or reload

fbf.plugs.extra.markov.handle_markovlearndel(bot, ievent)

remove item from learnlist

fbf.plugs.extra.markov.handle_markovlearnlist(bot, ievent)

show the learnlist

fbf.plugs.extra.markov.handle_markovonjoinadd(bot, ievent)

add channel to onjoin list

fbf.plugs.extra.markov.handle_markovonjoinremove(bot, ievent)

remove channel from onjoin list

fbf.plugs.extra.markov.handle_markovsize(bot, ievent)

markov-size .. returns size of markovchains


init plugin

fbf.plugs.extra.markov.iscommand(bot, ievent)

check to see if ievent is a command


learn a log


learn an url


this is the function where a text line gets learned


train items in list


convert string to lowercased items in list
fbf.plugs.extra.markov.pre_markovjoin(bot, ievent)

return size of markov chains

fbf.plugs.extra.markov.strip_txt(bot, txt)

strip bot nick and addressing


# plugs/


Markov Talk for Gozerbot

The Chain:
    (predicate) -> [list of possible words]

    - Probabilities
    - Start searching for full sentence, not just the first ORDER_K words
      of a sentence

    - adapted for JSONBOT, FBFBOT


__copyright__ = 'this file is in the public domain'
__author__ =  'Bas van Oostveen'
__coauthor__ = 'Bart Thate <>'

fbf imports

from fbf.lib.datadir import getdatadir
from fbf.utils.url import geturl, striphtml, geturl2
from fbf.utils.generic import jsonstring
from fbf.lib.persist import PlugPersist
from fbf.lib.commands import cmnds
from fbf.lib.examples import examples
from fbf.lib.callbacks import callbacks
from fbf.lib.plugins import plugs as plugins
from fbf.lib.threads import start_new_thread
from fbf.utils.limlist import Limlist
from fbf.lib.persist import PersistCollection, Persist
from fbf.utils.exception import handle_exception
from import reversename
from os.path import join as _j

basic imports

import urllib
import time
import re
import random
import types
import logging
import os

from fbf.lib.persistconfig import PersistConfig

config stuff

# plugin configuration object; define() presumably registers a default value
# per key - confirm against PersistConfig
cfg = PersistConfig()
# list of jsonstring([botname, channel]) entries, tested by enabled()
cfg.define('enable', [])
# NOTE(review): 'command' is not read anywhere in the visible code
cfg.define('command', 0)
# list consulted by cb_markovjoin() and edited by the markov-onjoin* commands
cfg.define('onjoin', [])
# read by cb_markovtalk() when deciding whether to build a reply
cfg.define('loud', 0)
# NOTE(review): 'target' is not read anywhere in the visible code
cfg.define("target", "fbfbot")

enabled function

def enabled(botname, channel):
    """ check whether markov is enabled for a (botname, channel) pair

    The pair is looked up in cfg['enable'] in its jsonstring([botname, channel])
    form. Returns True when it is present and False otherwise (previously the
    negative case fell off the end and returned None).
    """
    return jsonstring([botname, channel]) in cfg['enable']

Markers (is Marker the correct name for this?)

class Marker:
    """ base class of the marker objects used next to real words """


class BeginMarker(Marker):
    """ marker type behind TOKEN_BEGIN """


class EndMarker(Marker):
    """ marker type behind TOKEN_END """


class NickMarker(Marker):
    """ marker type behind TOKEN_NICK """


# one shared instance per marker type; TOKEN pads the order list
TOKEN = Marker()
TOKEN_BEGIN = BeginMarker()
TOKEN_END = EndMarker()
TOKEN_NICK = NickMarker()

Order-k, use predicate [-k:] = [word,word,]

# if ORDER_K==1: { ('eggs'):['with','spam',], 'with': ['bacon','green',] }
# if ORDER_K==2: { ('eat','eggs'):['with',TOKEN,), ('eggs','with'): ['bacon',] }
# ...
# Logical setting is often 2 or 3


Maximum generation cycles

# upper bound on the number of iterations of the generation loop in getline()
MAXGEN = 500

markovlearn data

# NOTE(review): the next line is not valid Python as shown (two '=' and a
# dangling 'or') - part of the statement appears to be missing; restore it
# from the original source
markovlearn = PlugPersist('markovlearn') = or []
# word -> integer index, filled by mw()
markovwords = {}
# integer index -> word, read by i2o()
markovwordi = []
# tuple of word indices -> list of successor word indices, filled by markovtalk_learn()
markovchains = {}

dummy callback to load this plugin on START

def dummycb(bot, event):
    """ no-op callback .. accepts a bot and an event and does nothing """
    return None

callbacks.add('START', dummycb)

plugin init

def init():
    """ init plugin

    Returns 1 right away when cfg 'enable' is empty. Otherwise registers the
    markov callbacks for PRIVMSG, JOIN, MESSAGE and CONSOLE, starts a
    markovtrain thread and returns 1.
    """
    # nothing enabled: do not register any callbacks
    if not cfg.get('enable'): return 1
    callbacks.add("PRIVMSG", cb_markovtalk, cb_markovtalk_test, threaded=True)
    callbacks.add('JOIN', cb_markovjoin, threaded=True)
    callbacks.add('MESSAGE', cb_markovtalk, cb_markovtalk_test, threaded=True)
    callbacks.add('CONSOLE', cb_markovtalk, cb_markovtalk_test, threaded=True)
    # NOTE(review): "(,)" is not valid Python - the list argument that
    # markovtrain() expects is missing here; restore it from the original source
    start_new_thread(markovtrain, (,))
    return 1

plugin size

def size():
    """ report how many keys the markovchains dict currently holds """
    nrchains = len(markovchains)
    return nrchains

markovtrain function

def markovtrain(l):
    """ train items in list

    Every item of l is handed to a learner in its own thread:
      - items starting with 'http://' go to markovlearnurl
      - items starting with 'spider://' or 'spiders://' go to markovlearnspider
      - everything else goes to markovlearnlog

    Always returns 1.
    """
    # logging.warn is a deprecated alias that was removed in Python 3.13
    logging.warning("list to scan is: %s", ",".join(l))
    for item in l:
        if item.startswith('http://'): learner = markovlearnurl
        elif item.startswith(('spider://', 'spiders://')): learner = markovlearnspider
        else: learner = markovlearnlog
        start_new_thread(learner, (item,))
    return 1

iscommand function

def iscommand(bot, ievent):
    """ check to see if ievent is a command

    Removes a leading 'cc' character or a "<nick>:" / "<nick>," prefix from
    ievent.txt, asks plugins.woulddispatch() about the remainder and returns
    its result. ievent.txt is put back before returning. Returns 0 when
    ievent.txt is empty.
    """
    if not ievent.txt: return 0
    # NOTE(review): the key inside bot.channels[...] is missing as shown -
    # restore it from the original source. 'cc' is presumably the channel's
    # command character - confirm.
    try: cc = bot.channels[]['cc']
    except (TypeError, KeyError): cc = None
    txt = ""
    if cc and ievent.txt[0] == cc: txt = ievent.txt[1:]
    if ievent.txt.startswith(bot.nick + ':') or ievent.txt.startswith(bot.nick + ','): txt = ievent.txt[len(bot.nick)+1:]
    # temporarily swap in the stripped text so woulddispatch() sees it
    oldtxt = ievent.txt
    ievent.txt = txt
    result = plugins.woulddispatch(bot, ievent)
    ievent.txt = oldtxt
    return result

markov callbacks

def pre_markovjoin(bot, ievent):
    """ precondition .. accept only events that are neither forwarded nor relayed """
    skip = ievent.forwarded or ievent.relayed
    return not skip

def cb_markovjoin(bot, ievent):
    """ callback to run on JOIN

    Does nothing for nicks listed in bot.splitted or equal to bot.cfg.nick.
    When the onjoin check passes, a reply is generated from "<nick>:" and
    sent back prefixed with the nick.
    """
    # skip nicks in bot.splitted and the bot's own nick
    nick = ievent.nick.lower()
    if nick in bot.splitted: return
    if nick == bot.cfg.nick: return
    # check the onjoin list and respond if the entry is present
    try: onjoin = cfg.get('onjoin')
    except KeyError: onjoin = None
    if type(onjoin) != list: return
    # NOTE(review): the list passed to jsonstring() is empty as shown ("[,]") -
    # the values making up the entry are missing; restore from the original source
    if jsonstring([,]) in onjoin:
        txt = getreply(bot, ievent, ievent.nick + ':')
        if txt: ievent.reply('%s: %s' % (ievent.nick, txt))

def cb_markovtalk_test(bot, ievent):
    """ callback precondition .. only events that are not commands pass """
    return not ievent.iscmnd()

def cb_markovtalk(bot, ievent):
    """ learn from everything that is being spoken to the bot

    NOTE(review): several statements in this function are truncated as shown
    and no call that sends `result` is visible; restore from the original
    source before relying on it.
    """
    txt = strip_txt(bot, ievent.txt)
    # learn the stripped text when markov is enabled
    # NOTE(review): the arguments of enabled() and the end of the condition are missing
    if enabled(, markovtalk_learn(txt)
    # without 'loud' set in the config there is nothing more to do
    elif not cfg.get('loud'): return
    itxt = ievent.txt.lower()
    # check whether the bot nick occurs in the lowercased text; if so build a response
    # NOTE(review): the right-hand side of this assignment is missing
    botnick =
    #responsenicks = (botnick, botnick+":", botnick+",")
    if botnick in itxt or cfg.get('loud') and ievent.msg:
        # build a reply when called
        result = getreply(bot, ievent, txt)
        # dont reply if there is no answer or it equals the question
        if not result: return
        if result.lower() == txt.lower(): return

re to strip first word of logline

# matches the first run of non-whitespace characters at the start of a line
# plus the single space after it; raw string so that \S reaches the regex
# engine instead of being an invalid string escape
txtre = re.compile(r'^\S+ ')

markovlearnspider function

def markovlearnspider(target):
    """ learn from spider data matching target

    NOTE(review): most statements of this function are truncated as shown
    (missing call targets, conditions and the `try:` belonging to the final
    `except`); only the visible fragments are documented. Restore from the
    original source.
    """
    logging.warn("starting spider learn on %s" % target)
    # collection lives under <datadir>/spider/data
    coll = PersistCollection(getdatadir() + os.sep + 'spider' + os.sep + "data")
    # drop the 9-character "spider://" scheme prefix
    if target.startswith("spider://"): target = target[9:]
    objs ='url', target)
    for obj in objs:
        if not and continue
        if target not in continue
        logging.warn("url is %s" %
            if and
                for line in"\n"):
                    # skip lines containing more than one ';'
                    if line.count(";") > 1: continue
        except: handle_exception()

markovlearnlog function

def markovlearnlog(chan):
    """ learn a log

    Walks the files in <datadir>/chatlogs whose name contains chan without
    its first character and counts their lines. Returns the line count.

    NOTE(review): the `try:` matching `except IndexError` is missing and the
    extracted `txt` is not passed to any learner in the visible code;
    restore from the original source.
    """
    lines = 0
    logfiles = os.listdir(getdatadir() + os.sep + 'chatlogs')
    for filename in logfiles:
        # chan[1:] drops the first character of the channel name before matching
        if chan[1:] not in filename: continue
        logging.warn("opening %s" % reversename(filename))
        for line in open(getdatadir() + os.sep + 'chatlogs' + os.sep + filename, 'r'):
            # short sleep on every 10th count
            if lines % 10 == 0: time.sleep(0.001)
            if not line: continue
            lines += 1
                txt = ' '.join(line.strip().split()[2:]) # log format is: 2011-08-07 00:02:16  <botfather> love, peace and happiness
            except IndexError: continue
    logging.warn('learning %s log done. %s lines' % (chan, lines))
    return lines

markovlearnurl function

def markovlearnurl(url):
    """ learn an url

    Fetches url with geturl2(), strips the html from every line and feeds
    each non-empty line to markovtalk_learn().

    Returns the number of lines learned, or 0 when the url could not be
    fetched (previously an empty list, which did not match the int returned
    on success).
    """
    # "import urllib" alone does not guarantee that urllib.error is loaded
    import urllib.error
    lines = 0
    # logging.warn is a deprecated alias that was removed in Python 3.13
    logging.warning('learning %s', url)
    try: f = geturl2(url)
    except urllib.error.URLError:
        logging.warning("error learning from url: %s", url)
        return 0
    for line in f.split('\n'):
        line = striphtml(line)
        # short sleep on every 10th count
        if lines % 10 == 0: time.sleep(0.01)
        line = line.strip()
        if not line: continue
        # the line used to be counted but never learned
        markovtalk_learn(line)
        lines += 1
    logging.warning('learning %s done', url)
    return lines

markovtalk_learn function

def markovtalk_learn(text_line):
    """ this is the function where a text line gets learned

    The line is split into lowercased words. For every word except the last
    one, the ORDER_K most recent words (newest first, padded with TOKEN) form
    the key into markovchains and the index of the following word is recorded
    once as a successor of that key.
    """
    words = msg_to_array(text_line)
    order = [TOKEN, ] * ORDER_K
    for current, next_word in zip(words, words[1:]):
        order.insert(0, current)
        order = order[:ORDER_K]
        successors = markovchains.setdefault(o2i(order), [])
        # successors holds word indices, so the duplicate check has to compare
        # the index - comparing the word itself never matched
        next_index = mw(next_word)
        if next_index not in successors: successors.append(next_index)

strip_txt function

def strip_txt(bot, txt):
    """ strip bot nick and addressing

    Removes "<nick>,", "<nick>:" and the bare bot.cfg.nick from txt and
    returns the result with surrounding whitespace stripped.
    """
    # TODO: strip other nicks, preferably replacing them with a marker
    # NOTE(review): the first argument of the next replace() is missing as
    # shown - restore it from the original source
    txt = txt.replace(, "")
    # addressing forms are removed before the bare nick so that the
    # punctuation goes with it
    txt = txt.replace("%s," % bot.cfg.nick, "")
    txt = txt.replace("%s:" % bot.cfg.nick, "")
    txt = txt.replace("%s" % bot.cfg.nick, "")
    return txt.strip()

helper functions

def msg_to_array(msg):
    """ split msg on whitespace and return its words lowercased, in order """
    words = []
    for token in msg.strip().split():
        words.append(token.strip().lower())
    return words

def mw(w):
    """ map word w to its integer index, registering it on first use

    A new word is appended to markovwordi and its position stored in
    markovwords, so that markovwordi[mw(w)] is w - the lookup i2o() relies
    on. Without the append every word got index 0.
    """
    if w not in markovwords:
        wi = len(markovwordi)
        markovwordi.append(w)
        markovwords[w] = wi
        return wi
    return markovwords[w]

def o2i(order):
    """ translate a sequence of words into the tuple of their mw() indices """
    indices = [mw(word) for word in order]
    return tuple(indices)

def i2o(iorder):
    """ translate a sequence of indices back into the tuple of markovwordi entries """
    words = [markovwordi[index] for index in iorder]
    return tuple(words)

getreply function

def getreply(bot, ievent, text_line):
    """ build a reply for text_line from the markov chains

    Every word of the input is paired with each of a fixed set of keywords
    and the pair is used as seed for getline(). One of the distinct non-empty
    lines is picked at random, terminated with a "." and capitalized.

    bot and ievent are not used; they are kept so existing callers work.
    Returns "blurp .. no input" for empty input and "" when no line could be
    generated.
    """
    if not text_line: return "blurp .. no input"
    keywords = ['is', 'are', "can", "will", "shall"]
    results = []
    for word in msg_to_array(text_line):
        for keyword in keywords:
            line = getline('%s %s' % (word, keyword))
            if line and line not in results: results.append(line)
    if not results: return ""
    r = random.choice(results)
    if not r.endswith("."): r += "."
    return r.capitalize()

getline function

def getline(text_line):
    """ get line from markovchains

    Seeds the order window with the first ORDER_K words of text_line and
    runs up to MAXGEN lookups in markovchains, collecting successor words
    into the output string, which is returned lowercased and stripped.

    NOTE(review): the `try:` matching `except KeyError` is missing and the
    indentation of the lookup line is broken as shown; restore from the
    original source.
    """
    text_line = msg_to_array(text_line)
    # window of ORDER_K entries, initially all TOKEN
    order = Limlist(ORDER_K)
    for i in range(ORDER_K): order.append(TOKEN)
    # teller is the running position while copying the seed words in
    teller = 0
    for i in text_line[:ORDER_K]:
        order[teller] = i
        teller += 1
    output = ""
    prev = ""
    for i in range(MAXGEN):
            successorList = i2o(markovchains[o2i(order)])
        # order is unchanged on a miss, so the same lookup repeats until MAXGEN is reached
        except KeyError as ex: continue
        word = successorList[0]
        if not word: break
        for word in successorList:
            # substring test against the output built so far
            if word not in output: output = output + " "  + word
        # the last successor becomes the newest entry of the window
        order.insert(0, word)
        order = order[:ORDER_K]
    output = output.replace('"""', '')
    output = output.replace(". ", "")
    output = output.lower()
    return output.strip()

markov-size command

def handle_markovsize(bot, ievent):
    """ markov-size .. returns size of markovchains

    Replies with the number of keys in markovchains.
    """
    # len() of the dict is the key count; no need to copy the keys into a list
    ievent.reply("I know %s phrases" % len(markovchains))

cmnds.add('markov-size', handle_markovsize, 'OPER')
examples.add('markov-size', 'size of markovchains', 'markov-size')

markov-learn command

def handle_markovlearn(bot, ievent):
    """ command to let the bot learn a log or an url .. learned data
        is not persisted

    The first argument is learned with markovlearnurl() when it starts with
    'http://' and with markovlearnlog() otherwise; the number of learned
    lines is replied. Without an argument the usage string is replied.
    """
    try: item = ievent.args[0]
    except IndexError: ievent.reply('<channel>|<url>') ; return
    if item.startswith('http://'):
        nrlines = markovlearnurl(item)
    else:
        # only non-url items are log names; urls used to fall through to here
        ievent.reply('learning log file %s' % item)
        nrlines = markovlearnlog(item)
    ievent.reply('learned %s lines' % nrlines)

cmnds.add('markov-learn', handle_markovlearn, 'OPER', threaded=True)
examples.add('markov-learn', 'learn a logfile or learn an url', '1) markov-learn #dunkbots 2) markov-learn')

markov-learnadd command

def handle_markovlearnadd(bot, ievent):
    """ add log or url to be learned at startup or reload

    NOTE(review): the membership test and the markovtrain argument are
    truncated as shown, and no statement that stores the item is visible;
    restore from the original source.
    """
    try: item = ievent.args[0]
    except IndexError: ievent.missing('<channel>|<url>|spider:<url>') ; return
    # NOTE(review): the container tested with "in" is missing
    if item in ievent.reply('%s is already in learnlist' % item) ; return
    # NOTE(review): "(,)" is not valid Python - the list argument is missing
    start_new_thread(markovtrain, (,))

cmnds.add('markov-learnadd', handle_markovlearnadd, 'OPER')
examples.add('markov-learnadd', 'add channel or url to permanent learning .. this will learn the item on startup', '1) markov-learnadd #dunkbots 2) markov-learnadd http:///docs/fbfbot')

markov-learnlist command

def handle_markovlearnlist(bot, ievent):
    """ show the learnlist """
    # NOTE(review): no statements are visible here, so as shown this command
    # replies nothing - the body appears to be missing; restore from the
    # original source

cmnds.add('markov-learnlist', handle_markovlearnlist, 'OPER')
examples.add('markov-learnlist', 'show items in learnlist', 'markov-learnlist')

markov-learndel command

def handle_markovlearndel(bot, ievent):
    """ remove item from learnlist

    NOTE(review): the membership test is truncated as shown and no statement
    that removes the item is visible; restore from the original source.
    """
    try: item = ievent.args[0]
    except IndexError: ievent.missing('<channel>|<url>') ; return
    # NOTE(review): the container tested with "not in" is missing
    if item not in ievent.reply('%s is not in learnlist' % item) ; return

cmnds.add('markov-learndel', handle_markovlearndel, 'OPER')
examples.add('markov-learndel', 'remove item from learnlist', '1) markov-learndel #dunkbots 2) markov-learndel http:///docs/fbfbot')

markov command

def handle_markov(bot, ievent):
    """ this is the command to make the bot reply a markov response

    Replies with the getreply() result for the stripped input when there is one.

    NOTE(review): the enabled() arguments, the missing-input condition and
    the strip_txt() argument are truncated as shown; restore from the
    original source.
    """
    # refuse when markov is not enabled
    if not enabled(, ievent.reply('markov is not enabled in %s' % ; return
    # report missing input text
    if not ievent.missing('<txt>') ; return
    result = getreply(bot, ievent, strip_txt(bot,
    if result: ievent.reply(result)

cmnds.add('markov', handle_markov, ['USER', 'WEB', 'CLOUD'])
examples.add('markov', 'ask for markov response', 'markov nice weather')

markov-onjoinadd command

def handle_markovonjoinadd(bot, ievent):
    """ add channel to onjoin list

    The channel is taken from the first argument when given.

    NOTE(review): the fallback channel and the first tuple element are
    missing as shown; restore from the original source. Entries are appended
    here as tuples while cb_markovjoin() tests for a jsonstring(...) value -
    verify that both use the same entry format.
    """
    try: channel = ievent.args[0]
    except IndexError: channel =
    if (, channel) in cfg.get('onjoin'): ievent.reply('%s already in onjoin list' % channel) ; return
    cfg.get('onjoin').append((, channel))
    ievent.reply('%s added' % channel)

cmnds.add('markov-onjoinadd', handle_markovonjoinadd, 'OPER')
examples.add('markov-onjoinadd', 'add channel to onjoin config', '1) markov-onjoinadd 2) markov-onjoinadd #dunkbots')

markov-onjoinremove command

def handle_markovonjoinremove(bot, ievent):
    """ remove channel from onjoin list

    The channel is taken from the first argument when given. Replies that
    the channel is not in the list when list.remove() raises ValueError.

    NOTE(review): the fallback channel and the first tuple element are
    missing as shown; restore from the original source.
    """
    try: channel = ievent.args[0]
    except IndexError: channel =
    try: cfg.get('onjoin').remove((, channel))
    except ValueError: ievent.reply("%s not in onjoin list" % channel) ; return
    ievent.reply('%s removed' % channel)

cmnds.add('markov-onjoinremove', handle_markovonjoinremove, 'OPER')
examples.add('markov-onjoinremove', 'remove channel from onjoin config', '1) markov-onjoinremove 2) markov-onjoinremove #dunkbots')

markov-enable command

def handle_markovenable(bot, ievent):
    """ enable markov in a channel .. learn the log of that channel

    Appends the jsonstring entry for the channel to cfg 'enable' unless
    enabled() already reports it.

    NOTE(review): the fallback channel and the first argument of enabled()
    and jsonstring() are missing as shown, and no statement that learns the
    channel log is visible; restore from the original source.
    """
    try: channel = ievent.args[0]
    except IndexError: channel =
    if not enabled(, channel): cfg.get('enable').append(jsonstring([, channel]))
    else: ievent.reply('%s is already enabled' % channel) ; return
    ievent.reply('%s enabled' % channel)

cmnds.add('markov-enable', handle_markovenable, 'OPER')
examples.add('markov-enable', 'enable markov learning in [<channel>]', '1) markov-enable 2) markov-enable #dunkbots')

markov-disable command

def handle_markovdisable(bot, ievent):
    """ disable markov in a channel

    Removes the jsonstring entry for the channel from cfg 'enable' when
    enabled() reports it.

    NOTE(review): the fallback channel and the first argument of enabled()
    and jsonstring() are missing as shown, and the `except ValueError` below
    has no matching `try:`; restore from the original source.
    """
    try: channel = ievent.args[0]
    except IndexError: channel =
    if enabled(, channel): cfg.get('enable').remove(jsonstring([, channel]))
    else: ievent.reply('%s is not enabled' % channel) ; return
    except ValueError: pass
    ievent.reply('%s disabled' % channel)

cmnds.add('markov-disable', handle_markovdisable, 'OPER')
examples.add('markov-disable', 'disable markov learning in [<channel>]', '1) markov-disable 2) markov-disable #dunkbots')