markov

Markov Talk for Gozerbot

The Chain:
(predictate) -> [list of possible words]
TODO:
  • Propabilities
  • Start searching for full sentence, not just the first ORDER_K words of a sentence
BHJTW:
  • adapted for JSONBOT, FBFBOT
class fbf.plugs.extra.markov.BeginMarker

Bases: fbf.plugs.extra.markov.Marker

class fbf.plugs.extra.markov.EndMarker

Bases: fbf.plugs.extra.markov.Marker

class fbf.plugs.extra.markov.Marker

Bases: builtins.object

class fbf.plugs.extra.markov.NickMarker

Bases: fbf.plugs.extra.markov.Marker

fbf.plugs.extra.markov.cb_markovjoin(bot, ievent)

callback to run on JOIN

fbf.plugs.extra.markov.cb_markovtalk(bot, ievent)

learn from everything that is being spoken to the bot

fbf.plugs.extra.markov.cb_markovtalk_test(bot, ievent)

callback precondition

fbf.plugs.extra.markov.dummycb(bot, event)
fbf.plugs.extra.markov.enabled(botname, channel)
fbf.plugs.extra.markov.getline(text_line)

get line from markovvhains

fbf.plugs.extra.markov.getreply(bot, ievent, text_line)

get 20 replies and choose the largest one

fbf.plugs.extra.markov.handle_markov(bot, ievent)

this is the command to make the bot reply a markov response

fbf.plugs.extra.markov.handle_markovdisable(bot, ievent)

disable markov in a channel

fbf.plugs.extra.markov.handle_markovenable(bot, ievent)

enable markov in a channel .. learn the log of that channel

fbf.plugs.extra.markov.handle_markovlearn(bot, ievent)

command to let the bot learn a log or an url .. learned data is not persisted

fbf.plugs.extra.markov.handle_markovlearnadd(bot, ievent)

add log or url to be learned at startup or reload

fbf.plugs.extra.markov.handle_markovlearndel(bot, ievent)

remove item from learnlist

fbf.plugs.extra.markov.handle_markovlearnlist(bot, ievent)

show the learnlist

fbf.plugs.extra.markov.handle_markovonjoinadd(bot, ievent)

add channel to onjoin list

fbf.plugs.extra.markov.handle_markovonjoinremove(bot, ievent)

remove channel from onjoin list

fbf.plugs.extra.markov.handle_markovsize(bot, ievent)

markov-size .. returns size of markovchains

fbf.plugs.extra.markov.i2o(iorder)
fbf.plugs.extra.markov.init()

init plugin

fbf.plugs.extra.markov.iscommand(bot, ievent)

check to see if ievent is a command

fbf.plugs.extra.markov.markovlearnlog(chan)

learn a log

fbf.plugs.extra.markov.markovlearnspider(target)
fbf.plugs.extra.markov.markovlearnurl(url)

learn an url

fbf.plugs.extra.markov.markovtalk_learn(text_line)

this is the function were a text line gets learned

fbf.plugs.extra.markov.markovtrain(l)

train items in list

fbf.plugs.extra.markov.msg_to_array(msg)

convert string to lowercased items in list

fbf.plugs.extra.markov.mw(w)
fbf.plugs.extra.markov.o2i(order)
fbf.plugs.extra.markov.pre_markovjoin(bot, ievent)
fbf.plugs.extra.markov.size()

return size of markov chains

fbf.plugs.extra.markov.strip_txt(bot, txt)

strip bot nick and addressing

CODE

# plugs/markov.py
#
#

"""

Markov Talk for Gozerbot

The Chain:
    (predictate) -> [list of possible words]

TODO:
    - Propabilities
    - Start searching for full sentence, not just the first ORDER_K words
      of a sentence

BHJTW:
    - adapted for JSONBOT, FBFBOT

"""

__copyright__ = 'this file is in the public domain'
__author__ =  'Bas van Oostveen'
__coauthor__ = 'Bart Thate <feedbackloop@gmail.com>'

fbf imports

from fbf.lib.datadir import getdatadir
from fbf.utils.url import geturl, striphtml, geturl2
from fbf.utils.generic import jsonstring
from fbf.lib.persist import PlugPersist
from fbf.lib.commands import cmnds
from fbf.lib.examples import examples
from fbf.lib.callbacks import callbacks
from fbf.lib.plugins import plugs as plugins
from fbf.lib.threads import start_new_thread
from fbf.utils.limlist import Limlist
from fbf.lib.persist import PersistCollection, Persist
from fbf.utils.exception import handle_exception
from fbf.utils.name import reversename
from os.path import join as _j

basic imports

import urllib
import time
import re
import random
import types
import logging
import os

from fbf.lib.persistconfig import PersistConfig

config stuff

cfg = PersistConfig()
cfg.define('enable', [])
cfg.define('command', 0)
cfg.define('onjoin', [])
cfg.define('loud', 0)
cfg.define("target", "fbfbot")

enabled function

def enabled(botname, channel):
    if jsonstring([botname, channel]) in cfg['enable']:
        return True

Markers (is Marker the correct name for this?)

class Marker: pass
class BeginMarker(Marker): pass
class EndMarker(Marker): pass
class NickMarker(Marker): pass

Tokens

TOKEN = Marker()
TOKEN_BEGIN = BeginMarker()
TOKEN_END = EndMarker()
TOKEN_NICK = NickMarker()

Order-k, use predictate [-k:] = [word,word,]

# if ORDER_K==1: { ('eggs'):['with','spam',], 'with': ['bacon','green',] }
# if ORDER_K==2: { ('eat','eggs'):['with',TOKEN,), ('eggs','with'): ['bacon',] }
# ...
# Logical setting is often 2 or 3

ORDER_K = 2

Maximum generation cycles

MAXGEN = 500

markovlearn data

markovlearn = PlugPersist('markovlearn')
markovlearn.data.l = markovlearn.data.l or []
markovwords = {}
markovwordi = []
markovchains = {}

dummy callback to load this plugin on START

def dummycb(bot, event): pass

callbacks.add('START', dummycb)

plugin init

def init():
    """ init plugin """
    if not cfg.get('enable'): return 1
    callbacks.add("PRIVMSG", cb_markovtalk, cb_markovtalk_test, threaded=True)
    callbacks.add('JOIN', cb_markovjoin, threaded=True)
    callbacks.add('MESSAGE', cb_markovtalk, cb_markovtalk_test, threaded=True)
    callbacks.add('CONSOLE', cb_markovtalk, cb_markovtalk_test, threaded=True)
    start_new_thread(markovtrain, (markovlearn.data.l,))
    return 1

plugin size

def size():
    """ return size of markov chains """
    return len(markovchains)

markovtrain function

def markovtrain(l):
    """ train items in list """
    time.sleep(1)
    logging.warn("list to scan is: %s" % ",".join(l))
    for i in l:
        if i.startswith('http://'): start_new_thread(markovlearnurl, (i,))
        elif i.startswith('spider://'): start_new_thread(markovlearnspider, (i,))
        elif i.startswith('spiders://'): start_new_thread(markovlearnspider, (i,))
        else: start_new_thread(markovlearnlog, (i,))
    return 1

iscommand function

def iscommand(bot, ievent):
    """ check to see if ievent is a command """
    if not ievent.txt: return 0
    try: cc = bot.channels[ievent.channel]['cc']
    except (TypeError, KeyError): cc = None
    txt = ""
    if cc and ievent.txt[0] == cc: txt = ievent.txt[1:]
    if ievent.txt.startswith(bot.nick + ':') or ievent.txt.startswith(bot.nick + ','): txt = ievent.txt[len(bot.nick)+1:]
    oldtxt = ievent.txt
    ievent.txt = txt
    result = plugins.woulddispatch(bot, ievent)
    ievent.txt = oldtxt
    return result

markov callbacks

def pre_markovjoin(bot, ievent):
    if ievent.forwarded or ievent.relayed: return False
    return True


def cb_markovjoin(bot, ievent):
    """ callback to run on JOIN """
    # check if its we who are joining
    nick = ievent.nick.lower()
    if nick in bot.splitted: return
    if nick == bot.cfg.nick: return
    # check if (bot.name, ievent.channel) is in onjoin list if so respond
    try: onjoin = cfg.get('onjoin')
    except KeyError: onjoin = None
    if type(onjoin) != list: return
    if jsonstring([bot.name, ievent.channel]) in onjoin:
        txt = getreply(bot, ievent, ievent.nick + ':')
        if txt: ievent.reply('%s: %s' % (ievent.nick, txt))

def cb_markovtalk_test(bot, ievent):
    """ callback precondition """
    if ievent.iscmnd(): return False
    return True

def cb_markovtalk(bot, ievent):
    """ learn from everything that is being spoken to the bot """
    txt = strip_txt(bot, ievent.txt)
    # markovtalk_learn
    if enabled(bot.cfg.name, ievent.channel): markovtalk_learn(txt)
    # if command is set in config then we don't respond in callback
    elif not cfg.get('loud'): return
    itxt = ievent.txt.lower()
    # check is bot.nick is in ievent.txt if so give response
    botnick = cfg.target
    #responsenicks = (botnick, botnick+":", botnick+",")
    if botnick in itxt or cfg.get('loud') and ievent.msg:
        # reply when called
        result = getreply(bot, ievent, txt)
        # dont reply if answer is going to be the same as question
        if not result: return
        if result.lower() == txt.lower(): return
        ievent.reply(result)

re to strip first word of logline

txtre = re.compile('^\S+ ')

markovlearnspider function

def markovlearnspider(target):
    logging.warn("starting spider learn on %s" % target)
    coll = PersistCollection(getdatadir() + os.sep + 'spider' + os.sep + "data")
    if target.startswith("spider://"): target = target[9:]
    objs = coll.search('url', target)
    for obj in objs:
        if not obj.data and obj.data.url: continue
        time.sleep(0.001)
        if target not in obj.data.url: continue
        logging.warn("url is %s" % obj.data.url)
        try:
            if obj.data and obj.data.txt:
                for line in obj.data.txt.split("\n"):
                    if line.count(";") > 1: continue
                    markovtalk_learn(striphtml(line))
        except: handle_exception()

markovlearnlog function

def markovlearnlog(chan):
    """ learn a log """
    lines = 0
    logfiles = os.listdir(getdatadir() + os.sep + 'chatlogs')
    for filename in logfiles:
        if chan[1:] not in filename: continue
        logging.warn("opening %s" % reversename(filename))
        for line in open(getdatadir() + os.sep + 'chatlogs' + os.sep + filename, 'r'):
            if lines % 10 == 0: time.sleep(0.001)
            if not line: continue
            lines += 1
            try:
                txt = ' '.join(line.strip().split()[2:]) # log format is: 2011-08-07 00:02:16  <botfather> love, peace and happiness
                markovtalk_learn(txt)
            except IndexError: continue
    logging.warn('learning %s log done. %s lines' % (chan, lines))
    return lines

markovlearnurl function

def markovlearnurl(url):
    """ learn an url """
    lines = 0
    logging.warn('learning %s' % url)
    try: f = geturl2(url)
    except urllib.error.URLError as ex: logging.warn("error learning from url: %s" % url) ; return []
    for line in f.split('\n'):
        line = striphtml(line)
        if lines % 10 == 0: time.sleep(0.01)
        line = line.strip()
        if not line: continue
        markovtalk_learn(line)
        lines += 1
    logging.warn('learning %s done' % url)
    return lines

markovtalk_learn function

def markovtalk_learn(text_line):
    """ this is the function were a text line gets learned """
    text_line = msg_to_array(text_line)
    length = len(text_line)
    order = [TOKEN, ] * ORDER_K
    for i in range(length-1):
        order.insert(0, text_line[i])
        order = order[:ORDER_K]
        next_word = text_line[i+1]
        key = markovchains.setdefault(o2i(order), [])
        if not next_word in key: key.append(mw(next_word))

strip_txt function

def strip_txt(bot, txt):
    """ strip bot nick and addressing """
    # TODO: strip other nicks, preferably replacing them with something like
    # TOKEN_NICK
    txt = txt.replace(cfg.target, "")
    txt = txt.replace("%s," % bot.cfg.nick, "")
    txt = txt.replace("%s:" % bot.cfg.nick, "")
    txt = txt.replace("%s" % bot.cfg.nick, "")
    return txt.strip()

helper functions

def msg_to_array(msg):
    """ convert string to lowercased items in list """
    return [word.strip().lower() for word in msg.strip().split()]

def mw(w):
    if not w in markovwords:
        wi = len(markovwordi)
        markovwordi.append(w)
        markovwords[w] = wi
        return wi
    return markovwords[w]

def o2i(order):
    return tuple(mw(w) for w in order)

def i2o(iorder):
    return tuple(markovwordi[i] for i in iorder)

getreply function

def getreply(bot, ievent, text_line):
    """ get 20 replies and choose the largest one """
    if not text_line: return "blurp .. no input"
    txt = text_line
    text_line = msg_to_array(text_line)
    wordsizes = {}
    maxsize = 0
    for i in text_line:
        wordsizes[len(i)] = i
        if len(i) > maxsize: maxsize = len(i)
    results = []
    keywords = ['is', 'are', "can", "will", "shall"]
    max = maxsize
    p = text_line
    if True:
        for pp in p:
            for k in keywords:
                line = getline('%s %s' % (pp, k))
                if line and line not in results: results.append(line) ; p = line
    if not results: return ""
    #res = []
    #for result in results[:3]:
    #    if len(result.split()) > 1: res.append(result.capitalize())
    #r = '. '.join(res)
    r = random.choice(results)
    if not r.endswith("."): r += "."
    return r.capitalize()

getline function

def getline(text_line):
    """ get line from markovvhains """
    text_line = msg_to_array(text_line)
    order = Limlist(ORDER_K)
    for i in range(ORDER_K): order.append(TOKEN)
    teller = 0
    for i in text_line[:ORDER_K]:
        order[teller] = i
        teller += 1
    output = ""
    prev = ""
    for i in range(MAXGEN):
        try:
            logging.debug(str(order))
            successorList = i2o(markovchains[o2i(order)])
            logging.debug(str(successorList))
        except KeyError as ex: continue
        word = successorList[0]
        if not word: break
        for word in successorList:
            if word not in output: output = output + " "  + word
        order.insert(0, word)
        order = order[:ORDER_K]
    logging.warn(output)
    output = output.replace('"""', '')
    output = output.replace(". ", "")
    output = output.lower()
    return output.strip()

markov-size command

def handle_markovsize(bot, ievent):
    """ markov-size .. returns size of markovchains """
    ievent.reply("I know %s phrases" % str(len(list(markovchains.keys()))))

cmnds.add('markov-size', handle_markovsize, 'OPER')
examples.add('markov-size', 'size of markovchains', 'markov-size')

markov-learn command

def handle_markovlearn(bot, ievent):
    """ command to let the bot learn a log or an url .. learned data
        is not persisted """
    try: item = ievent.args[0]
    except IndexError: ievent.reply('<channel>|<url>') ; return
    if item.startswith('http://'):
        nrlines = markovlearnurl(item)
        ievent.reply('learned %s lines' % nrlines)
        return
    ievent.reply('learning log file %s' % item)
    nrlines = markovlearnlog(item)
    ievent.reply('learned %s lines' % nrlines)

cmnds.add('markov-learn', handle_markovlearn, 'OPER', threaded=True)
examples.add('markov-learn', 'learn a logfile or learn an url', '1) markov-learn #dunkbots 2) markov-learn http://gozerbot.org')

markov-learnadd command

def handle_markovlearnadd(bot, ievent):
    """ add log or url to be learned at startup or reload """
    try: item = ievent.args[0]
    except IndexError: ievent.missing('<channel>|<url>|spider:<url>') ; return
    if item in markovlearn.data.l: ievent.reply('%s is already in learnlist' % item) ; return
    markovlearn.data.l.append(item)
    markovlearn.save()
    start_new_thread(markovtrain, (markovlearn.data.l,))
    ievent.reply('done')

cmnds.add('markov-learnadd', handle_markovlearnadd, 'OPER')
examples.add('markov-learnadd', 'add channel or url to permanent learning .. this will learn the item on startup', '1) markov-learnadd #dunkbots 2) markov-learnadd http:///docs/fbfbot')

markov-learnlist command

def handle_markovlearnlist(bot, ievent):
    """ show the learnlist """
    ievent.reply(str(markovlearn.data.l))

cmnds.add('markov-learnlist', handle_markovlearnlist, 'OPER')
examples.add('markov-learnlist', 'show items in learnlist', 'markov-learnlist')

markov-learndel command

def handle_markovlearndel(bot, ievent):
    """ remove item from learnlist """
    try: item = ievent.args[0]
    except IndexError: ievent.missing('<channel>|<url>') ; return
    if item not in markovlearn.data.l: ievent.reply('%s is not in learnlist' % item) ; return
    markovlearn.data.l.remove(item)
    markovlearn.save()
    ievent.reply('done')

cmnds.add('markov-learndel', handle_markovlearndel, 'OPER')
examples.add('markov-learndel', 'remove item from learnlist', '1) markov-learndel #dunkbots 2) markov-learndel http:///docs/fbfbot')

markov command

def handle_markov(bot, ievent):
    """ this is the command to make the bot reply a markov response """
    if not enabled(bot.cfg.name, ievent.channel): ievent.reply('markov is not enabled in %s' % ievent.channel) ; return
    if not ievent.rest: ievent.missing('<txt>') ; return
    result = getreply(bot, ievent, strip_txt(bot, ievent.rest))
    if result: ievent.reply(result)

cmnds.add('markov', handle_markov, ['USER', 'WEB', 'CLOUD'])
examples.add('markov', 'ask for markov response', 'markov nice weather')

markov-onjoinadd command

def handle_markovonjoinadd(bot, ievent):
    """ add channel to onjoin list """
    try: channel = ievent.args[0]
    except IndexError: channel = ievent.channel
    if (bot.cfg.name, channel) in cfg.get('onjoin'): ievent.reply('%s already in onjoin list' % channel) ; return
    cfg.get('onjoin').append((bot.cfg.name, channel))
    cfg.save()
    ievent.reply('%s added' % channel)

cmnds.add('markov-onjoinadd', handle_markovonjoinadd, 'OPER')
examples.add('markov-onjoinadd', 'add channel to onjoin config', '1) markov-onjoinadd 2) markov-onjoinadd #dunkbots')

markov-onjoinremove command

def handle_markovonjoinremove(bot, ievent):
    """ remove channel from onjoin list """
    try: channel = ievent.args[0]
    except IndexError: channel = ievent.channel
    try: cfg.get('onjoin').remove((bot.cfg.name, channel))
    except ValueError: ievent.reply("%s not in onjoin list" % channel) ; return
    cfg.save()
    ievent.reply('%s removed' % channel)

cmnds.add('markov-onjoinremove', handle_markovonjoinremove, 'OPER')
examples.add('markov-onjoinremove', 'remove channel from onjoin config', '1) markov-onjoinremove 2) markov-onjoinremove #dunkbots')

markov-enable command

def handle_markovenable(bot, ievent):
    """ enable markov in a channel .. learn the log of that channel """
    try: channel = ievent.args[0]
    except IndexError: channel = ievent.channel
    if not enabled(bot.cfg.name, channel): cfg.get('enable').append(jsonstring([bot.cfg.name, channel]))
    else: ievent.reply('%s is already enabled' % channel) ; return
    cfg.save()
    markovlearn.data.l.append(channel)
    markovlearn.save()
    ievent.reply('%s enabled' % channel)

cmnds.add('markov-enable', handle_markovenable, 'OPER')
examples.add('markov-enable', 'enable markov learning in [<channel>]', '1) markov-enable 2) markov-enable #dunkbots')

markov-disable command

def handle_markovdisable(bot, ievent):
    """ disable markov in a channel """
    try: channel = ievent.args[0]
    except IndexError: channel = ievent.channel
    if enabled(bot.cfg.name, channel): cfg.get('enable').remove(jsonstring([bot.cfg.name, channel]))
    else: ievent.reply('%s is not enabled' % channel) ; return
    cfg.save()
    try:
        markovlearn.data.l.remove(channel)
        markovlearn.save()
    except ValueError: pass
    ievent.reply('%s disabled' % channel)

cmnds.add('markov-disable', handle_markovdisable, 'OPER')
examples.add('markov-disable', 'disable markov learning in [<channel>]', '1) markov-disable 2) markov-disable #dunkbots')