.. _fbf.plugs.extra.urlinfo:

urlinfo
~~~~~~~

.. automodule:: fbf.plugs.extra.urlinfo
    :show-inheritance:
    :members:
    :undoc-members:

CODE
----

::

    # fbf/plugs/common/urlinfo.py
    # -*- coding: utf-8 -*-
    #
    #

    """ Catches URLs on channel and gives information about them like title, image size, etc.
        Uses http://whatisthisfile.appspot.com/ via XMLRPC

        Example:

        19:20 <@raspi> http://www.youtube.com/watch?v=9RZ-hYPAMFQ
        19:20 <@bot> Title: "YouTube - Black Knight Holy Grail"

        19:28 <@raspi> test http://www.raspi.fi foobar http://raspi.fi/wp-includes/images/rss.png
        19:28 <@bot> 1. Title: "raspi.fi" Redirect: http://raspi.fi/ 2. Image: 14x14
    """

    __author__ = "Pekka 'raspi' Järvinen - http://raspi.fi/"
    __license__ = 'BSD'

.. _fbf.plugs.extra.urlinfo_fbf_imports:

fbf imports
--------------

::

    from fbf.utils.exception import handle_exception
    from fbf.lib.callbacks import callbacks
    from fbf.lib.commands import cmnds
    from fbf.lib.persist import PlugPersist
    from fbf.lib.examples import examples
    from fbf.plugs.extra.tinyurl import get_tinyurl
    from fbf.lib.errors import URLNotEnabled

.. _fbf.plugs.extra.urlinfo_basic_import:

basic import
---------------

::

    import re
    import urllib.parse
    import xmlrpc.client
    import socket
    import logging

.. _fbf.plugs.extra.urlinfo_defines_:

defines
----------

::

    cfg = PlugPersist('urlinfo', {})

.. _fbf.plugs.extra.urlinfo_sanitize_function:

sanitize function
--------------------

::

    def sanitize(text):
        """ remove non-URL words, word by word. """
        text = text.strip()
        text = re.sub(r'\s\s+', ' ', text)
        tmp = ''
        for i in text.split(' '):
            if len(i) >= 5:
                if i.find('www.') != -1 or i.find('http') != -1:
                    tmp += i + ' '
        tmp = tmp.strip()
        tmp2 = ''
        for i in tmp.split(' '):
            if not i:
                continue
            if (i[0] == '(' and i[-1] == ')') or (i[0] == '[' and i[-1] == ']') or (i[0] == '<' and i[-1] == '>') or (i[0] == '{' and i[-1] == '}'):
                # first and last characters are a matching pair of ()[]{}<> -- strip them
                tmp2 += i[1:-1:1] + ' '
            else:
                tmp2 += i + ' '
        tmp2 = tmp2.strip()
        tmp = ''
        for i in tmp2.split(' '):
            if i.find('www.') == 0:
                tmp += 'http://' + i + ' '
            else:
                tmp += i + ' '
        tmp = tmp.strip()
        out = tmp
        return out

.. _fbf.plugs.extra.urlinfo_getUrls_function:

getUrls function
-------------------

::

    def getUrls(text):
        """ get the valid URLs found in the given text. """
        regex = r"http[s]?://[-A-Za-z0-9+&@#/%=~_()|!:,.;?]*[-A-Za-z0-9+&@#/%=~_()|?]"
        p = re.compile(regex)
        urls = []
        for i in text.split(' '):
            for x in p.findall(i):
                url = urllib.parse.urlparse(x)
                if url.geturl() not in urls:
                    urls.append(url.geturl())
        return urls

.. _fbf.plugs.extra.urlinfo_getUrlInfo_function:

getUrlInfo function
----------------------

::

    def getUrlInfo(text):
        """ get info about the URLs in the given text. """
        from fbf.utils.url import enabled
        if not enabled:
            raise URLNotEnabled
        out = ''
        text = sanitize(text)
        urls = getUrls(text)
        if urls:
            idx = 1
            for i in urls:
                o = ''
                try:
                    server = xmlrpc.client.ServerProxy("http://whatisthisfile.appspot.com/xmlrpc")
                    logging.info('urlinfo - XMLRPC query: %s' % i)
                    urlinfo = server.app.query(i)
                    if 'html' in urlinfo:
                        if 'title' in urlinfo['html']:
                            o += 'Title: "%s" ' % urlinfo['html']['title'].strip()
                    elif 'image' in urlinfo:
                        o += 'Image: %dx%d ' % (urlinfo['image']['width'], urlinfo['image']['height'])
                    if not o:
                        continue
                    if len(o):
                        if len(urls) > 1:
                            out += ' ' + str(idx) + '. '
                            idx += 1
                        out += o
                        if "tinyurl" in i:
                            out = out.strip()
                            out += " - %s" % i
                        elif "http://" not in out:
                            out = out.strip()
                            out += " - %s" % get_tinyurl(i)[0]
                except Exception:
                    pass
        return out.strip()

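The helper functions above can be exercised on their own, outside a running bot.
A minimal sketch, assuming the plugin is importable as ``fbf.plugs.extra.urlinfo``;
the chat line is made up and the values in the comments are illustrative:

::

    from fbf.plugs.extra.urlinfo import sanitize, getUrls

    line = "test http://www.raspi.fi foobar (www.example.com)"

    # sanitize() drops non-URL words, unwraps ()[]{}<> pairs and prefixes
    # bare www. hosts with http://
    cleaned = sanitize(line)
    print(cleaned)        # "http://www.raspi.fi http://www.example.com"

    # getUrls() runs the URL regex over the cleaned text and returns a
    # de-duplicated list of parsed URLs
    print(getUrls(cleaned))
    # ['http://www.raspi.fi', 'http://www.example.com']
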
.. _fbf.plugs.extra.urlinfo_callbacks_:

callbacks
------------

::

    def catchHasUrls(bot, ievent):
        """ catch channel chat for possible URLs. """
        if ievent.how == "background":
            return 0
        if ievent.channel in cfg.data and cfg.data[ievent.channel]:
            if len(ievent.txt) >= 5:
                if (ievent.txt.find('www.') != -1) or (ievent.txt.find('http') != -1):
                    return 1
        return 0

    def catchUrls(bot, ievent):
        """ the urlinfo callback. """
        bot.say(ievent.channel, getUrlInfo(ievent.txt))

    callbacks.add('PRIVMSG', catchUrls, catchHasUrls, threaded=True)
    callbacks.add('CONSOLE', catchUrls, catchHasUrls, threaded=True)
    callbacks.add('MESSAGE', catchUrls, catchHasUrls, threaded=True)
    callbacks.add('DISPATCH', catchUrls, catchHasUrls, threaded=True)
    callbacks.add('TORNADO', catchUrls, catchHasUrls, threaded=True)

.. _fbf.plugs.extra.urlinfo_urlinfo-enable_command:

urlinfo-enable command
-------------------------

::

    def handle_urlinfo_enable(bot, ievent):
        """ no arguments - enable urlinfo in a channel. """
        cfg.data[ievent.channel] = True
        cfg.save()
        ievent.reply('urlinfo enabled')

    cmnds.add('urlinfo-enable', handle_urlinfo_enable, ['OPER'])
    examples.add('urlinfo-enable', 'enable urlinfo in the channel', 'urlinfo-enable')

.. _fbf.plugs.extra.urlinfo_urlinfo-disable_command:

urlinfo-disable command
--------------------------

::

    def handle_urlinfo_disable(bot, ievent):
        """ no arguments - disable urlinfo in a channel. """
        cfg.data[ievent.channel] = False
        cfg.save()
        ievent.reply('urlinfo disabled')

    cmnds.add('urlinfo-disable', handle_urlinfo_disable, 'OPER')
    examples.add('urlinfo-disable', 'disable urlinfo in the channel', 'urlinfo-disable')

.. _fbf.plugs.extra.urlinfo_urlinfo-list_command:

urlinfo-list command
-----------------------

::

    def handle_urlinfo_list(bot, ievent):
        """ no arguments - list what channels are using urlinfo. """
        chans = []
        names = list(cfg.data.keys())
        names.sort()
        if not names:
            ievent.reply('none')
        else:
            ievent.reply('urlinfo enabled on channels: ', names)

    cmnds.add('urlinfo-list', handle_urlinfo_list, 'OPER')
    examples.add('urlinfo-list', 'show in which channels urlinfo is enabled', 'urlinfo-list')

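The commands above simply flip a per-channel boolean in the ``PlugPersist``
store, and ``catchHasUrls`` checks that flag before ``catchUrls`` is fired.
A minimal sketch of inspecting that state, assuming the plugin module is
importable; the channel name ``#example`` is hypothetical:

::

    from fbf.plugs.extra.urlinfo import cfg

    # what handle_urlinfo_enable does for ievent.channel
    cfg.data['#example'] = True
    cfg.save()                         # persist the flag through PlugPersist

    # catchHasUrls consults the same flag before catchUrls is fired
    print(cfg.data.get('#example'))    # True -> URLs seen in #example get looked up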