Source code for cslbot.hooks.url
# -*- coding: utf-8 -*-
# Copyright (C) 2013-2015 Samuel Damashek, Peter Foley, James Forcier, Srijay Kasturi, Reed Koser, Christopher Reffett, and Fox Wilson
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import logging
import multiprocessing
import re
from datetime import datetime, timedelta
from ..helpers import urlutils
from ..helpers.exception import CommandFailedException
from ..helpers.hook import Hook
from ..helpers.orm import Urls


def get_urls(msg):
    # crazy regex to match urls
    url_regex = re.compile(r"""(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))""")  # noqa
    return [x[0] for x in url_regex.findall(msg)]
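# Illustrative example (not part of the original module):
#   get_urls('see http://example.com and www.foo.org/bar for details')
# returns ['http://example.com', 'www.foo.org/bar']; findall() yields tuples
# because of the capture groups, hence the x[0] indexing above.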


@Hook('url', ['pubmsg', 'action'], ['config', 'db', 'nick', 'handler'])
def handle(send, msg, args):
    """ Get titles for urls.

    | Generate a short url.
    | Get the page title.
    """
    worker = args['handler'].workers
    result = worker.run_pool(get_urls, [msg])
    try:
        urls = result.get(5)
    except multiprocessing.TimeoutError:
        worker.restart_pool()
        send('Url regex timed out.')
        return
    for url in urls:
        # Prevent botloops
        if args['db'].query(Urls).filter(Urls.url == url, Urls.time > datetime.now() - timedelta(seconds=10)).count() > 1:
            return
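        # Fetch the page title, retrying a few times since the lookup can fail transiently.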
        title = None
        ex = None
        for _ in range(3):
            try:
                title = urlutils.get_title(url)
                break
            except CommandFailedException as e:
                # FIXME: there has to be a better way to do this
                ex = e
                logging.error(ex)
        if title is None:
            raise ex
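        # Shorten the url using the Google API key from the bot config.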
        key = args['config']['api']['googleapikey']
        short = urlutils.get_short(url, key)
        last = args['db'].query(Urls).filter(Urls.url == url).order_by(Urls.time.desc()).first()
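        # With the linkread feature enabled, announce the title, noting any earlier posting of the same url.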
        if args['config']['feature'].getboolean('linkread'):
            if last is not None:
                lasttime = last.time.strftime('%H:%M:%S on %Y-%m-%d')
                send("Url %s previously posted at %s by %s -- %s" % (short, lasttime, last.nick, title))
            else:
                send('** %s - %s' % (title, short))
        args['db'].add(Urls(url=url, title=title, nick=args['nick'], time=datetime.now()))