Source code for cslbot.helpers.textutils

# -*- coding: utf-8 -*-
# Copyright (C) 2013-2015 Samuel Damashek, Peter Foley, James Forcier, Srijay Kasturi, Reed Koser, Christopher Reffett, and Fox Wilson
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

import json
import re
import string
from html import escape, unescape
from random import choice, randint, random, randrange

from lxml import etree, html

from pkg_resources import Requirement, resource_string

from requests import get, post

from .tokens import token_cache

slogan_cache = []


def gen_removevowels(msg):
    return re.sub('[aeiouy]', '', msg, flags=re.I)

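# Illustrative example (not from the original source):
#   >>> gen_removevowels("The quick brown fox")
#   'Th qck brwn fx'
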
def gen_word():
    return get('http://randomword.setgetgo.com/get.php').text.strip()

def gen_hashtag(msg):
    msg = "".join([x.strip() for x in msg.split()])
    return '#' + msg.translate(dict.fromkeys(map(ord, string.punctuation)))

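# Illustrative example (not from the original source): whitespace is collapsed and ASCII
# punctuation is stripped before the leading '#' is added:
#   >>> gen_hashtag("Hello, World!")
#   '#HelloWorld'
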
def gen_yoda(msg):
    req = post("http://www.yodaspeak.co.uk/index.php", data={'YodaMe': msg})
    return html.fromstring(req.content.decode(errors='ignore')).findtext('.//textarea[@readonly]').strip()

def gen_gizoogle(msg):
    req = post("http://www.gizoogle.net/textilizer.php", data={'translatetext': escape(msg).encode('utf-7')})
    # This mess is needed because gizoogle has a malformed textarea, so the text isn't within the tag.
    response = unescape(html.tostring(html.fromstring(req.text).find('.//textarea')).decode('utf-7')).strip()
    response = re.sub(".*</textarea>", '', response)
    return unescape(response)

def gen_shakespeare(msg):
    # Originally from http://www.shmoop.com/shakespeare-translator/
    table = json.loads(resource_string(Requirement.parse('CslBot'), 'cslbot/static/shakespeare-dictionary.json').decode())
    replist = reversed(sorted(table.keys(), key=len))
    pattern = re.compile(r'\b(' + '|'.join(replist) + r')\b', re.I)
    # Normalize text to hopefully match more words.
    result = pattern.sub(lambda x: table[x.group().lower()], transform_text(msg))
    return result

def gen_praise(msg):
    praise = get_praise()
    while not praise:
        praise = get_praise()
    return '%s: %s' % (msg, praise)

def get_praise():
    doc = html.fromstring(get('http://www.madsci.org/cgi-bin/cgiwrap/~lynn/jardin/SCG').text)
    return doc.find('body/center/h2').text.replace('\n', ' ').strip()

def gen_fwilson(x, mode=None):
    if x.lower().startswith('fwil'):
        mode = 'w'
    if mode is None:
        mode = 'w' if random() < 0.5 else 'f'
    if mode == 'w':
        output = "wh%s %s" % ('e' * randrange(3, 20), x)
        return output.upper()
    else:
        output = ['fwil%s' % q for q in x.split()]
        output = ' '.join(output)
        return output.lower()

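# Illustrative example (not from the original source). Only the 'f' mode is deterministic;
# the 'w' mode prepends "WH" plus a random run of 3-19 'E's and upper-cases the input:
#   >>> gen_fwilson("hello world", mode='f')
#   'fwilhello fwilworld'
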
def gen_creffett(msg):
    # \x02 is the IRC bold control code; \x038,04 selects yellow text on a red background.
    return '\x02\x038,04%s!!!' % msg.upper()

def gen_slogan(msg):
    # Originally from sloganizer.com
    if not slogan_cache:
        slogan_cache.extend(resource_string(Requirement.parse('CslBot'), 'cslbot/static/slogans').decode().splitlines())
    return re.sub('%s', msg, choice(slogan_cache))

def gen_morse(msg):
    morse_codes = {"a": ".-", "b": "-...", "c": "-.-.", "d": "-..", "e": ".", "f": "..-.", "g": "--.", "h": "....",
                   "i": "..", "j": ".---", "k": "-.-", "l": ".-..", "m": "--", "n": "-.", "o": "---", "p": ".--.",
                   "q": "--.-", "r": ".-.", "s": "...", "t": "-", "u": "..-", "v": "...-", "w": ".--", "x": "-..-",
                   "y": "-.--", "z": "--..", "1": ".----", "2": "..---", "3": "...--", "4": "....-", "5": ".....",
                   "6": "-....", "7": "--...", "8": "---..", "9": "----.", "0": "-----", " ": " ", ".": ".-.-.-",
                   ",": "--..--", "?": "..--..", "'": ".----.", "!": "-.-.--", "/": "-..-.", "(": "-.--.",
                   ")": "-.--.-", "&": ".-...", ":": "---...", ";": "-.-.-.", "=": "-...-", "+": ".-.-.",
                   "-": "-....-", "_": "..--.-", '"': ".-..-.", "$": "...-..-", "@": ".--.-."}
    morse = ""
    for i in msg.lower():
        if i in morse_codes:
            morse += morse_codes[i] + " "
        else:
            morse += "? "
    return morse

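# Illustrative example (not from the original source). Every encoded character is followed
# by a space (so the result carries a trailing space), and unknown characters become '?':
#   >>> gen_morse("sos")
#   '... --- ... '
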
def gen_insult(user):
    adj = ["acidic", "antique", "contemptible", "culturally-unsound", "despicable", "evil", "fermented", "festering",
           "foul", "fulminating", "humid", "impure", "inept", "inferior", "industrial", "left-over", "low-quality",
           "malodorous", "off-color", "penguin-molesting", "petrified", "pointy-nosed", "salty", "sausage-snorfling",
           "tastless", "tempestuous", "tepid", "tofu-nibbling", "unintelligent", "unoriginal", "uninspiring",
           "weasel-smelling", "wretched", "spam-sucking", "egg-sucking", "decayed", "halfbaked", "infected", "squishy",
           "porous", "pickled", "coughed-up", "thick", "vapid", "hacked-up", "unmuzzleld", "bawdy", "vain", "lumpish",
           "churlish", "fobbing", "rank", "craven", "puking", "jarring", "fly-bitten", "pox-marked", "fen-sucked",
           "spongy", "droning", "gleeking", "warped", "currish", "milk-livered", "surly", "mammering", "ill-borne",
           "beef-witted", "tickle-brained", "half-faced", "headless", "wayward", "rump-fed", "onion-eyed",
           "beslubbering", "villainous", "lewd-minded", "cockered", "full-gorged", "rude-snouted", "crook-pated",
           "pribbling", "dread-bolted", "fool-born", "puny", "fawning", "sheep-biting", "dankish", "goatish",
           "weather-bitten", "knotty-pated", "malt-wormy", "saucyspleened", "motley-mind", "it-fowling",
           "vassal-willed", "loggerheaded", "clapper-clawed", "frothy", "ruttish", "clouted", "common-kissing",
           "pignutted", "folly-fallen", "plume-plucked", "flap-mouthed", "swag-bellied", "dizzy-eyed", "gorbellied",
           "weedy", "reeky", "measled", "spur-galled", "mangled", "impertinent", "bootless", "toad-spotted",
           "hasty-witted", "horn-beat", "yeasty", "boil-brained", "tottering", "hedge-born", "hugger-muggered",
           "elf-skinned"]
    amt = ["accumulation", "bucket", "coagulation", "enema-bucketful", "gob", "half-mouthful", "heap", "mass", "mound",
           "petrification", "pile", "puddle", "stack", "thimbleful", "tongueful", "ooze", "quart", "bag", "plate",
           "ass-full", "assload"]
    noun = ["bat toenails", "bug spit", "cat hair", "chicken piss", "dog vomit", "dung", "fat-woman's stomach-bile",
            "fish heads", "guano", "gunk", "pond scum", "rat retch", "red dye number-9", "Sun IPC manuals",
            "waffle-house grits", "yoo-hoo", "dog balls", "seagull puke", "cat bladders", "pus", "urine samples",
            "squirrel guts", "snake assholes", "snake bait", "buzzard gizzards", "cat-hair-balls", "rat-farts", "pods",
            "armadillo snouts", "entrails", "snake snot", "eel ooze", "slurpee-backwash", "toxic waste",
            "Stimpy-drool", "poopy", "poop", "craptacular carpet droppings", "jizzum", "cold sores", "anal warts"]
    msg = '%s is a %s %s of %s.' % (user, choice(adj), choice(amt), choice(noun))
    return msg

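# Illustrative shape of the output (not from the original source); the adjective, amount,
# and noun are picked at random from the lists above:
#   gen_insult('bob')  # -> e.g. "bob is a fermented heap of pond scum."
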
def char_to_bin(c):
    i = ord(c)
    n = 8
    # We need to be able to handle wchars that don't fit in 8 bits.
    if i >= 1 << 8:
        n = 16
    if i >= 1 << 16:
        n = 32
    ret = ""
    for _ in range(n):
        ret += str(i & 1)
        i >>= 1
    return ret[::-1]

def gen_binary(text):
    return "".join(map(char_to_bin, text))

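# Illustrative examples (not from the original source). char_to_bin() emits 8 bits per
# ASCII character (16 or 32 for wider code points) and gen_binary() concatenates them:
#   >>> char_to_bin('A')
#   '01000001'
#   >>> gen_binary("Hi")
#   '0100100001101001'
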
def gen_xkcd_sub(msg, hook=False):
    # http://xkcd.com/1288/
    substitutions = {'witnesses': 'these dudes I know',
                     'allegedly': 'kinda probably',
                     'new study': 'tumblr post',
                     'rebuild': 'avenge',
                     'space': 'SPAAAAAACCCEEEEE',
                     'google glass': 'virtual boy',
                     'smartphone': 'pokedex',
                     'electric': 'atomic',
                     'senator': 'elf-lord',
                     'car': 'cat',
                     'election': 'eating contest',
                     'congressional leaders': 'river spirits',
                     'homeland security': 'homestar runner',
                     'could not be reached for comment': 'is guilty and everyone knows it'}
    # http://xkcd.com/1031/
    substitutions['keyboard'] = 'leopard'
    # http://xkcd.com/1418/
    substitutions['force'] = 'horse'
    output = msg
    if not hook or random() < 0.001 or True:
        for text, replacement in substitutions.items():
            if text in output:
                output = re.sub(r"\b%s\b" % text, replacement, output)
        output = re.sub(r'(.*)(?:-ass )(.*)', r'\1 ass-\2', output)
    if msg == output:
        return None if hook else msg
    else:
        return output

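# Illustrative example (not from the original source), assuming the substitution table above:
#   >>> gen_xkcd_sub("A new study on space travel")
#   'A tumblr post on SPAAAAAACCCEEEEE travel'
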
def reverse(msg):
    return msg[::-1]

def gen_lenny(msg):
    return "%s ( ͡° ͜ʖ ͡°)" % msg

def gen_shibe(msg):
    topics = msg.split() if msg else [gen_word()]
    reaction = 'wow'
    adverbs = ['so', 'such', 'very', 'much', 'many']
    for i in topics:
        reaction += ' %s %s' % (choice(adverbs), i)
    quotes = ['omg', 'amaze', 'nice', 'clap', 'cool', 'doge', 'shibe', 'ooh']
    for i in range(randint(1, 2)):
        reaction += ' %s' % choice(quotes)
    reaction += ' wow'
    return reaction

def gen_underscore(msg):
    return msg.replace(' ', '_').lower()

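# Illustrative examples (not from the original source) for the simple string filters
# reverse(), gen_lenny() and gen_underscore():
#   >>> reverse("abc")
#   'cba'
#   >>> gen_lenny("hi")
#   'hi ( ͡° ͜ʖ ͡°)'
#   >>> gen_underscore("Foo Bar")
#   'foo_bar'
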
def transform_text(msg):
    # Don't die if no api key
    if token_cache['translate'] == 'invalid':
        return msg
    headers = {'Authorization': 'Bearer %s' % token_cache['translate']}
    data = get('http://api.microsofttranslator.com/V3/json/TransformText',
               params={'language': 'en', 'sentence': msg}, headers=headers).json()
    return data['sentence'] if data['ec'] == 0 else data['em']

def gen_translate(msg, outputlang='en'):
    # Don't die if no api key
    if token_cache['translate'] == 'invalid':
        return msg
    headers = {'Authorization': 'Bearer %s' % token_cache['translate']}
    req = get('http://api.microsofttranslator.com/V2/Http.svc/Translate',
              params={'text': transform_text(msg), 'to': outputlang}, headers=headers)
    xml = etree.fromstring(req.content)
    if xml.tag == 'html':
        doc = ' '.join(xml.itertext())
        doc = ' '.join(doc.splitlines())
        return "An error occurred: %s" % doc
    return xml.text

def gen_random_translate(msg):
    # Don't die if no api key
    if token_cache['translate'] == 'invalid':
        return msg
    headers = {'Authorization': 'Bearer %s' % token_cache['translate'], 'Content-Type': 'text/xml'}
    langs = get('http://api.microsofttranslator.com/V2/Http.svc/GetLanguagesForTranslate', headers=headers)
    names = post('http://api.microsofttranslator.com/V2/Http.svc/GetLanguageNames',
                 params={'locale': 'en'}, data=langs.text, headers=headers)
    langs_xml = etree.fromstring(langs.content)
    names_xml = etree.fromstring(names.content)
    langs = {langs_xml[x].text: names_xml[x].text for x in range(len(langs_xml))}
    # No point in english->english
    del langs['en']
    # This Klingon variant seems to royally screw-up terminals.
    del langs['tlh-Qaak']
    outputlang = choice(list(langs.keys()))
    translation = gen_translate(msg, outputlang)
    return "%s (%s)" % (translation, langs[outputlang])

def append_filters(filters):
    filter_list = []
    for next_filter in filter(None, filters.split(',')):
        if next_filter in output_filters.keys():
            filter_list.append(output_filters[next_filter])
        else:
            return None, "Invalid filter %s." % next_filter
    return filter_list, "Okay!"

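# Illustrative example (not from the original source). append_filters() resolves a
# comma-separated list of filter names (keys of output_filters, defined just below)
# into a list of callables:
#   filters, msg = append_filters("hashtag,reverse")  # -> ([gen_hashtag, reverse], "Okay!")
#   filters, msg = append_filters("hashtag,bogus")    # -> (None, "Invalid filter bogus.")
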
def gen_randfilter(msg):
    randfilter = choice(list(output_filters.values()))
    return randfilter(msg)

output_filters = {
    "passthrough": lambda x: x,
    "hashtag": gen_hashtag,
    "fwilson": gen_fwilson,
    "creffett": gen_creffett,
    "slogan": gen_slogan,
    "insult": gen_insult,
    "morse": gen_morse,
    "removevowels": gen_removevowels,
    "binary": gen_binary,
    "xkcd": gen_xkcd_sub,
    "praise": gen_praise,
    "reverse": reverse,
    "lenny": gen_lenny,
    "yoda": gen_yoda,
    "gizoogle": gen_gizoogle,
    "shakespeare": gen_shakespeare,
    "bard": gen_shakespeare,
    "shibe": gen_shibe,
    "underscore": gen_underscore,
    "translate": gen_translate,
    "randtrans": gen_random_translate,
    "randfilter": gen_randfilter
}
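
# Minimal usage sketch (added for illustration; not part of the original module). It only
# exercises filters that need no network access or API tokens.
if __name__ == '__main__':
    for name in ('hashtag', 'morse', 'binary', 'underscore'):
        print('%s -> %s' % (name, output_filters[name]('Hello World')))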