.. _fbf.plugs.extra.snarf:
snarf
~~~~~
.. automodule:: fbf.plugs.extra.snarf
:show-inheritance:
:members:
:undoc-members:
CODE
----
::
# fbf/plugs/extra/snarf.py
#
#
""" Fetch the title of a URL. """
__author__ = "Wijnand 'tehmaze' Modderman - http://tehmaze.com"
__license__ = 'BSD'
# NOTE(review): presumably commands to document last when generating docs - confirm.
__gendoclast__ = ['snarf-disable', ]
# NOTE(review): presumably the plugins this one depends on - confirm against the loader.
__depend__ = ['url', ]
.. _fbf.plugs.extra.snarf_fbf_imports:
fbf imports
--------------
::
from fbf.lib.callbacks import callbacks
from fbf.lib.commands import cmnds
from fbf.lib.examples import examples
from fbf.utils.url import decode_html_entities, get_encoding, geturl, geturl2
from fbf.utils.exception import handle_exception
from fbf.lib.persist import Persist, PlugPersist
from fbf.lib.persistconfig import PersistConfig
from fbf.lib.plugins import plugs as plugins
.. _fbf.plugs.extra.snarf_basic_imports:
basic imports
----------------
::
import urllib.request, urllib.parse, urllib.error
import urllib.request, urllib.error, urllib.parse
import urllib.parse
import copy
import re
import socket
.. _fbf.plugs.extra.snarf_defines_:
defines
----------
::
# PlugPersist object created under the name 'snarf.cfg'.
cfg = PlugPersist('snarf.cfg')
# Configuration object; 'allow' is defined with a list of three MIME types
# (presumably the content types the plugin will fetch - confirm at use site).
pcfg = PersistConfig()
pcfg.define(
    'allow',
    ['text/plain', 'text/html', 'application/xml'],
)
# Capture the text between <title> and </title>. re.I also accepts <TITLE>,
# and re.S lets '.' cross newlines so a title split over several lines matches.
re_html_title = re.compile(r'<title>(.*?)</title>', re.I | re.M | re.S)
# Capture an http:// or https:// URL up to the next whitespace character.
# Raw string so that \S reaches the regex engine without an invalid-escape warning.
re_url_match = re.compile(r'((?:http|https)://\S+)')
re_html_valid = {
'result': re.compile('(Failed validation, \d+ errors?|Passed validation)', re.I | re.M),
'modified': re.compile('Modified: | ([^<]+) | ', re.I | re.M),
'server': re.compile('Server: | ([^<]+) | ', re.I | re.M),
'size': re.compile('Size: | ([^<]+) | ', re.I | re.M),
'content': re.compile('Content-Type: | ([^<]+) | ', re.I | re.M),
'encoding': re.compile('([^<]+) | |