Source code for http_request_randomizer.requests.parsers.UrlParser

import re

from http_request_randomizer.requests.errors.ParserExceptions import ParserException

__author__ = 'pgaref'


[docs]class UrlParser(object): """ An abstract class representing any URL containing Proxy information To add an extra Proxy URL just implement this class and provide a 'url specific' parse_proxyList method Attributes: site url (hhtp) minimum_bandwidth_in_KBs (to avoid straggling proxies when having the extra info from proxy provider) """ def __init__(self, web_url, bandwidthKBs=None): self.url = web_url if bandwidthKBs is not None: self.minimum_bandwidth_in_KBs = bandwidthKBs else: self.minimum_bandwidth_in_KBs = 150
[docs] def get_URl(self): if self.url is None: raise ParserException("webURL is NONE") return self.url
[docs] def get_min_bandwidth(self): if self.minimum_bandwidth_in_KBs < 0: raise ParserException("invalid minimum bandwidth limit {0} ".format(self.minimum_bandwidth_in_KBs)) return self.minimum_bandwidth_in_KBs
[docs] def parse_proxyList(self): raise ParserException(" abstract method should be implemented by each subclass")
def __str__(self): return "URL Parser of '{0}' with required bandwidth: '{1}' KBs" \ .format(self.url, self.minimum_bandwidth_in_KBs) @staticmethod
[docs] def valid_ip(address): """Return ``True`` if the the given *IP* is a *valid* IPv4 address :param address: ip address :type address: string :rtype: bool """ try: host_bytes = address.split('.') valid = [int(b) for b in host_bytes] valid = [b for b in valid if b >= 0 and b <= 255] return len(host_bytes) == 4 and len(valid) == 4 except: return False
@staticmethod
[docs] def valid_ip_port(address): """Return ``True`` if the the given *Port* is a *valid* IPv4 port :param address: ip address :type address: string :rtype: bool """ match = re.findall(r'[0-9]+(?:\.[0-9]+){3}:[0-9]+', address) # hostIP = re.compile("\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}") if not match: return False return True