Source code for http_request_randomizer.requests.proxy.requestProxy

import logging
import os
import random
import sys
import time

import requests
from requests.exceptions import ChunkedEncodingError
from requests.exceptions import ConnectionError
from requests.exceptions import ReadTimeout

from http_request_randomizer.requests.errors.ProxyListException import ProxyListException
from http_request_randomizer.requests.parsers.FreeProxyParser import FreeProxyParser
from http_request_randomizer.requests.parsers.ProxyForEuParser import ProxyForEuParser
from http_request_randomizer.requests.parsers.RebroWeeblyParser import RebroWeeblyParser
from http_request_randomizer.requests.parsers.SamairProxyParser import SamairProxyParser
from http_request_randomizer.requests.useragent.userAgent import UserAgentManager

__author__ = 'pgaref'

# Console handler shared by every RequestProxy instance; records are rendered
# as "<time> <logger name> <level> <message>".
formatter = logging.Formatter('%(asctime)s %(name)-6s %(levelname)-8s %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)

# Silence the "requests" logger below WARNING so its chatter does not drown
# out this module's own debug output.
logging.getLogger("requests").setLevel(logging.WARNING)

# Prepend the directory four levels above the current working directory to
# the import search path.
sys.path.insert(0, os.path.abspath('../../../../'))


class RequestProxy:
    """Send HTTP requests through randomly selected proxies.

    The proxy pool is assembled at construction time from any
    caller-supplied entries plus whatever each configured parser returns
    from ``parse_proxyList()``.  Each request carries a random
    ``User-Agent`` header and, unless ``sustain`` is set, goes through a
    freshly chosen proxy.  A proxy whose request fails is removed from the
    pool.
    """

    # Response codes that are treated as a proxy failure, with the message
    # attached to the ConnectionError raised for each.
    _REJECTED_STATUSES = {
        409: "HTTP Response [409] - Possible Cloudflare DNS resolution error",
        403: "HTTP Response [403] - Permission denied error",
        503: "HTTP Response [503] - Service unavailable error",
    }

    def __init__(self, web_proxy_list=None, sustain=False):
        """Build the proxy pool and select the first proxy.

        :param web_proxy_list: optional iterable of proxies used to seed
            the pool.  It is copied, so neither the caller's list nor a
            shared default is modified.
        :param sustain: when true, keep the current proxy across requests
            instead of choosing a new one for every request.
        :raises ProxyListException: if the resulting pool holds no usable
            proxy.
        """
        self.userAgent = UserAgentManager()
        # NOTE: this configures the *root* logger (handler and level).
        self.logger = logging.getLogger()
        self.logger.addHandler(handler)
        self.logger.setLevel(0)

        #####
        # Each of the classes below implements a specific URL Parser
        #####
        parsers = [
            FreeProxyParser('http://free-proxy-list.net'),
            ProxyForEuParser('http://proxyfor.eu/geo.php', 1.0),
            RebroWeeblyParser('http://rebro.weebly.com'),
            # SamairProxyParser('http://samair.ru/proxy/time-01.htm') is disabled.
        ]

        self.logger.debug("=== Initialized Proxy Parsers ===")
        for parser in parsers:
            self.logger.debug("\t {0}".format(str(parser)))
        self.logger.debug("=================================")

        self.sustain = sustain
        self.parsers = parsers
        # Copy the seed list: extending it in place would leak proxies into
        # the caller's list (or into a default shared by all instances).
        self.proxy_list = list(web_proxy_list) if web_proxy_list is not None else []
        for parser in parsers:
            self.proxy_list += parser.parse_proxyList()
        self.current_proxy = self.randomize_proxy()

    def set_logger_level(self, level):
        """Set the level of the logger used by this instance."""
        self.logger.setLevel(level)

    def get_proxy_list(self):
        """Return the live proxy pool (the internal list, not a copy)."""
        return self.proxy_list

    def generate_random_request_headers(self):
        """Return request headers carrying a randomly selected user agent."""
        headers = {
            "Connection": "close",  # another way to cover tracks
            "User-Agent": self.userAgent.get_random_user_agent(),
        }
        return headers

    def randomize_proxy(self):
        """Pick a random usable proxy, store it as ``current_proxy``, return it.

        Falsy entries (``None``, empty strings) are never selected.

        :raises ProxyListException: if the pool is empty or contains only
            falsy entries.
        """
        # Filtering first gives the same uniform choice as retrying until a
        # truthy entry comes up, but cannot loop forever when none exists.
        candidates = [proxy for proxy in self.proxy_list if proxy]
        if not candidates:
            raise ProxyListException("list is empty")
        rand_proxy = random.choice(candidates)
        self.current_proxy = rand_proxy
        return rand_proxy

    #####
    # Proxy format:
    # http://<USERNAME>:<PASSWORD>@<IP-ADDR>:<PORT>
    #####
    def generate_proxied_request(self, url, method="GET", params=None, data=None, headers=None, req_timeout=30):
        """Perform one HTTP request through the current (or a new) proxy.

        :param url: target URL.
        :param method: HTTP method name handed to ``requests.request``.
        :param params: query parameters (mapping); defaults to empty.
        :param data: request body; defaults to empty.
        :param headers: extra request headers.  The mapping is copied and
            never modified; random headers take precedence over it.
        :param req_timeout: timeout passed to ``requests.request``.
        :returns: the response object, or ``None`` when the attempt failed
            and the proxy was discarded (the caller may simply retry).
        :raises ProxyListException: when no usable proxy remains.

        Only an ``"http"`` entry is supplied in the ``proxies`` mapping
        given to ``requests.request``.
        """
        params = {} if params is None else params
        data = {} if data is None else data

        random.shuffle(self.proxy_list)

        # Build a fresh header mapping so the caller's dict is left alone.
        # Precedence (lowest to highest): caller headers, params, random
        # headers.
        # NOTE(review): the query params are also sent as HTTP headers,
        # exactly as before - confirm whether that is intentional.
        request_headers = dict(headers) if headers else {}
        request_headers.update(params)
        request_headers.update(self.generate_random_request_headers())

        if not self.sustain:
            self.randomize_proxy()
        self.logger.debug("Using proxy: {0}".format(str(self.current_proxy)))

        try:
            request = requests.request(method, url, proxies={"http": self.current_proxy},
                                       headers=request_headers, data=data, params=params,
                                       timeout=req_timeout)
            # Avoid HTTP request errors
            rejection = self._REJECTED_STATUSES.get(request.status_code)
            if rejection is not None:
                raise ConnectionError(rejection)
            # Single-argument call form: identical output on Python 2 and 3.
            print('RR Status {}'.format(request.status_code))
            return request
        except (ConnectionError, ReadTimeout, ChunkedEncodingError) as error:
            if isinstance(error, ConnectionError):
                reason = "Proxy unreachable"
            elif isinstance(error, ReadTimeout):
                reason = "Read timed out"
            else:
                reason = "Wrong server chunked encoding"
            self._discard_current_proxy(reason)
        return None

    def _discard_current_proxy(self, reason):
        """Remove the failing proxy from the pool, log it, and pick another.

        :raises ProxyListException: if no usable proxy is left afterwards.
        """
        try:
            self.proxy_list.remove(self.current_proxy)
        except ValueError:
            # The proxy is no longer in the pool; nothing to remove.
            pass
        self.logger.debug("{0} - Removed Straggling proxy: {1} PL Size = {2}".format(
            reason, self.current_proxy, len(self.proxy_list)))
        self.randomize_proxy()
if __name__ == '__main__':
    # Manual smoke test: build the proxy pool, then keep requesting a
    # "what is my IP" page through it every ten seconds, reporting timings
    # and the remaining pool size.
    # Each print uses a single pre-formatted argument so the output is the
    # same on Python 2 and Python 3.
    start = time.time()
    req_proxy = RequestProxy()
    print("Initialization took: {0} sec".format((time.time() - start)))
    print("Size :  {0}".format(len(req_proxy.get_proxy_list())))
    print(" ALL =  {0}".format(req_proxy.get_proxy_list()))

    test_url = 'http://ipv4.icanhazip.com'

    while True:
        start = time.time()
        try:
            request = req_proxy.generate_proxied_request(test_url)
        except ProxyListException:
            # Every proxy has been discarded; nothing left to try.
            print("No usable proxies left - stopping.")
            break
        print("Proxied Request Took: {0} sec => Status: {1}".format((time.time() - start), str(request)))
        if request is not None:
            body = request.text
            if not isinstance(body, str):
                # Python 2: text is ``unicode``; encode it for printing.
                body = body.encode('utf-8')
            print("\t Response: ip={0}".format(body))
        print("Proxy List Size:  {0}".format(len(req_proxy.get_proxy_list())))

        print("-> Going to sleep..")
        time.sleep(10)