Package ndg :: Package httpsclient :: Module utils
[hide private]

Source Code for Module ndg.httpsclient.utils

  1  """Utilities using NDG HTTPS Client, including a main module that can be used to 
  2  fetch from a URL. 
  3  """ 
  4  __author__ = "R B Wilkinson" 
  5  __date__ = "09/12/11" 
  6  __copyright__ = "(C) 2011 Science and Technology Facilities Council" 
  7  __license__ = "BSD - see LICENSE file in top-level directory" 
  8  __contact__ = "Philip.Kershaw@stfc.ac.uk" 
  9  __revision__ = '$Id$' 
 10   
 11  import cookielib 
 12  import httplib 
 13  import logging 
 14  from optparse import OptionParser 
 15  import os 
 16  import urllib2 
 17  from urllib2 import (HTTPHandler, HTTPCookieProcessor,  
 18                       HTTPBasicAuthHandler, HTTPPasswordMgrWithDefaultRealm) 
 19   
 20  import urlparse 
 21   
 22  from ndg.httpsclient.urllib2_build_opener import build_opener 
 23  from ndg.httpsclient.https import HTTPSContextHandler 
 24  from ndg.httpsclient import ssl_context_util 
 25   
 26  log = logging.getLogger(__name__) 
 27   
28 -class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor):
29 """Cookie processor that adds new cookies (instead of replacing the existing 30 ones as HTTPCookieProcessor does) 31 """
32 - def http_request(self, request):
33 """Processes cookies for a HTTP request. 34 @param request: request to process 35 @type request: urllib2.Request 36 @return: request 37 @rtype: urllib2.Request 38 """ 39 COOKIE_HEADER_NAME = "Cookie" 40 tmp_request = urllib2.Request(request.get_full_url(), request.data, {}, 41 request.origin_req_host, 42 request.unverifiable) 43 self.cookiejar.add_cookie_header(tmp_request) 44 # Combine existing and new cookies. 45 new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME) 46 if new_cookies: 47 if request.has_header(COOKIE_HEADER_NAME): 48 # Merge new cookies with existing ones. 49 old_cookies = request.get_header(COOKIE_HEADER_NAME) 50 merged_cookies = '; '.join([old_cookies, new_cookies]) 51 request.add_unredirected_header(COOKIE_HEADER_NAME, 52 merged_cookies) 53 else: 54 # No existing cookies so just set new ones. 55 request.add_unredirected_header(COOKIE_HEADER_NAME, new_cookies) 56 return request
57 58 # Process cookies for HTTPS in the same way. 59 https_request = http_request
60 61
class URLFetchError(Exception):
    """Raised when content could not be fetched from a URL."""
64 65
def fetch_from_url(url, config, data=None, handlers=None):
    """Returns data retrieved from a URL.

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data, passed through to open_url
    @type data: str
    @param handlers: list of custom urllib2 handlers, passed through to
    open_url
    @type handlers: iterable
    @return: content read from the response
    @raise URLFetchError: if the returned status code is not 200 (OK); the
    exception carries the message returned by open_url
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code != httplib.OK:
        # A non-200 status can still come with an open response object;
        # release it before reporting the failure.
        if response is not None:
            response.close()
        raise URLFetchError(return_message)

    # Close the response even if reading from it fails.
    try:
        return response.read()
    finally:
        response.close()
82
def fetch_from_url_to_file(url, config, output_file, data=None, handlers=None):
    """Writes data retrieved from a URL to a file.

    The file is only written when the returned status code is 200 (OK).

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: path of the output file
    @type output_file: basestring
    @param data: HTTP POST data, passed through to open_url
    @type data: str
    @param handlers: list of custom urllib2 handlers, passed through to
    open_url
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message
        boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    success = return_code == httplib.OK
    if success:
        # Close the response even if reading from it fails.
        try:
            return_data = response.read()
        finally:
            response.close()
        # Binary mode so the response body is written unmodified on every
        # platform; the with-block closes the file if the write fails.
        with open(output_file, "wb") as outfile:
            outfile.write(return_data)
    return return_code, return_message, success
105
def fetch_stream_from_url(url, config, data=None, handlers=None):
    """Returns the open response for a URL so that the caller can read from
    it.

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data, passed through to open_url
    @param handlers: custom urllib2 handlers, passed through to open_url
    @return: response object returned by open_url
    @rtype: file derived type
    @raise URLFetchError: if the returned status code is not 200 (OK)
    """
    status, message, stream = open_url(url, config, data=data,
                                       handlers=handlers)
    if status != httplib.OK:
        raise URLFetchError(message)
    return stream
121 122
def open_url(url, config, data=None, handlers=None):
    """Attempts to open a connection to a specified URL.

    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request;
    the list itself is not modified
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message or error description
        response object, or None if the request failed)
    """
    debuglevel = 1 if config.debug else 0

    # Set up handlers for URL opener.
    # Compare against None rather than testing truthiness: an empty CookieJar
    # has length zero and is therefore falsy, but a caller that supplies one
    # still expects the cookies received to be stored in it.
    if config.cookie is not None:
        cj = config.cookie
    else:
        cj = cookielib.CookieJar()
    # Use a cookie processor that accumulates cookies when redirects occur so
    # that an application can redirect for authentication and retain both any
    # cookies for the application and the security system (c.f.,
    # urllib2.HTTPCookieProcessor which replaces cookies).
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)

    # Work on a copy so that the caller's list is not extended on every call.
    handlers = list(handlers) if handlers else []
    handlers.append(cookie_handler)

    if config.debug:
        http_handler = HTTPHandler(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    if config.http_basicauth:
        # currently only supports http basic auth
        username, password = config.http_basicauth[0], config.http_basicauth[1]
        auth_handler = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
        auth_handler.add_password(realm=None, uri=url,
                                  user=username,
                                  passwd=password)
        handlers.append(auth_handler)

    # Explicitly remove proxy handling if the host is one listed in the value of
    # the no_proxy environment variable because urllib2 does use proxy settings
    # set via http_proxy and https_proxy, but does not take the no_proxy value
    # into account.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(urllib2.ProxyHandler({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(urllib2.ProxyHandler(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    headers = config.headers
    if headers is None:
        headers = {}

    request = urllib2.Request(url, data, headers)

    # Open the URL and check the response.
    return_code = 0
    return_message = ''
    response = None
    try:
        response = opener.open(request)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)
    except urllib2.HTTPError as exc:
        # The server answered, but with an error status: report its code.
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        log.debug("%s %s", exc.code, exc.msg)
    except Exception as exc:
        # Any other failure is reported through the message, with the status
        # code left at 0.
        return_message = "Error: %s" % exc.__str__()
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())
    return (return_code, return_message, response)
211 212
def _should_use_proxy(url, no_proxy=None):
    """Determines whether a proxy should be used to open a connection to the
    specified URL, based on a comma-separated list of hosts to exclude.

    Only exact host name matches are recognised; the comparison ignores case
    and surrounding whitespace.

    @param url: URL
    @type url: basestring or urllib2.Request
    @param no_proxy: comma-separated host names for which a proxy should not
    be used; if None, the value of the no_proxy environment variable is used
    @type no_proxy: basestring
    @return: False if the URL's host is listed, otherwise True
    @rtype: bool
    """
    if no_proxy is None:
        no_proxy_effective = os.environ.get('no_proxy', '')
    else:
        no_proxy_effective = no_proxy

    hostname = urlparse.urlparse(_url_as_string(url)).hostname
    if not hostname:
        # No host to compare against, so nothing can exclude the proxy.
        return True

    # Host names are case-insensitive, so normalise the listed entries before
    # comparing.  Empty entries (e.g. from an empty or trailing-comma value)
    # are dropped rather than treated as hosts.
    entries = [h.strip().lower() for h in no_proxy_effective.split(',')]
    excluded_hosts = set(h for h in entries if h)
    return hostname.lower() not in excluded_hosts
230
231 -def _url_as_string(url):
232 """Returns the URL string from a URL value that is either a string or 233 urllib2.Request.. 234 @param url: URL 235 @type url: basestring or urllib2.Request 236 @return: URL string 237 @rtype: basestring 238 """ 239 if isinstance(url, urllib2.Request): 240 return url.get_full_url() 241 elif isinstance(url, basestring): 242 return url 243 else: 244 raise TypeError("Expected type %r or %r" % 245 (basestring, urllib2.Request))
246 247
class Configuration(object):
    """Holds the settings used when opening a connection."""

    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None, http_basicauth=None, headers=None):
        """Stores each argument on the instance under the same name.

        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use for requests
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar
        @param http_basicauth: http authentication, or None
        @type http_basicauth: tuple of (username,password)
        @param headers: http headers
        @type headers: dict
        """
        # Security and diagnostics.
        self.ssl_context = ssl_context
        self.http_basicauth = http_basicauth
        self.debug = debug

        # Proxy selection.
        self.proxies = proxies
        self.no_proxy = no_proxy

        # Per-request state.
        self.cookie = cookie
        self.headers = headers
276 277
def main():
    """Utility to fetch data using HTTP or HTTPS GET from a specified URL.

    Parses the command line, builds an SSL context and Configuration from the
    options, then either writes the response to the file given with --fetch
    (exiting via SystemExit with the status code and message) or prints the
    response body.  POST data from --post-data-file is sent in both cases.
    """
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH",
                      default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False,
                      help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None,
                      help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    parser.add_option("-a", "--basicauth", dest="auth", metavar="USER:PASSWD",
                      default=None,
                      help="HTTP authentication credentials")
    parser.add_option("--header", action="append", dest="headers",
                      metavar="HEADER: VALUE",
                      help="Add HTTP header to request")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Paths that are unset or do not exist are treated as not given.
    if options.key_file and os.path.exists(options.key_file):
        key_file = options.key_file
    else:
        key_file = None

    if options.cert_file and os.path.exists(options.cert_file):
        cert_file = options.cert_file
    else:
        cert_file = None

    if options.ca_dir and os.path.exists(options.ca_dir):
        ca_dir = options.ca_dir
    else:
        ca_dir = None

    verify_peer = not options.no_verify_peer

    if options.data_file and os.path.exists(options.data_file):
        with open(options.data_file) as data_file:
            data = data_file.read()
    else:
        data = None

    # The option is stored under dest="auth"; there is no options.basicauth.
    if options.auth:
        if ':' not in options.auth:
            parser.error("Credentials must be given as USER:PASSWD")
        http_basicauth = options.auth.split(':', 1)
    else:
        http_basicauth = None

    headers = {}
    if options.headers:
        for h in options.headers:
            if ':' not in h:
                parser.error("Header must be given as HEADER: VALUE")
            key, val = h.split(':', 1)
            headers[key.strip()] = val.lstrip()

    # If a private key file is not specified, the key is assumed to be stored in
    # the certificate file.
    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context,
                           options.debug,
                           http_basicauth=http_basicauth,
                           headers=headers)
    if options.output_file:
        return_code, return_message = fetch_from_url_to_file(
            url,
            config,
            options.output_file,
            data)[:2]
        raise SystemExit(return_code, return_message)
    else:
        # Pass the POST data here too so that --post-data-file is honoured
        # when no output file is requested.
        content = fetch_from_url(url, config, data)
        print(content)
# Script entry point: set up default logging, then run the command line tool.
if __name__ == '__main__':
    logging.basicConfig()
    main()