1 """Utilities using NDG HTTPS Client, including a main module that can be used to
2 fetch from a URL.
3 """
4 __author__ = "R B Wilkinson"
5 __date__ = "09/12/11"
6 __copyright__ = "(C) 2011 Science and Technology Facilities Council"
7 __license__ = "BSD - see LICENSE file in top-level directory"
8 __contact__ = "Philip.Kershaw@stfc.ac.uk"
9 __revision__ = '$Id$'
10
11 import cookielib
12 import httplib
13 import logging
14 from optparse import OptionParser
15 import os
16 import urllib2
17 from urllib2 import (HTTPHandler, HTTPCookieProcessor,
18 HTTPBasicAuthHandler, HTTPPasswordMgrWithDefaultRealm)
19
20 import urlparse
21
22 from ndg.httpsclient.urllib2_build_opener import build_opener
23 from ndg.httpsclient.https import HTTPSContextHandler
24 from ndg.httpsclient import ssl_context_util
25
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
27
class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor):
    """Cookie handler that appends the cookie jar's cookies to a request's
    existing Cookie header (HTTPCookieProcessor replaces that header instead).
    """

    def http_request(self, request):
        """Adds the cookies that apply to a request to its Cookie header.
        @param request: request to process
        @type request: urllib2.Request
        @return: the request passed in, with the applicable cookies from the
        jar appended to any Cookie header it already carried
        @rtype: urllib2.Request
        """
        header_name = "Cookie"

        # Let the cookie jar write its header onto a header-less copy of the
        # request, so that the Cookie header already set on the real request
        # is not overwritten.
        scratch = urllib2.Request(request.get_full_url(), request.data, {},
                                  request.origin_req_host,
                                  request.unverifiable)
        self.cookiejar.add_cookie_header(scratch)

        jar_cookies = scratch.get_header(header_name)
        if not jar_cookies:
            # Nothing in the jar applies to this request.
            return request

        if request.has_header(header_name):
            # Keep the cookies already on the request and append the new ones.
            existing = request.get_header(header_name)
            cookie_value = '; '.join((existing, jar_cookies))
        else:
            cookie_value = jar_cookies
        request.add_unredirected_header(header_name, cookie_value)
        return request

    # HTTPS requests are processed in exactly the same way.
    https_request = http_request
60
61
class URLFetchError(Exception):
    """Raised when content cannot be fetched from a URL."""
64
65
def fetch_from_url(url, config, data=None, handlers=None):
    """Returns the body of the response obtained by opening a URL.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: data retrieved from URL
    @raise URLFetchError: if the returned status is not 200 (OK); the
    exception message is the message returned by open_url
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code != httplib.OK:
        # open_url hands back a live response for a successful status other
        # than 200; release it rather than leaking the connection.
        if response is not None:
            response.close()
        raise URLFetchError(return_message)

    try:
        return response.read()
    finally:
        # Close the response even if reading it fails part way through.
        response.close()
82
def fetch_from_url_to_file(url, config, output_file, data=None,
                           handlers=None):
    """Writes data retrieved from a URL to a file.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: output file
    @type output_file: basestring
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message
        boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    try:
        if return_code == httplib.OK:
            return_data = response.read()
            # The response body is raw bytes, so write it in binary mode to
            # avoid newline translation corrupting non-text content.
            with open(output_file, "wb") as outfile:
                outfile.write(return_data)
    finally:
        # The response is None when open_url failed; otherwise always release
        # it, including for non-200 statuses and when reading/writing fails.
        if response is not None:
            response.close()
    return return_code, return_message, return_code == httplib.OK
105
# NOTE(review): the def line of this function is missing from the reviewed
# listing; the name and the parameter defaults are reconstructed from the
# body and from the sibling fetch functions - confirm against the original.
def fetch_stream_from_url(url, config, data=None, handlers=None):
    """Returns the open response obtained by opening a URL, leaving it to the
    caller to read and close it.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: response from which the data can be read
    @rtype: file derived type
    @raise URLFetchError: if the returned status is not 200 (OK)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code == httplib.OK:
        return response

    # A response with a status other than 200 is not handed to the caller,
    # so close it here instead of leaking the connection.
    if response is not None:
        response.close()
    raise URLFetchError(return_message)
121
122
def open_url(url, config, data=None, handlers=None):
    """Attempts to open a connection to a specified URL.
    @param url: URL to attempt to open
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request;
    the list passed in is not modified
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message or error description
        response object, or None if the request failed)
    """
    debuglevel = 1 if config.debug else 0

    # Use the caller's cookie jar if one was supplied. Compare against None
    # rather than testing truth: an empty CookieJar is falsy, and replacing it
    # would mean the caller never sees the cookies set by the response.
    if config.cookie is not None:
        cj = config.cookie
    else:
        cj = cookielib.CookieJar()

    # Work on a copy so that the handlers added below do not accumulate in
    # the caller's list across calls.
    handlers = list(handlers) if handlers else []

    # Use a cookie processor that accumulates cookies instead of replacing
    # any Cookie header already set on the request.
    handlers.append(AccumulatingHTTPCookieProcessor(cj))

    if config.debug:
        # Explicit HTTP/HTTPS handlers are only needed to switch on the
        # connection-level debug output.
        http_handler = HTTPHandler(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    if config.http_basicauth:
        # http_basicauth is a (username, password) pair; realm=None registers
        # it with the password manager's default realm.
        auth_handler = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
        auth_handler.add_password(realm=None, uri=url,
                                  user=config.http_basicauth[0],
                                  passwd=config.http_basicauth[1])
        handlers.append(auth_handler)

    # An empty proxy dictionary explicitly disables proxy use (including any
    # proxies urllib2 would otherwise pick up from the environment).
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(urllib2.ProxyHandler({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(urllib2.ProxyHandler(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    headers = config.headers
    if headers is None:
        headers = {}

    request = urllib2.Request(url, data, headers)

    # Failures are reported through the returned tuple rather than raised.
    return_code = 0
    return_message = ''
    response = None
    try:
        response = opener.open(request)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)
    except urllib2.HTTPError as exc:
        # The server answered with an error status: report its code.
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        if log.isEnabledFor(logging.DEBUG):
            log.debug("%s %s", exc.code, exc.msg)
    except Exception as exc:
        # Any other failure (connection, SSL, ...) leaves return_code at 0.
        return_message = "Error: %s" % str(exc)
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())
    return (return_code, return_message, response)
211
212
def _should_use_proxy(url, no_proxy=None):
    """Decides whether a proxy should be used to open a connection to a URL.
    The host name of the URL is compared with each entry of a comma separated
    list of hosts for which no proxy should be used.
    @param url: URL
    @type url: basestring or urllib2.Request
    @param no_proxy: comma separated list of hosts for which a proxy should
    not be used; if None, the value of the no_proxy environment variable is
    used instead
    @type no_proxy: basestring
    @return: False if the host of the URL is one of the listed hosts,
    otherwise True
    @rtype: bool
    """
    if no_proxy is None:
        exclusions = os.environ.get('no_proxy', '')
    else:
        exclusions = no_proxy

    hostname = urlparse.urlparse(_url_as_string(url)).hostname
    excluded_hosts = [entry.strip() for entry in exclusions.split(',')]
    # Only exact host name matches count as excluded.
    return hostname not in excluded_hosts
230
def _url_as_string(url):
    """Returns the URL string for a URL given either as a string or as a
    urllib2.Request.
    @param url: URL
    @type url: basestring or urllib2.Request
    @return: URL string
    @rtype: basestring
    @raise TypeError: if url is neither a string nor a urllib2.Request
    """
    if isinstance(url, urllib2.Request):
        return url.get_full_url()
    if isinstance(url, basestring):
        return url
    raise TypeError("Expected type %r or %r" %
                    (basestring, urllib2.Request))
246
247
class Configuration(object):
    """Holds the settings used when opening a connection: SSL context, debug
    flag, proxy settings, cookies, HTTP basic authentication and headers.
    """

    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None, http_basicauth=None, headers=None):
        """Stores each argument unchanged as the attribute of the same name.
        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar
        @param http_basicauth: http authentication, or None
        @type http_basicauth: tuple of (username,password)
        @param headers: http headers
        @type headers: dict
        """
        # Connection security and diagnostics
        self.ssl_context = ssl_context
        self.debug = debug

        # Proxy selection
        self.proxies = proxies
        self.no_proxy = no_proxy

        # Per-request state and credentials
        self.cookie = cookie
        self.http_basicauth = http_basicauth
        self.headers = headers
276
277
def main():
    '''Utility to fetch data using HTTP or HTTPS from a specified URL.

    The URL is the single positional command line argument. If a POST data
    file is given its content is sent as the request data. The response is
    written to the output file if one is given, otherwise it is printed.
    When writing to a file, a failed fetch exits with a non-zero status and
    the status code and message as the exit message.
    '''
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH",
                      default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False,
                      help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None,
                      help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    parser.add_option("-a", "--basicauth", dest="auth", metavar="USER:PASSWD",
                      default=None,
                      help="HTTP authentication credentials")
    parser.add_option("--header", action="append", dest="headers",
                      metavar="HEADER: VALUE",
                      help="Add HTTP header to request")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Paths that do not exist are treated as not given (the certificate
    # option has a default location that may well be absent).
    if options.key_file and os.path.exists(options.key_file):
        key_file = options.key_file
    else:
        key_file = None

    if options.cert_file and os.path.exists(options.cert_file):
        cert_file = options.cert_file
    else:
        cert_file = None

    if options.ca_dir and os.path.exists(options.ca_dir):
        ca_dir = options.ca_dir
    else:
        ca_dir = None

    verify_peer = not options.no_verify_peer

    if options.data_file and os.path.exists(options.data_file):
        with open(options.data_file) as data_file:
            data = data_file.read()
    else:
        data = None

    # The --basicauth option is stored under dest "auth".
    if options.auth:
        http_basicauth = options.auth.split(':', 1)
        if len(http_basicauth) != 2:
            parser.error("HTTP authentication credentials must be given as "
                         "USER:PASSWD")
    else:
        http_basicauth = None

    headers = {}
    for header in options.headers or []:
        if ':' not in header:
            parser.error("Invalid header %r - expected HEADER: VALUE" % header)
        key, val = header.split(':', 1)
        headers[key.strip()] = val.lstrip()

    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context,
                           options.debug,
                           http_basicauth=http_basicauth,
                           headers=headers)
    if options.output_file:
        return_code, return_message, success = fetch_from_url_to_file(
            url,
            config,
            options.output_file,
            data)
        if not success:
            # A string argument to SystemExit is printed to stderr and gives
            # exit status 1; a successful fetch returns normally (status 0).
            raise SystemExit("%s %s" % (return_code, return_message))
    else:
        # Pass the POST data here too, so that -p works without -f.
        print(fetch_from_url(url, config, data))
376
377
# When run as a script, set up default logging and run the command line
# utility.
if __name__=='__main__':
    logging.basicConfig()
    main()
381