1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 from httplib import HTTPConnection
33 from socket import error as SocketError, IPPROTO_TCP, TCP_NODELAY
34 from ssl import CERT_NONE
35 from urlparse import urlsplit
36
37 from httxauth import authbasic, authdigest
38 from httxbase import HttxBase
39 from httxcompression import httxdecompress
40 from httxerror import SocketException, RedirectError, MaxRedirectError, ExternalRedirectError
41 from httxrequest import HttxRequest
42 from httxs import HTTPxTunneled, HTTPSxConnection, HTTPSxTunneled
43 from httxutil import parse_keqv_list, parse_http_list, tclock
47 '''
48 Connection object. The HttxConnection is responsible for creating
49 and managing an underlying httplib.HTTPConnection or a L{HTTPSxConnection} (for HTTPS)
50 to perform the actual connection
51
52 The creation of the underlying connection is based upon a dictionary holding the
53 class types indexed by the connection scheme (http or https)
54
55 The connection automatically handles redirection, authentication (basic and digest)
56 and decompression (gzip, bzip2, deflate)
57
58 The behaviour can be altered by managing the options
59
60 @ivar connFactory: class variable holding the dictionary of connection
61 classes used to instantiate connections
62 @type connFactory: dict
63 @ivar tunnelFactory: class variable holding the dictionary of connection
64 classes used to instantiate tunneled connections
65 @type tunnelFactory: dict
66
67 @ivar url: url used to set the net location to which connections will
68 connect
69 @type url: str
70 @ivar parsed: it holds the result of urlsplit(url) for practical purposes
71 @type parsed: namedtuple from urlsplit
72 @ivar redircount: count of redirections so far performed
73 @type redircount: int
74 @ivar lastreq: Last L{HttxRequest} request issued
75 @type lastreq: L{HttxRequest}
76 @ivar auxhttx: Reference to the auxiliary connection used for either
77 authentication or redirection if needed
78 @type auxhttx: L{HttxConnection}
79 @ivar conn: actual connection object
80 @type conn: httplib.HTTPConnection or L{HTTPSxConnection} - subclass of
81 httplib.HTTPSConnection
82 @ivar timestamp: last time the connection was used (value of tclock())
83 @type timestamp: int (as originally documented - confirm against tclock)
84 '''
# Both factories are keyed by url scheme ('http' / 'https'): connFactory
# builds direct connections, tunnelFactory builds connections layered on an
# already CONNECTed socket.
85 connFactory = dict(http=HTTPConnection, https=HTTPSxConnection)
86 tunnelFactory = dict(http=HTTPxTunneled, https=HTTPSxTunneled)
87
89 '''
90 Constructor. It delegates construction to the base class
91 L{HttxBase} and initializes the member variables with the help
92 of the L{createconnection} method
93
94 @param kwargs: keyword arguments passed to L{HttxBase}; 'redircount' (default 0) is also read here
95 @see: L{HttxOptions}
96 '''
97 HttxBase.__init__(self, **kwargs)
98
# redircount travels through kwargs so that connections created while
# cloning or following a redirect carry the running redirect count
99 self.redircount = kwargs.get('redircount', 0)
100 self.lastreq = None
101 self.auxhttx = None
# tunnelreq holds the request of a CONNECT that is still in progress
102 self.tunnelreq = None
103
104 if url:
105 self.url = url
106 self.parsed = urlsplit(self.url)
107
# createconnection() tolerates a false url: it sets conn to None and returns
108 self.createconnection(url)
109
110
112 '''
113 Deepcopy support, implemented by delegating to L{clone}.
114
115 @param memo: standard __deepcopy__ parameter to avoid circular references (not used here)
116 @type memo: dict
117 @see: L{clone}
118 @return: a cloned object
119 @rtype: L{HttxConnection}
120 '''
# memo is ignored: clone() is called without arguments, so the copy gets a
# cloned set of options
121 return self.clone()
122
123
def clone(self, options=None):
    '''
    Build a new connection object equivalent to this one.

    The clone is created for the same url and carries over the current
    redirection count, which is what allows a maximum number of
    redirections to be enforced across clones.

    If no options are supplied (or the supplied value is false) the
    clone receives a clone of the options of this object. A separate
    set of options detaches the clone from the original: cookies,
    user/password/realm combinations and https certificates are no
    longer shared.

    @param options: options for the cloned object
    @type options: L{HttxOptions}
    @return: a cloned object
    @rtype: L{HttxConnection}
    '''
    cloneoptions = options or self.options.clone()
    factory = self.__class__
    return factory(self.url, redircount=self.redircount, options=cloneoptions)
146
147
148 @property
150 '''
151 Read-only property for easy and quick access to the sock attribute
152 of the underlying connection (self.conn.sock).
153
154 The sock object is used in the library to index active connections in cache
155 '''
156
157
158
# NOTE(review): raises AttributeError while self.conn is None (no
# connection created yet, or after conn was reset) - confirm callers
# only read this with a live connection
159 return self.conn.sock
160
161
163 '''
164 Close the underlying connection, if there is one, and reset the conn instance
165 variable to None (possibly after an exception happened) so that the next "request" re-issues a createconnection
166 '''
# guard: conn is already None when no connection has been created
167 if self.conn is not None:
168 self.conn.close()
169 self.conn = None
170
171
173 '''
174 Add the appropriate "certificates" and/or paths and options to an underlying
175 https connection to allow it to successfully (and according to user request)
176 execute. It sets key_file, cert_file, cert_reqs and (if validation is requested) ca_certs on self.conn
177
178 @param url: url that contains the domain to use to pull certificate from the store
179 @type url: str
180 '''
# client private key / certificate registered in the options for this url
181 key_file, cert_file = self.options.certkeyfile.find_certkey(url)
182 self.conn.key_file = key_file
183 self.conn.cert_file = cert_file
184
# the CA bundle is only looked up when some level of certificate
# validation is requested
185 self.conn.cert_reqs = self.options.certreq.find_cert_req(url)
186 if self.conn.cert_reqs != CERT_NONE:
187 self.conn.ca_certs = self.options.cacert.find_ca_cert(url)
188
189
191 '''
192 Actual connection (with https certificate preparation). Socket errors raised while connecting are re-raised as L{SocketException}
193 '''
# https connections get key/cert/validation settings before connecting
194 if self.parsed.scheme == 'https':
195 self.sslize(self.url)
196
197
# translate low level socket errors into the library exception type
198 try:
199 self.conn.connect()
200 except SocketError, e:
201 raise SocketException(*e.args)
202
203
# Best effort: set TCP_NODELAY on the new socket; a failure here is ignored.
# NOTE(review): the bare "except:" also swallows KeyboardInterrupt and
# SystemExit - "except Exception:" would keep the best-effort intent
204 try:
205 self.conn.sock.setsockopt(IPPROTO_TCP, TCP_NODELAY, 1)
206 except:
207
208 pass
209
210
212 '''
213 Helper function to enable delayed creation of the underlying connection
214 if needed. Called from the L{__init__} and from L{request} in order
215 to ensure an underlying connection is created or recreated if tunneling
216
217 It initializes the member variables: I{url}, I{parsed}, L{conn}, I{timestamp}
218
219 In the case of https connections it will also set variables in the
220 underlying connection object to ensure certificates and validation
221 are used if requested and appropriate for the domain
222
223 @param url: url to open a connection to (if false, conn is set to None and nothing else is done)
224 @type url: str
225 @param sock: socket for a tunneled connection
226 @type sock: socket (Default: None)
227 @param plaintunnel: if tunnel should not be sslized
228 (connections are fake that do nothing or ssl_wrap)
229 @type plaintunnel: bool (Default: False)
230 '''
# no url: leave the object without an underlying connection; "request"
# creates one later when it finds conn set to None
231 if not url:
232 self.conn = None
233 return
234
235 self.url = url
236 self.parsed = urlsplit(self.url)
237
# direct connection vs. connection built on top of an existing (tunnel)
# socket; both factories are indexed by the url scheme
238 if not sock:
239 self.conn = self.connFactory[self.parsed.scheme](self.parsed.hostname, self.parsed.port, timeout=self.options.timeout)
240 else:
241 self.conn = self.tunnelFactory[self.parsed.scheme](sock, self.parsed.hostname, self.parsed.port, timeout=self.options.timeout)
242
# connect right away, except for a tunnel socket that was asked to stay plain
243 if not sock or not plaintunnel:
244 self.doconnect()
245
246 self.timestamp = tclock()
247
248
250 '''
251 Tunnel a connection over CONNECT if needed and not already done
252 and re-create the underlying connection to use the "CONNECT"ed
253 tunnel. Returns the socket of the connection if a CONNECT was sent and False otherwise
254
255 @param httxreq: Request or url to be executed
256 @type httxreq: L{HttxRequest} or url (string)
257 @param plaintunnel: if tunnel should not be sslized
258 (connections are fake that do nothing or ssl_wrap)
259 @type plaintunnel: bool (Default: False)
260 '''
# a CONNECT is already in progress for this connection: nothing to do
261 if self.tunnelreq:
262
263 return False
264
265 if isinstance(httxreq, basestring):
266 httxreq = HttxRequest(httxreq)
267
# A tunnel is only needed when the connection points to a different net
# location than the request (presumably a proxy - confirm) and CONNECT is
# enabled in the options for the scheme of the request
268 if self.parsed.netloc != httxreq.netloc and \
269 ((self.options.httpsconnect and httxreq.scheme == 'https') or \
270 (self.options.httpconnect and httxreq.scheme == 'http')):
271
272
# remember the original request; it is re-issued by "tunneling" once the
# CONNECT has been answered
273 self.tunnelreq = httxreq
274 self.tunnelreq.plaintunnel = plaintunnel
275
# CONNECT needs an explicit port: fall back to the default of the scheme
276 hport = httxreq.parsed.port
277 if hport is None:
278 hport = HTTPSxConnection.default_port if httxreq.scheme == 'https' else HTTPConnection.default_port
279
280 try:
281 self.conn.request('CONNECT', '%s:%s' % (httxreq.parsed.hostname, hport), headers=httxreq.allheaders)
282 except SocketError, e:
# the CONNECT never left: forget the pending tunnel before re-raising
283 self.tunnelreq = None
284 raise SocketException(*e.args)
285
286 self.lastreq = httxreq
287 return self.conn.sock
288
289 return False
290
291
293 '''
294 If the user has connected a tunnel manually and asked for "plain tunnel"
295 the tunnel may still be wrapped by ssl manually by calling this method, which simply delegates to L{doconnect}
296 '''
297 self.doconnect()
298
299
301 '''
302 Send the L{HttxRequest} httxreq to the specified server inside the request
303 It does so by creating a connection if needed, then setting headers with
304 helper functions for compression, cookies and authentication and then
305 relaying the call to the underlying connection
306
307 @param httxreq: Request or url to be executed
308 @type httxreq: L{HttxRequest} or url (string)
309 @return: sock
310 @rtype: opaque type for the caller (a Python sock)
311 '''
312 if isinstance(httxreq, basestring):
313 httxreq = HttxRequest(httxreq)
314
315
# delayed creation: conn is None when no url was available at construction
# time or after the connection was reset
316 if self.conn is None:
317
318 self.createconnection(httxreq.get_full_url())
319
# tunnelconnect() returns the socket (a true value) when it has sent a
# CONNECT, and False when no tunnel request was issued
320 if not self.tunnelconnect(httxreq, plaintunnel=getattr(httxreq, 'plaintunnel', False)):
321
322
# decorate the request with the headers enabled in the options
323 self.addkeepalive(httxreq)
324 self.adddecompress(httxreq)
325 self.addcookies(httxreq)
326 self.addauth(httxreq)
327 self.adduseragent(httxreq)
328 self.addcontent(httxreq)
329
330
# the full url is sent when the connection points to a different net
# location than the request or when the options force it; otherwise only
# the selector is sent
331 if self.parsed.netloc != httxreq.netloc or self.options.sendfullurl:
332
333 url = httxreq.get_full_url()
334 else:
335
336 url = httxreq.get_selector()
337
338
339 try:
340 self.conn.request(httxreq.get_method(), url, httxreq.body, httxreq.allheaders)
341 except SocketError, e:
342 raise SocketException(*e.args)
343
344
# kept for cookie extraction, redirection and authentication in getresponse
345 self.lastreq = httxreq
346
347
348 self.timestamp = tclock()
349
350
351 return self.conn.sock
352
353
355 '''
356 Recover a L{HttxResponse}
357
358 The sock parameter is not used but the function follows the abstract
359 definition of L{HttxBase} and the implementations of L{HttxManager}
360 and L{HttxNetLocation}
361
362 Checks for authentication requests or redirection are made. If the
363 options allow to process those requests, new requests (with potentially
364 new connections) are launched and the connection is marked as active
365 to avoid any other part of the library to reuse it
366
367 Decompression of content and cookie extraction is also performed
368
369 @param sock: The opaque type returned by L{request}
370 @type sock: opaque (a Python sock)
371 @return: response
372 @rtype: L{HttxResponse} (compatible with httplib HTTPResponse)
373 '''
374
375
# An auxiliary connection (created by a redirection) owns the pending
# request: delegate to it. auxhttx is self while an authentication retry
# is in flight on this same connection, hence the identity check
376 if self.auxhttx and self.auxhttx is not self:
377
378
379 response = self.auxhttx.getresponse(sock)
380
381
382
# drop the auxiliary connection once its response is no longer active
383 if not response.isactive():
384 self.auxhttx = None
385
386 return response
387
388
389 try:
390 response = self.conn.getresponse()
391 except SocketError, e:
392 self.tunnelreq = None
393 raise SocketException(*e.args)
394
# NOTE(review): this branch is a no-op in the visible code
395 if self.options.sendfullurl:
396
397
398
399 pass
400
401
402 self.extractcookies(response)
403 self.decompress(response)
404
405
406 if response.isauth():
# auxhttx already set here means an authentication retry was already
# made: hand the response back instead of retrying again
407 if self.auxhttx:
408
409 self.auxhttx = None
410 return response
411
# remember whether the pending tunnel was plain before clearing it, so
# that the retried request can carry the same setting
412 plaintunnel = getattr(self.tunnelreq, 'plaintunnel', False)
413 self.tunnelreq = None
414 return self.authenticate(response, plaintunnel=plaintunnel)
415
416
417 self.auxhttx = None
418
419
420 if response.isredir():
421 return self.redirect(response)
422
423
# if a CONNECT was pending, finish setting up the tunnel and re-issue the
# original request
424 self.tunneling(response)
425
426
427 return response
428
429
431 '''
432 Add a Cookie header to httxreq if cookies are enabled in the options
433 It uses the cookiejar held by the options set (a urllib2 cookiejar)
434
435 @param httxreq: Request to be executed
436 @type httxreq: L{HttxRequest}
437 '''
438 if self.options.cookies:
439 self.options.cookiejar.add_cookie_header(httxreq)
440
441
443 '''
444 Add an Accept-Encoding header to httxreq if decompression is enabled in the options
445
446 @param httxreq: Request to be executed
447 @type httxreq: L{HttxRequest}
448 '''
# the header value is whatever decompmethods.join() returns
# (presumably the list of accepted methods - confirm in the options class)
449 if self.options.decompression:
450 httxreq.add_header('Accept-Encoding', self.options.decompmethods.join())
451
452
454 '''
455 Add a cached authentication header (Authorization or Proxy-Authorization)
456 to httxreq if the cache holds one for the url of the request
457
458 It uses a L{HttxAuthCache} from the options
459
460 @param httxreq: Request to be executed
461 @type httxreq: L{HttxRequest}
462 '''
463 parsed = urlsplit(httxreq.get_full_url())
464
# the cache is queried with the url as rebuilt from its parsed form
465 headername, headerval = self.options.authcache.get(parsed.geturl())
466
# a None header name means the cache has nothing for this url
467 if headername is not None:
468 httxreq.add_unredirected_header(headername, headerval)
469
470
472 '''
473 Adds the "Connection: Keep-Alive" header if enabled in the options (connkeepalive)
474
475 @param httxreq: Request to be executed
476 @type httxreq: L{HttxRequest}
477 '''
478 if self.options.connkeepalive:
479 httxreq.add_header('Connection', 'Keep-Alive')
480
481
483 '''
484 Adds the User-Agent header to httxreq if a useragent value is set in the options
485
486 @param httxreq: Request to be executed
487 @type httxreq: L{HttxRequest}
488 '''
489 if self.options.useragent:
490 httxreq.add_header('User-Agent', self.options.useragent)
491
492
def addcontent(self, httxreq):
    '''
    Supply a default Content-Type for requests that transmit data
    (behaviour borrowed from urllib2).

    The header is only added when the request has a body, is a POST
    and does not already carry a Content-Type header.

    @param httxreq: Request to be executed
    @type httxreq: L{HttxRequest}
    '''
    # Guard clauses, checked in this order: body, method, existing header
    if not httxreq.body:
        return

    if not httxreq.ispost():
        return

    if httxreq.has_header('Content-Type'):
        return

    httxreq.add_unredirected_header('Content-Type', 'application/x-www-form-urlencoded')
502
503
505 '''
506 Perform cookie extraction from a response into the cookiejar held
507 in the options set (a urllib2 cookiejar), if cookies are enabled
508
509 @param response: A response being processed
510 @type response: L{HttxResponse}
511 '''
# the cookiejar needs the request that produced the response: lastreq
512 if self.options.cookies:
513 self.options.cookiejar.extract_cookies(response, self.lastreq)
514
515
517 '''
518 Perform body decompression if enabled by the options (decompression
519 or autodecompression); the actual work is delegated to httxdecompress
520
521 @param response: A response being processed
522 @type response: L{HttxResponse}
523 '''
# skipped only when both options are off
524 if not self.options.decompression and not self.options.autodecompression:
525 return
526
527 httxdecompress(response)
528
529
531 '''
532 Checks if a tunnel (CONNECT) request was in place and if it has been
533 correctly established (response status 200).
534
535 If so, it will reissue the original request, cleaning proxy authorization
536 headers if they existed. For a "plain tunnel" the original request is not reissued
537
538 @param response: A response being processed
539 @type response: L{HttxResponse}
540 '''
541 if self.tunnelreq:
# take ownership of the pending request and clear the marker first
542 tunnelreq = self.tunnelreq
543 self.tunnelreq = None
544
545
546
547
# the proxy credentials were for the CONNECT and are removed from the
# request before it is sent through the tunnel
# (key spelling presumably matches how HttxRequest stores header names - confirm)
548 if 'Proxy-authorization' in tunnelreq.unredirected_hdrs:
549 del tunnelreq.unredirected_hdrs['Proxy-authorization']
550
551 if response.status == 200:
552
# rebuild the underlying connection on top of the CONNECTed socket
553 self.createconnection(tunnelreq.get_full_url(),
554 sock=self.conn.sock, plaintunnel=tunnelreq.plaintunnel)
555
556 if not tunnelreq.plaintunnel:
557
# the response now refers to the socket of the re-issued request
558 response.tunnelreq = tunnelreq
559 response.sock = self.request(tunnelreq)
560
561
562
563
564
566 '''
567 Perform redirection if the response requests it and enabled by the
568 options set. It may raise MaxRedirectError, RedirectError or ExternalRedirectError
569
570 @param response: A response being processed
571 @type response: L{HttxResponse}
572 @return: The same response; if the redirection is followed its sock is
573 the one returned by the request issued on the auxiliary connection
574 @rtype: L{HttxResponse}
575 '''
576 if not self.options.redirect:
577
578 return response
579
# NOTE(review): equality test - a redircount already above maxredirects
# would not be stopped here; ">=" looks safer (confirm)
580 if self.redircount == self.options.maxredirects:
581
582 raise MaxRedirectError(response, 'Reached the maximum number of redirects')
583
584 if self.lastreq.ispost() and not response.isredirpost():
585 raise RedirectError(response, '307 redirection code for a POST request')
586
587 if self.lastreq.ispost() and response.isredirpostrfc2616() and not self.options.rfc2616postredir:
588 return response
589
590 locationurl = response.getheader('location', None)
591
592 if not locationurl:
593 raise RedirectError(response, 'Redirect error: missing location header')
594
595 parsed = urlsplit(locationurl)
596
597
# Location without scheme or net location: borrow both from the last request.
# NOTE(review): SplitResult and urlunsplit are not among the imports visible
# in this file (only urlsplit is imported from urlparse); unless they are
# imported elsewhere this branch raises NameError
598 if not parsed.scheme or not parsed.netloc:
599 parsed = list(parsed)
600 parsed[:2] = self.lastreq.scheme, self.lastreq.netloc
601 parsed = SplitResult(*parsed)
602 locationurl = urlunsplit(parsed)
603
604 if not self.options.externalredirect and self.lastreq.netloc != parsed.netloc:
605
606 raise ExternalRedirectError(response, 'External redirect not allowed')
607
608
609
# when this connection does not point to the net location of the request
# (presumably a proxy - confirm) the auxiliary connection reuses the url of
# this connection; otherwise it connects to the redirect target
610 if self.parsed.netloc != self.lastreq.netloc:
611 auxhttxurl = self.url
612 else:
613 auxhttxurl = locationurl
614
615
# the auxiliary connection shares the options and carries the incremented
# redirect count
616 auxhttx = self.__class__(auxhttxurl, redircount=self.redircount + 1, options=self.options)
617
618
619 redirreq = self.lastreq.clone(locationurl)
620
621
# auxhttx is stored only after its request was issued; getresponse then
# delegates to it
622 response.sock = auxhttx.request(redirreq)
623 self.auxhttx = auxhttx
624
625 return response
626
627
629 '''
630 Perform authentication if the response requests it and enabled by the
631 options set. The plaintunnel argument received is stored on the retried request
632
633 @param response: A response being processed
634 @type response: L{HttxResponse}
635 @return: The same response processed
636 @rtype: L{HttxResponse}
637 '''
# challenge header sent by the server / answer header sent by the client,
# indexed by response status (401 user, 407 proxy)
638 authheaderserver = {401:'www-authenticate', 407:'proxy-authenticate'}
639 authheaderclient = {401:'authorization', 407:'proxy-authorization'}
640
641 if not self.options.auth:
642 return response
643
644 if response.isauthuser() and not self.options.authuser:
645 return response
646
647 if response.isauthproxy() and not self.options.authproxy:
648 return response
649
650
# NOTE(review): getheader is called without a default; if the challenge
# header is missing, authheader may be None when handed to parse_http_list
# (confirm what HttxResponse.getheader returns)
651 authheader = response.getheader(authheaderserver[response.status])
652
653
# scheme name (lowercase) -> list of "key=value" strings, later replaced
# by the result of parse_keqv_list
654 authschemes = dict()
655 lastscheme = None
656
657
658 for elem in parse_http_list(authheader):
659 authparts = elem.split(' ', 1)
# an element whose first token contains "=" is another parameter of the
# scheme seen last, not a new scheme
660 if lastscheme and '=' in authparts[0]:
661
662 authschemes[lastscheme].append(authparts[0])
663 continue
664
665
666 lastscheme = authparts[0].lower()
667 authschemes[lastscheme] = list()
668
669
670 if len(authparts) > 1:
671 authschemes[lastscheme].append(authparts[1])
672
673 for scheme, keqv_list in authschemes.iteritems():
674 authschemes[scheme] = parse_keqv_list(keqv_list)
675
676
677
# user authentication is tied to the url of the request, proxy
# authentication to the url of this connection
678 authurl = self.lastreq.get_full_url() if response.status == 401 else self.url
679
680 authscheme = None
681 authanswer = None
682 authcachedata = None
683
# a user supplied handler gets the first chance to produce the answer
684 if self.options.authhandler:
685 authcachedata = self.options.authcache.get(authurl, opaque=True)
686 authscheme, authanswer, authcachedata = self.options.authhandler(authurl, authschemes, authcachedata)
687
688 if authscheme is None or authanswer is None:
689
# built-in handling: digest is preferred over basic
690 if 'digest' in authschemes:
691 authchallenge = authschemes['digest']
692 realm = authchallenge.get('realm', None)
693 username, password = self.options.passmanager.find_user_password(realm, authurl)
694
# NOTE(review): unlike the basic branch, username/password are not checked
# against None before being used - confirm authdigest copes with that
695 if 'nonce' in authchallenge:
696 nonce_count = self.options.authcache.getnoncecount(authchallenge['nonce'])
697 authscheme, authanswer, authcachedata = authdigest(username, password, authchallenge, self.lastreq, nonce_count)
698
699 elif 'basic' in authschemes:
700 authchallenge = authschemes['basic']
701 realm = authchallenge.get('realm', None)
702 username, password = self.options.passmanager.find_user_password(realm, authurl)
703
704 if username is not None and password is not None:
705 authscheme, authanswer, authcachedata = authbasic(username, password, authchallenge)
706
# no answer could be produced: return the 401/407 response unchanged
707 if authscheme is None or authanswer is None:
708 return response
709
710
711 authorization = '%s %s' % (authscheme, authanswer)
712
713 authheaderresp = authheaderclient[response.status]
714
715
# the last request is reused (not cloned) and gets the answer header
716 authreq = self.lastreq
717 authreq.add_unredirected_header(authheaderresp, authorization)
718
719 authreq.plaintunnel = plaintunnel
720
721 if authcachedata is not None:
722
723 self.options.authcache.set(authreq.parsed.geturl(), authheaderresp, authscheme, authanswer, authcachedata)
724
# auxhttx set to self marks an authentication retry in flight, so that a
# second auth response is handed back instead of retried again
725 response.sock = self.request(authreq)
726 self.auxhttx = self
727
728 return response
729