Open Table Of Contents

Source code for bridgedb.txrecaptcha

# -*- coding: utf-8 ; test-case-name: bridgedb.test.test_txrecaptcha -*-
#
# This file is part of BridgeDB, a Tor bridge distribution system.
#
# :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
# :copyright: (c) 2013-2015, Isis Lovecruft
#             (c) 2007-2015, The Tor Project, Inc.
# :license: 3-Clause BSD, see LICENSE for licensing information

"""Twisted-based reCAPTCHA client.

This client *always* uses TLS with strict hostname checking, unlike the
official Google Python recaptcha-client_, which is hardcoded_ to use plaintext
HTTP.

Small portions of this code were taken from the official Google Python
recaptcha-client_ module, version 1.0.6.  Those portions are
:class:`RecaptchaResponse` and :data:`API_SERVER`.  They total 5 lines of code,
which are copyright the authors of the recaptcha-client_ package.

.. _hardcoded: https://code.google.com/p/recaptcha/source/browse/trunk/recaptcha-plugins/python/recaptcha/client/captcha.py#76
.. _recaptcha-client: https://pypi.python.org/pypi/recaptcha-client/1.0.6

.. inheritance-diagram:: RecaptchaResponseError RecaptchaResponse RecaptchaResponseProtocol
    :parts: 1
"""

import logging
import urllib

from OpenSSL.crypto import FILETYPE_PEM
from OpenSSL.crypto import load_certificate

from twisted import version as _twistedversion
from twisted.internet import defer
from twisted.internet import protocol
from twisted.internet import reactor
from twisted.python import failure
from twisted.python.versions import Version
from twisted.web import client
from twisted.web.http_headers import Headers
from twisted.web.iweb import IBodyProducer

from zope.interface import implements

from bridgedb.crypto import SSLVerifyingContextFactory

#: Base URL of the reCAPTCHA API (always ``https``).  The value was taken from
#: :data:`recaptcha.client.captcha.API_SSL_SERVER`.
API_SERVER = "https://www.google.com/recaptcha/api"
#: Alias of :data:`API_SERVER`; both names refer to the same TLS endpoint.
API_SSL_SERVER = API_SERVER
#: Full URL of the verification endpoint beneath :data:`API_SSL_SERVER`.
API_SSL_VERIFY_URL = API_SSL_SERVER + "/verify"

# NOTE(review): this is a hard-coded (pinned) PEM certificate parsed at import
# time.  Its embedded validity period appears to end in 2013 -- confirm that
# the pin is still the right trust anchor for the reCAPTCHA API server.
#
#: (:class:`OpenSSL.crypto.X509`) Only trust certificates for the reCAPTCHA
#: :data:`API_SSL_SERVER` which were signed by the Google Internet Authority CA.
GOOGLE_INTERNET_AUTHORITY_CA_CERT = load_certificate(FILETYPE_PEM, bytes("""\
-----BEGIN CERTIFICATE-----
MIICsDCCAhmgAwIBAgIDFXfhMA0GCSqGSIb3DQEBBQUAME4xCzAJBgNVBAYTAlVT
MRAwDgYDVQQKEwdFcXVpZmF4MS0wKwYDVQQLEyRFcXVpZmF4IFNlY3VyZSBDZXJ0
aWZpY2F0ZSBBdXRob3JpdHkwHhcNMTIxMjEyMTU1ODUwWhcNMTMxMjMxMTU1ODUw
WjBGMQswCQYDVQQGEwJVUzETMBEGA1UEChMKR29vZ2xlIEluYzEiMCAGA1UEAxMZ
R29vZ2xlIEludGVybmV0IEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw
gYkCgYEAye23pIucV+eEPkB9hPSP0XFjU5nneXQUr0SZMyCSjXvlKAy6rWxJfoNf
NFlOCnowzdDXxFdF7dWq1nMmzq0yE7jXDx07393cCDaob1FEm8rWIFJztyaHNWrb
qeXUWaUr/GcZOfqTGBhs3t0lig4zFEfC7wFQeeT9adGnwKziV28CAwEAAaOBozCB
oDAfBgNVHSMEGDAWgBRI5mj5K9KylddH2CMgEE8zmJCf1DAdBgNVHQ4EFgQUv8Aw
6/VDET5nup6R+/xq2uNrEiQwEgYDVR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8E
BAMCAQYwOgYDVR0fBDMwMTAvoC2gK4YpaHR0cDovL2NybC5nZW90cnVzdC5jb20v
Y3Jscy9zZWN1cmVjYS5jcmwwDQYJKoZIhvcNAQEFBQADgYEAvprjecFG+iJsxzEF
ZUNgujFQodUovxOWZshcnDW7fZ7mTlk3zpeVJrGPZzhaDhvuJjIfKqHweFB7gwB+
ARlIjNvrPq86fpVg0NOTawALkSqOUMl3MynBQO+spR7EHcRbADQ/JemfTEh2Ycfl
vZqhEFBfurZkX0eTANq98ZvVfpg=
-----END CERTIFICATE-----"""))

# `t.w.client.HTTPConnectionPool` isn't available in Twisted-12.0.0
# (see ticket #11219: https://bugs.torproject.org/11219):
_connectionPoolAvailable = _twistedversion >= Version('twisted', 12, 1, 0)
if _connectionPoolAvailable:
    logging.info("Using HTTPConnectionPool for reCaptcha API server.")
    # NOTE(review): the pool is created with ``persistent=False``, so the two
    # persistent-connection tuning attributes set below presumably have no
    # effect -- confirm whether persistent connections were intended.
    _pool = client.HTTPConnectionPool(reactor, persistent=False)
    _pool.maxPersistentPerHost = 5
    _pool.cachedConnectionTimeout = 30
    # Provisional agent, created without any certificate-verifying context
    # factory.
    _agent = client.Agent(reactor, pool=_pool)
else:
    # ``logging.warning`` is used because ``logging.warn`` is a deprecated
    # alias; the arguments are passed lazily so that formatting only happens
    # if the record is actually emitted.
    logging.warning("Twisted-%s is too old for HTTPConnectionPool! Disabling...",
                    _twistedversion.short())
    _pool = None
    _agent = client.Agent(reactor)


# Twisted>=14.0.0 changed the way in which hostname verification works.
# The classes below are therefore only defined on Twisted>=14.0.0.
if _twistedversion >= Version('twisted', 14, 0, 0):
    # NOTE(review): ``_sslverify`` is a private Twisted module; confirm this
    # import still resolves on the Twisted versions being deployed.
    from twisted.internet._sslverify import OpenSSLCertificateAuthorities

    class RecaptchaOpenSSLCertificateAuthorities(OpenSSLCertificateAuthorities):
        """The trusted CAs for connecting to reCAPTCHA servers."""
        #: A list of `OpenSSL.crypto.X509` objects.
        caCerts = [GOOGLE_INTERNET_AUTHORITY_CA_CERT,]
        def __init__(self):
            """Initialise the parent class with :attr:`caCerts`."""
            super(RecaptchaOpenSSLCertificateAuthorities, self).__init__(self.caCerts)

    class RecaptchaPolicyForHTTPS(client.BrowserLikePolicyForHTTPS):
        """An HTTPS policy whose trust root is
        :class:`RecaptchaOpenSSLCertificateAuthorities`.
        """
        # Instantiated once, when this class body is executed, and shared by
        # every instance of the policy.
        _trustRoot = RecaptchaOpenSSLCertificateAuthorities()
        def __init__(self):
            """Initialise the parent policy with :attr:`_trustRoot`."""
            super(RecaptchaPolicyForHTTPS, self).__init__(trustRoot=self._trustRoot)


def _setAgent(agent):
    """Install **agent** as this module's global :attr:`_agent`.

    The previous value of the module-level ``_agent`` is simply rebound; no
    validation is performed on the new object.

    :param agent: The :api:`twisted.web.client.Agent` to store for issuing
        requests.
    """
    global _agent
    _agent = agent

def _getAgent(reactor=reactor, url=API_SSL_VERIFY_URL, connectTimeout=30,
              **kwargs):
    """Create a :api:`twisted.web.client.Agent` which will verify the
    certificate chain and hostname for the given **url**.

    :param reactor: A provider of the
        :api:`twisted.internet.interfaces.IReactorTCP` interface.
    :param str url: The full URL which will be requested with the
        ``Agent``. It is only consulted on Twisted<14.0.0, where it is handed
        to :class:`~bridgedb.crypto.SSLVerifyingContextFactory`.
        (default: :attr:`API_SSL_VERIFY_URL`)
    :param pool: An :api:`twisted.web.client.HTTPConnectionPool`
        instance, given as a keyword argument. When omitted and connection
        pools are available in the installed Twisted, the module-level
        :attr:`_pool` is used. (default: :attr:`_pool`)
    :type connectTimeout: None or int
    :param connectTimeout: If not ``None``, the timeout passed to
        :api:`twisted.internet.reactor.connectTCP` or
        :api:`twisted.internet.reactor.connectSSL` for specifying the
        connection timeout. (default: ``30``)
    :param kwargs: Any further keyword arguments are passed through to the
        ``Agent`` constructor unchanged.
    :returns: A newly constructed :api:`twisted.web.client.Agent`.
    """
    # Twisted>=14.0.0 changed the way in which hostname verification works.
    if _twistedversion >= Version('twisted', 14, 0, 0):
        contextFactory = RecaptchaPolicyForHTTPS()
    else:
        contextFactory = SSLVerifyingContextFactory(url)

    # Only fall back to the module-level pool when the caller did not supply
    # one; passing ``pool=`` explicitly would otherwise collide with a
    # hard-coded ``pool=_pool`` and raise TypeError.
    if _connectionPoolAvailable:
        kwargs.setdefault('pool', _pool)

    return client.Agent(reactor,
                        contextFactory=contextFactory,
                        connectTimeout=connectTimeout,
                        **kwargs)

# Replace the provisional module-level agent with one built by _getAgent(),
# i.e. one constructed with a certificate-verifying context factory.
_setAgent(_getAgent())


class RecaptchaResponseError(ValueError):
    """Signals a problem with the response sent by the reCaptcha API server."""
class RecaptchaResponse(object):
    """The outcome of a reCaptcha verification request.

    Taken from `recaptcha.client.captcha.RecaptchaResponse`__.

    .. __: https://code.google.com/p/recaptcha/source/browse/trunk/recaptcha-plugins/python/recaptcha/client/captcha.py#7
    """

    def __init__(self, is_valid, error_code=None):
        """Store the verification result.

        :param is_valid: Whether the CAPTCHA solution was accepted.
        :param error_code: An optional description of what went wrong.
        """
        self.is_valid, self.error_code = is_valid, error_code
class RecaptchaResponseProtocol(protocol.Protocol):
    """Body parser which turns the reCaptcha API server's response into a
    :class:`RecaptchaResponse`.
    """

    def __init__(self, finished):
        """Create a protocol for creating
        :class:`RecaptchaResponses <bridgedb.txrecaptcha.RecaptchaResponse>`.

        :type finished: :api:`twisted.internet.defer.Deferred`
        :param finished: A deferred which will have its ``callback()`` called
            with a :class:`RecaptchaResponse`.
        """
        self.finished = finished
        # Upper bound on how much of the response body is buffered.
        self.remaining = 1024 * 10
        self.response = ''

    def dataReceived(self, data):
        """Buffer incoming **data**, up to the remaining byte budget."""
        if not self.remaining:
            # Budget exhausted: silently discard anything further.
            return
        chunk = data[:self.remaining]
        self.response += chunk
        self.remaining -= len(chunk)

    def connectionLost(self, reason):
        """Parse the buffered response once the connection has closed.

        The buffered text is expected to consist of a status line followed by
        a newline and an error code; the status is valid only if it is
        exactly ``"true"``.  The resulting :class:`RecaptchaResponse` is
        handed to the ``finished`` deferred.

        :type reason: :api:`twisted.python.failure.Failure`
        :param reason: A string explaining why the connection was closed,
            wrapped in a ``Failure`` instance.
        """
        # Note: this value is superseded in both branches below.
        error = reason.getErrorMessage()

        status, newline, detail = self.response.strip().partition('\n')
        if newline:
            valid = (status == "true")
            error = detail
        else:
            # Without a second line the response cannot be interpreted.
            valid = False
            error = "Couldn't parse response from reCaptcha API server"

        result = RecaptchaResponse(is_valid=valid, error_code=error)
        logging.debug(
            "ReCaptcha API server response: %s(is_valid=%s, error_code=%s)"
            % (result.__class__.__name__, valid, error))
        self.finished.callback(result)
class _BodyProducer(object):
    """I write a string into the body of an open HTTP request."""
    implements(IBodyProducer)

    def __init__(self, body):
        """Remember **body** and record its length for the request."""
        self.body = body
        self.length = len(self.body)

    def startProducing(self, consumer):
        """Write the whole body to **consumer** in a single call.

        :returns: An already-fired :api:`twisted.internet.defer.Deferred`.
        """
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        """Do nothing; the body is written in one go."""
        pass

    def stopProducing(self):
        """Do nothing; the body is written in one go."""
        pass

    def resumeProducing(self):
        """Do nothing; the body is written in one go."""
        pass


def _cbRequest(response):
    """Callback for a :api:`twisted.web.client.Agent.request` which delivers
    the result to a :class:`RecaptchaResponseProtocol`.

    :param response: The response object whose body should be delivered.
    :returns: A :api:`twisted.internet.defer.Deferred` which will callback
        with a ``recaptcha.RecaptchaResponse`` for the request.
    """
    parsed = defer.Deferred()
    bodyParser = RecaptchaResponseProtocol(parsed)
    response.deliverBody(bodyParser)
    return parsed


def _ebRequest(fail):
    """Errback for a :api:`twisted.web.client.Agent.request`.

    :param fail: A :api:`twisted.python.failure.Failure` which occurred during
        the request.
    :returns: A :class:`RecaptchaResponse` with ``is_valid=False`` whose
        ``error_code`` is the failure's error message (or a placeholder if
        that message is empty).
    """
    logging.debug("txrecaptcha._ebRequest() called with %r" % fail)
    message = fail.getErrorMessage()
    if not message:
        message = "possible problem in _ebRequest()"
    return RecaptchaResponse(is_valid=False, error_code=message)


def _encodeIfNecessary(string):
    """Return **string** UTF-8 encoded if it is a ``unicode`` object;
    otherwise return it untouched.
    """
    return string.encode('utf-8') if isinstance(string, unicode) else string
def submit(recaptcha_challenge_field, recaptcha_response_field,
           private_key, remoteip, agent=None):
    """Submits a reCaptcha request for verification.

    This function is a patched version of the
    ``recaptcha.client.captcha.submit()`` function in reCaptcha's Python
    API. It does two things differently:

        1. It uses Twisted for everything.
        2. It uses SSL/TLS for everything.

    This function returns a :api:`twisted.internet.defer.Deferred`; to obtain
    the eventual :class:`RecaptchaResponse`, attach a callback to it (or
    ``yield`` it from within an ``@inlineCallbacks`` function).

    If either the challenge or the response field is empty, no request is
    made and the returned ``Deferred`` fires with a
    :class:`RecaptchaResponse` whose ``is_valid`` is ``False`` and whose
    ``error_code`` is ``'incorrect-captcha-sol'``.

    :param str recaptcha_challenge_field: The value of the HTTP POST
        ``recaptcha_challenge_field`` argument from the form.
    :param str recaptcha_response_field: The value of the HTTP POST
        ``recaptcha_response_field`` argument from the form.
    :param str private_key: The reCAPTCHA API private key.
    :param str remoteip: An IP address to give to the reCaptcha API server.
    :param agent: The :api:`twisted.web.client.Agent` used to issue the
        request. If ``None`` (the default), the module-level agent which is
        current at call time is used, so that an agent installed via
        :func:`_setAgent` is honoured.
    :rtype: :api:`twisted.internet.defer.Deferred`
    :returns: A ``Deferred`` which will callback with a
        ``recaptcha.RecaptchaResponse`` for the request.
    """
    # Look the agent up at call time: a default bound in the signature would
    # be frozen at definition time and would ignore later _setAgent() calls.
    if agent is None:
        agent = _agent

    if not (recaptcha_response_field and recaptcha_challenge_field):
        d = defer.Deferred()
        d.addBoth(_ebRequest)  # We want `is_valid=False`
        d.errback(failure.Failure(ValueError('incorrect-captcha-sol')))
        return d

    params = urllib.urlencode({
        'privatekey': _encodeIfNecessary(private_key),
        'remoteip': _encodeIfNecessary(remoteip),
        'challenge': _encodeIfNecessary(recaptcha_challenge_field),
        'response': _encodeIfNecessary(recaptcha_response_field)})
    body = _BodyProducer(params)
    headers = Headers({"Content-type": ["application/x-www-form-urlencoded"],
                       "User-agent": ["reCAPTCHA Python"]})
    d = agent.request('POST', API_SSL_VERIFY_URL, headers, body)
    # Any request failure is converted into an invalid RecaptchaResponse
    # rather than being propagated as a Failure.
    d.addCallbacks(_cbRequest, _ebRequest)
    return d