# Source code for errorgeopy.geocoders

"""Contains the `Geocoder` and `GeocoderPool` classes, representing one, and a
pool of pre-configured geocoders, respectively.

`Geocoder` is a very thin piece
of wrapping over `geopy.geocoders.base.Geocoder` that primarily just initialises
a `geopy.Geocoder` instance by referring to it by name and passing
configuration.

`GeocoderPool` coordinates reading of configuration (file or dictionary) of a
suite of geocoders that you should configure, although a small number are
available with no configuration. The `GeocoderPool` then coordinates requests
via individual `Geocoder` objects, handling failures and geocoding in parallel
for the sake of efficiency. Both forward and backward ("reverse") geocoding are
supported, but note that not all geocoding services exposed via `errorgeopy`
support both methods.

.. moduleauthor:: Richard Law <richard.m.law@gmail.com>
"""

import os
import collections
import collections.abc
import warnings
from multiprocessing.dummy import Pool as ThreadPool
from itertools import repeat
from collections import OrderedDict
import copy

import geopy

from errorgeopy.address import Address
from errorgeopy.location import Location
from errorgeopy import utils, DEFAULT_GEOCODER_POOL


def _action(geocoder, query, method, kwargs={}, skip_timeouts=True):
    """Private function, performs a geocoding action.

    Args:
        geocoder (geopy.geocoders.base.Geocoder): A geocoder to run a query
            against.
        query (str, tuple): The address (forward) or location (reverse) you
            wish to geocode.
        method (str): The name of the method to call on the geocoder (e.g.
            "reverse", "geocode").

    Kwargs:
        kwargs (dict): Kwargs for the method.
        skip_timeouts (bool): If a timeout is encountered, controls whether the
            normal exception is raised, or if it should be silently ignored.
    """
    method = getattr(geocoder, method, False)
    assert method and callable(method)
    results = []
    try:
        result = method(query, **kwargs)
    except geopy.exc.GeocoderTimedOut as timeout:
        if not skip_timeouts:
            raise timeout
        else:
            return results
    except NotImplementedError:
        return results
    if not result:
        return results
    results.extend(result if isinstance(result, list) else [result])
    return results


def _geocode(geocoder, query, kwargs=None, skip_timeouts=True):
    """Pickle-able geocoding method that works with any object that implements a
    "geocode" method. Given an address, find locations.

    Args:
        geocoder: Object exposing a callable "geocode" method.
        query (str): The address to geocode.

    Kwargs:
        kwargs (dict): Keyword arguments for the "geocode" method. ``None``
            (the default) means no extra keyword arguments.
        skip_timeouts (bool): Passed through to :code:`_action`.

    Returns:
        Whatever :code:`_action` returns for the "geocode" method.

    Notes:
        See :code:`_action` function; this just supplies the :code:`method` to
        that function (as "geocode"). Therefore geocoder must have a callable
        method called "geocode".
    """
    # Normalise here so that _action always receives a mapping to unpack.
    options = {} if kwargs is None else kwargs
    return _action(geocoder, query, 'geocode', options, skip_timeouts)


def _reverse(geocoder, query, kwargs=None, skip_timeouts=True):
    """Pickle-able reverse geocoding method that works with any object that
    implements a "reverse" method. Given a point, find addresses.

    Args:
        geocoder: Object exposing a callable "reverse" method.
        query (:class:`geopy.point.Point`, list or tuple of (latitude,
            longitude), or string as "%(latitude)s, %(longitude)s"): The
            location to reverse geocode.

    Kwargs:
        kwargs (dict): Keyword arguments for the "reverse" method. ``None``
            (the default) means no extra keyword arguments.
        skip_timeouts (bool): Passed through to :code:`_action`.

    Returns:
        Whatever :code:`_action` returns for the "reverse" method.

    Notes:
        See :code:`_action` function; this just supplies the :code:`method` to
        that function (as "reverse"). Therefore geocoder must have a callable
        method called "reverse".
    """
    # Normalise here so that _action always receives a mapping to unpack.
    options = {} if kwargs is None else kwargs
    return _action(geocoder, query, 'reverse', options, skip_timeouts)


# TODO is it possible to use/inherit a geopy class and extend on the fly?
class Geocoder(object):
    """A single geocoder exposing access to a geocoding web service with geopy.

    Thin wrapping over the geopy.Geocoder set of geocoding services. Used by
    `errorgeopy.GeocoderPool` to access the configuration of each component
    service. The base `geopy.Geocoder` object can be obtained via the
    `geocoder` attribute.
    """

    def __init__(self, name, config):
        """A single geocoding service with configuration.

        Args:
            name (str): Name of the geocoding service. Must be a name used by
                geopy.
            config (dict): Configuration for that geocoder, meeting the geopy
                API. The optional "geocode" and "reverse" entries hold the
                keyword arguments for those two methods; every other entry is
                passed to the geocoder's constructor. The dictionary itself is
                not modified. ``None`` is treated as an empty configuration.
        """
        self._name = name
        # Work on a shallow copy so that the caller's dictionary is untouched.
        settings = dict(config) if config else {}
        # Always remove the method-specific entries (even when they are empty
        # or None) so that they never reach the geocoder's constructor.
        self._geocode_kwargs = settings.pop('geocode', None) or {}
        self._reverse_kwargs = settings.pop('reverse', None) or {}
        self._config = settings

    @property
    def geocoder(self):
        """The `geopy.Geocoder` instance.

        A new instance is built from the stored configuration on every access.
        """
        return geopy.get_geocoder_for_service(self.name)(**self._config)

    @property
    def name(self):
        """The string name of the geocoder.
        """
        return self._name

    @property
    def config(self):
        """The configuration of the geocoder (less the kwargs for the
        `geocode` and `reverse` methods), as a dictionary.
        """
        return self._config
class GeocoderPool(object):
    """A "pool" of objects that inherit from
    :code:`geopy.geocoders.base.Geocoder`, with configuration specific to each
    service. Represents the inputs for geocoding operations that span across
    multiple providers. Queries are run in parallel across all configured
    geocoding providers, and results are intended to be a composition of
    multiple responses from different providers with coherent configuration
    (e.g. a universal :code:`country_bias`), although this is not enforced.
    """

    def __init__(self, config=None, geocoders=None):
        """Initialises a pool of geocoders to run queries over in parallel.

        Args:
            config (dict): A dictionary representing configuration for a suite
                of geocoders to be used for geocoding queries. Takes
                precedence over `geocoders` when it is not empty.
            geocoders: An iterable of `Geocoder` wrappers from this module or
                of `geopy.geocoders.base.Geocoder` instances. Only consulted
                when `config` is empty or None. If both are empty, the
                package's default pool is used.

        Raises:
            TypeError: If `config` is not a dictionary, or if `geocoders` is
                not an iterable of geocoders.

        Notes:
            The structure of the configuration file (GeocoderPool.fromfile) or
            dictionary (GeocoderPool.__init__) must match the names of geopy
            geocoders, their instantiation options, and method signatures for
            `geocode` and `reverse`. See the `geopy documentation`_ for
            possible options.

            Note in particular that for a large number of possible geocoders,
            authentication tokens are required. They must be included in your
            configuration; so be careful with including this file in source
            control or generally sharing it.

            The default arguments used by geopy will be used if any keyword
            arguments are absent in the configuration.

        .. _`geopy documentation`: http://geopy.readthedocs.io/en/latest/
        """
        self._config = config
        self._geocoders = DEFAULT_GEOCODER_POOL
        if config:
            if not isinstance(config, dict):
                raise TypeError(
                    "GeocoderPool configuration must be a dictionary")
            # Deep copy: building the members must never alter the caller's
            # (possibly nested) configuration, which stays available as-is
            # through the `config` property.
            cfg = copy.deepcopy(config)
            self._geocoders = [Geocoder(gc, cfg[gc]) for gc in cfg]
        elif geocoders:
            if not isinstance(geocoders, collections.abc.Iterable):
                raise TypeError(
                    "GeocoderPool member geocoders must be an iterable set")
            # Materialise first so that one-shot iterables (e.g. generators)
            # are not exhausted by the validation below.
            members = list(geocoders)
            accepted = (Geocoder, geopy.geocoders.base.Geocoder)
            if not all(isinstance(g, accepted) for g in members):
                raise TypeError(
                    "GeocoderPool member geocoders must be geopy.geocoder geocoder")
            self._geocoders = members

    def __unicode__(self):
        return '\n'.join(g.name for g in self._geocoders)

    def __str__(self):
        return self.__unicode__()

    @property
    def config(self):
        """The (parsed) configuration that will be referred to when geocoding,
        as a dictionary.
        """
        return self._config

    @property
    def geocoders(self):
        """The unique geocoders that will be used when geocoding, in the order
        in which they were supplied.
        """
        return self._check_duplicates()

    def _check_duplicates(self):
        """Checks for duplicate members of the geocoding pool. If any are
        found, a warning is emitted and duplicates are removed, leaving only
        unique geocoders (first occurrence wins, order is preserved).
        """
        unique = list(OrderedDict.fromkeys(self._geocoders))
        if len(unique) != len(self._geocoders):
            warnings.warn(
                "Not all supplied geocoders are unique; ignoring duplicate entries")
            self._geocoders = unique
        return self._geocoders

    @classmethod
    def fromfile(cls, config, caller=None):
        """Instantiates a GeocoderPool from a configuration file. For example,
        a `config.yml` file may look like::

            ArcGIS:
              geocode:
                exactly_one: true
              reverse:
                distance: 1000
            Nominatim:
              country_bias: "New Zealand"
              geocode:
                addressdetails: true
                language: en
                exactly_one: false
              reverse:
                exactly_one: true
                language: en

        Then you could use this classmethod as follows:

            >>> import yaml
            >>> from errorgeopy.geocoders import GeocoderPool
            >>> gpool = GeocoderPool.fromfile('./config.yml', yaml.safe_load)

        Args:
            config (str): path to a configuration file on your system.

        Kwargs:
            caller (function): method that will parse the open config file
                into a Python dictionary with keys matching GeoPy geocoder
                names, and those keys holding values that are also
                dictionaries: function signatures for `geocode` and
                `reverse`, and any other geocoder-specific configuration
                (e.g. `country_bias` above). Prefer a safe parser such as
                `yaml.safe_load` over `yaml.load`, which can execute
                arbitrary code from an untrusted file.

        Raises:
            TypeError: If no `caller` is given; a raw file object cannot be
                used as a configuration dictionary.
        """
        if not caller:
            raise TypeError(
                "GeocoderPool.fromfile requires a `caller` that parses the "
                "configuration file into a dictionary")
        with open(config, 'r') as cfg:
            return cls(config=caller(cfg))

    def _pool_query(self, query, func, attr, callback):
        """Uses :code:`query` to perform :code:`func` with kwargs :code:`attr`
        in parallel against all configured geocoders. Performs
        :code:`callback` function on the result list of addresses or
        locations.

        Args:
            query (str): The query component of a reverse or forward geocode.
            func (function): Function to use to obtain an answer. Called as
                :code:`func(geocoder, query, kwargs)` once per pool member.
            attr (str): Name of the attribute on each pool member that holds
                the keyword arguments to pass to the function.
            callback (func): Function to run over iterable result.

        Returns:
            Output of `callback`.
        """
        # Resolve the (de-duplicated) members once so that the geocoders and
        # their keyword arguments are guaranteed to line up.
        members = list(self.geocoders)
        # Wrapper objects expose the geopy instance as `.geocoder` and their
        # method kwargs as `attr`; anything else is queried directly, with no
        # extra keyword arguments.
        services = [getattr(m, 'geocoder', m) for m in members]
        options = [getattr(m, attr, {}) for m in members]
        pool = ThreadPool()
        try:
            results = pool.starmap(func, zip(services, repeat(query), options))
        finally:
            # Release the worker threads even if a query raised.
            pool.close()
            pool.join()
        locations = []
        for outcome in results:
            if isinstance(outcome, list):
                locations.extend(outcome)
            else:
                locations.append(outcome)
        return callback(locations)

    def geocode(self, query):
        """Forward geocoding: given a string address, return a point location.
        ErrorGeoPy does this, and also provides you with ways to interrogate
        the spatial error in the result.

        Args:
            query (str): Address you want to find the location of (with
                spatial error).

        Returns:
            An `errorgeopy.location.Location` built from the combined results
            of every geocoder in the pool.
        """
        return self._pool_query(query, _geocode, '_geocode_kwargs', Location)

    def reverse(self, query):
        """Reverse geocoding: given a point location, returns a string address.
        ErrorGeoPy does this, and also provides you with ways to interrogate
        the uncertainty in the result.

        Args:
            query (`geopy.point.Point`, iterable of (lat, lon), or string as
                "%(latitude)s, %(longitude)s"): The coordinates for which you
                wish to obtain the closest human-readable addresses.

        Returns:
            An `errorgeopy.address.Address` built from the combined results of
            every geocoder in the pool.
        """
        return self._pool_query(query, _reverse, '_reverse_kwargs', Address)