Source code for rasterstats.main

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
import numpy as np
import warnings
from affine import Affine
from shapely.geometry import shape
from .io import read_features, Raster
from .utils import (rasterize_geom, get_percentile, check_stats,
                    remap_categories, key_assoc_val, boxify_points)


def raster_stats(*args, **kwargs):
    """Deprecated. Use zonal_stats instead."""
    warnings.warn("'raster_stats' is an alias to 'zonal_stats'"
                  " and will disappear in 1.0", DeprecationWarning)
    return zonal_stats(*args, **kwargs)

def zonal_stats(*args, **kwargs):
    """The primary zonal statistics entry point.

    All arguments are passed directly to ``gen_zonal_stats``.
    See its docstring for details.

    The only difference is that ``zonal_stats`` will
    return a list rather than a generator."""
    return list(gen_zonal_stats(*args, **kwargs))

def gen_zonal_stats(
        vectors, raster,
        layer=0,
        band_num=1,
        nodata=None,
        affine=None,
        stats=None,
        all_touched=False,
        categorical=False,
        category_map=None,
        add_stats=None,
        raster_out=False,
        prefix=None,
        geojson_out=False, **kwargs):
    """Zonal statistics of raster values aggregated to vector geometries.

    Parameters
    ----------
    vectors: path to a vector source or geo-like python objects

    raster: ndarray or path to a GDAL raster source
        If ndarray is passed, the ``affine`` kwarg is required.

    layer: int or string, optional
        If `vectors` is a path to a fiona source,
        specify the vector layer to use either by name or number.
        defaults to 0

    band_num: int, optional
        If `raster` is a GDAL source, the band number to use
        (counting from 1). defaults to 1.

    nodata: float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata.
        If `None`, the file's metadata's NODATA value (if any) will be used.
        defaults to `None`.

    affine: Affine instance
        required only for ndarrays, otherwise it is read from src

    stats: list of str, or space-delimited str, optional
        Which statistics to calculate for each zone.
        All possible choices are listed in ``utils.VALID_STATS``.
        defaults to ``DEFAULT_STATS``, a subset of these.

    all_touched: bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon.
        defaults to `False`

    categorical: bool, optional

    category_map: dict
        A dictionary mapping raster values to human-readable categorical names.
        Only applies when categorical is True

    add_stats: dict, optional
        Names and functions of additional stats to compute.

    raster_out: boolean, optional
        Include the masked numpy array for each feature.
        Each feature dictionary will have the following additional keys:
        mini_raster_array: The clipped and masked numpy array
        mini_raster_affine: Affine transformation
        mini_raster_nodata: NoData Value

    prefix: string
        add a prefix to the keys (default: None)

    geojson_out: boolean
        Return list of GeoJSON-like features (default: False)
        Original feature geometry and properties will be retained
        with zonal stats appended as additional properties.
        Use with `prefix` to ensure unique and meaningful property names.

    Returns
    -------
    generator of dicts (if geojson_out is False)
        Each item corresponds to a single vector feature and
        contains keys for each of the specified stats.

    generator of geojson features (if geojson_out is True)
        GeoJSON-like Feature as python dict
    """
    stats, run_count = check_stats(stats, categorical)

    # Handle 1.0 deprecations
    transform = kwargs.get('transform')
    if transform:
        warnings.warn("GDAL-style transforms will disappear in 1.0. "
                      "Use affine=Affine.from_gdal(*transform) instead",
                      DeprecationWarning)
        if not affine:
            affine = Affine.from_gdal(*transform)

    ndv = kwargs.get('nodata_value')
    if ndv:
        warnings.warn("Use `nodata` instead of `nodata_value`",
                      DeprecationWarning)
        if not nodata:
            nodata = ndv

    cp = kwargs.get('copy_properties')
    if cp:
        warnings.warn("Use `geojson_out` to preserve feature properties",
                      DeprecationWarning)

    with Raster(raster, affine, nodata, band_num) as rast:
        features_iter = read_features(vectors, layer)
        for i, feat in enumerate(features_iter):
            geom = shape(feat['geometry'])

            if 'Point' in geom.type:
                geom = boxify_points(geom, rast)

            geom_bounds = tuple(geom.bounds)

            fsrc = rast.read(bounds=geom_bounds)

            # create ndarray of rasterized geometry
            rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched)
            assert rv_array.shape == fsrc.shape

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 for the correct mask effect
            # we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                fsrc.array,
                mask=np.logical_or(
                    fsrc.array == fsrc.nodata,
                    np.logical_not(rv_array)))

            if masked.compressed().size == 0:
                # nothing here, fill with None and move on
                feature_stats = dict([(stat, None) for stat in stats])
                if 'count' in stats:  # special case, zero makes sense here
                    feature_stats['count'] = 0
            else:
                if run_count:
                    keys, counts = np.unique(masked.compressed(),
                                             return_counts=True)
                    pixel_count = dict(zip([np.asscalar(k) for k in keys],
                                           [np.asscalar(c) for c in counts]))

                if categorical:
                    feature_stats = dict(pixel_count)
                    if category_map:
                        feature_stats = remap_categories(category_map,
                                                         feature_stats)
                else:
                    feature_stats = {}

                if 'min' in stats:
                    feature_stats['min'] = float(masked.min())
                if 'max' in stats:
                    feature_stats['max'] = float(masked.max())
                if 'mean' in stats:
                    feature_stats['mean'] = float(masked.mean())
                if 'count' in stats:
                    feature_stats['count'] = int(masked.count())
                # optional
                if 'sum' in stats:
                    feature_stats['sum'] = float(masked.sum())
                if 'std' in stats:
                    feature_stats['std'] = float(masked.std())
                if 'median' in stats:
                    feature_stats['median'] = float(
                        np.median(masked.compressed()))
                if 'majority' in stats:
                    feature_stats['majority'] = float(
                        key_assoc_val(pixel_count, max))
                if 'minority' in stats:
                    feature_stats['minority'] = float(
                        key_assoc_val(pixel_count, min))
                if 'unique' in stats:
                    feature_stats['unique'] = len(list(pixel_count.keys()))
                if 'range' in stats:
                    try:
                        rmin = feature_stats['min']
                    except KeyError:
                        rmin = float(masked.min())
                    try:
                        rmax = feature_stats['max']
                    except KeyError:
                        rmax = float(masked.max())
                    feature_stats['range'] = rmax - rmin

                for pctile in [s for s in stats if s.startswith('percentile_')]:
                    q = get_percentile(pctile)
                    pctarr = masked.compressed()
                    feature_stats[pctile] = np.percentile(pctarr, q)

            if 'nodata' in stats:
                featmasked = np.ma.MaskedArray(fsrc.array,
                                               mask=np.logical_not(rv_array))
                feature_stats['nodata'] = float(
                    (featmasked == fsrc.nodata).sum())

            if add_stats is not None:
                for stat_name, stat_func in add_stats.items():
                    feature_stats[stat_name] = stat_func(masked)

            if raster_out:
                feature_stats['mini_raster_array'] = masked
                feature_stats['mini_raster_affine'] = fsrc.affine
                feature_stats['mini_raster_nodata'] = fsrc.nodata

            if prefix is not None:
                prefixed_feature_stats = {}
                for key, val in feature_stats.items():
                    newkey = "{}{}".format(prefix, key)
                    prefixed_feature_stats[newkey] = val
                feature_stats = prefixed_feature_stats

            if geojson_out:
                for key, val in feature_stats.items():
                    if 'properties' not in feat:
                        feat['properties'] = {}
                    feat['properties'][key] = val
                yield feat
            else:
                yield feature_stats
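

# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the upstream module): a
# minimal example of the API documented in ``gen_zonal_stats`` above. The
# input paths "polygons.shp" and "elevation.tif" are hypothetical placeholders
# for any fiona-readable vector and GDAL-readable raster.
if __name__ == "__main__":
    # Default statistics (count, min, max, mean) for each polygon
    results = zonal_stats("polygons.shp", "elevation.tif")

    # Explicit statistics plus a user-defined one via ``add_stats``;
    # each add_stats function receives the feature's masked array.
    results = zonal_stats(
        "polygons.shp", "elevation.tif",
        stats="min max median percentile_90",
        add_stats={"mymean": lambda masked: float(masked.mean())})

    # Stream GeoJSON-like features whose properties carry prefixed stats
    for feature in gen_zonal_stats("polygons.shp", "elevation.tif",
                                   prefix="elev_", geojson_out=True):
        print(feature["properties"])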