Source code for chicken_turtle_util.series

# Copyright (C) 2016 VIB/BEG/UGent - Tim Diels <timdiels.m@gmail.com>
# 
# This file is part of Chicken Turtle Util.
# 
# Chicken Turtle Util is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# Chicken Turtle Util is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public License
# along with Chicken Turtle Util.  If not, see <http://www.gnu.org/licenses/>.

'''
Utilities for working with `pandas.Series`: `invert` swaps a series' index with
its values, `split` spreads array-like values across rows, and `equals` /
`assert_equals` compare two series.
'''

import chicken_turtle_util.data_frame as df_

def invert(series):
    '''
    Swap the index and the values of a series

    The values of ``series`` become the index of the result and the index of
    ``series`` becomes its values.

    Parameters
    ----------
    series
        Series to swap on, must have a name

    Returns
    -------
    pandas.Series
        Series after swap

    See also
    --------
    pandas.Series.map
        Joins series ``a -> b`` and ``b -> c`` into ``a -> c``.
    '''
    # TODO: an alternative is `to_frame` followed by data frame methods.
    # Move the index into columns, then promote the column holding the
    # original values (named after the series) to be the new index.
    swapped = series.reset_index().set_index(series.name)
    # The first remaining column holds the former index.
    return swapped[swapped.columns[0]]
def split(series):
    '''
    Split values across rows

    Each value is converted to a list and handed to
    `data_frame.split_array_like`; the result carries the name of the input
    series. The index is dropped, but this may change in the future.

    Parameters
    ----------
    series : pd.Series
        Series with numpy array-like values.

    Returns
    -------
    pd.Series
        Series with values split across rows.

    Examples
    --------
    >>> series = pd.Series([[1,2],[1,2],[3,4,5]])
    >>> series
    0       [1, 2]
    1       [1, 2]
    2    [3, 4, 5]
    dtype: object
    >>> split(series)
    0    1
    1    2
    2    1
    3    2
    4    3
    5    4
    6    5
    dtype: object
    '''
    # Temporary column label used while the data lives in a data frame.
    column = 'column'
    frame = series.apply(list).to_frame(column)
    result = df_.split_array_like(frame, column)[column]
    # Restore the original series name in place of the temporary label.
    result.name = series.name
    return result
def equals(series1, series2, ignore_order=False, ignore_index=False, all_close=False, _return_reason=False):
    # The docstring is raw because it contains the reST escape ``\ ``, which is
    # an invalid escape sequence in a regular string literal.
    r'''
    Get whether 2 series are equal

    ``NaN``\ s are considered equal (which is consistent with
    `pandas.Series.equals`). ``None`` is considered equal to ``NaN``.

    Parameters
    ----------
    series1, series2 : pd.Series
        Series to compare
    ignore_order : bool
        Ignore order of values (and index)
    ignore_index : bool
        Ignore index values and name.
    all_close : bool
        If False, values must match exactly, if True, floats are compared as if
        compared with `np.isclose`.
    _return_reason : bool
        Internal. If True, `equals` returns a tuple containing the reason, else
        `equals` only returns a bool indicating equality (or equivalence
        rather).

    Returns
    -------
    equal : bool
        Whether they're equal (after ignoring according to the parameters)
    reason : str or None
        If equal, ``None``, otherwise short explanation of why the series
        aren't equal. Omitted if not `_return_reason`.

    See also
    --------
    data_frame.equals : Get whether 2 data frames are equal

    Notes
    -----
    All values (including those of indices) must be copyable and `__eq__` must
    be such that a copy must equal its original. A value must equal itself
    unless it's `np.nan`. Values needn't be orderable or hashable (however
    pandas requires index values to be orderable and hashable). By consequence,
    this is not an efficient function, but it is flexible.
    '''
    result = _equals(series1, series2, ignore_order, ignore_index, all_close)
    if _return_reason:
        return result
    # Callers that did not ask for the reason only get the boolean.
    return result[0]
def _equals(series1, series2, ignore_order, ignore_index, all_close):
    # Implementation behind `equals`. Compares the series names here (unless
    # the index is ignored) and delegates everything else to the data frame
    # comparison. Returns ``(False, reason)`` on a name mismatch, otherwise
    # whatever `df_.equals` returns when asked for a reason (presumably an
    # ``(equal, reason)`` tuple as well; `equals` indexes it with ``[0]``).
    if not ignore_index and series1.name != series2.name:
        return False, 'Series name differs: {!r} != {!r}'.format(series1.name, series2.name)

    # A series only has axis 0, so that is the only axis that can be ignored.
    axes_ignoring_order = {0} if ignore_order else set()
    axes_ignoring_index = {0} if ignore_index else set()

    # TODO: the reasons will be about data frames, which is confusing. May
    # need to copy-paste after all and do the analog for a series, or add an
    # internal param so it outputs series info (pick the former option).
    return df_.equals(
        series1.to_frame(),
        series2.to_frame(),
        ignore_order=axes_ignoring_order,
        ignore_indices=axes_ignoring_index,
        all_close=all_close,
        _return_reason=True,
    )
def assert_equals(series1, series2, ignore_order=False, ignore_index=False, all_close=False):
    '''
    Assert 2 series are equal

    Like ``assert equals(series1, series2, ...)``, but with better hints at
    where the series differ. See :func:`chicken_turtle_util.series.equals` for
    detailed parameter doc.

    Parameters
    ----------
    series1, series2 : pd.Series
    ignore_order : bool
    ignore_index : bool
    all_close : bool

    Raises
    ------
    AssertionError
        If the series are not equal. The message contains the reason reported
        by `equals`, followed by the string form of both series.
    '''
    equals_, reason = equals(series1, series2, ignore_order, ignore_index, all_close, _return_reason=True)
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # stripped when Python runs with -O, which would make this a silent no-op.
    if not equals_:
        raise AssertionError('{}\n\n{}\n\n{}'.format(reason, series1.to_string(), series2.to_string()))