Source code for ibmdbpy.aggregation

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015, IBM Corp.
# All rights reserved.
# Distributed under the terms of the BSD Simplified License.
# The full license is in the LICENSE file, distributed with this software.

"""
The module contains the function that is used to modify or
create columns in an IdaDataFrame based on aggregation.
"""

from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from builtins import str
from future import standard_library

from numbers import Number
from collections import OrderedDict

import ibmdbpy
from ibmdbpy.exceptions import IdaDataBaseError

def aggregate_idadf(idadf, method, other, swap=False):
    """
    Modify internal state variables to represent the aggregation of columns
    of an IdaDataFrame or IdaSeries in ibmdbpy.

    The following arithmetic operators are supported:
    ``+``, ``*``, ``/``, ``-``, ``//``, ``%``, ``**``.

    The syntax is similar to Pandas.

    Parameters
    ----------
    idadf : IdaDataFrame or IdaSeries
        IdaDataFrame or IdaSeries on the left (if swap is False).
    method : str
        Aggregation method that is computed: the following values are
        admissible: "add","mul","div","sub","floordiv","mod","neg","pow"
    other : Number or IdaDataFrame or IdaSeries
        Another object that idadf will be aggregated with (on the right if
        swap is False). None is accepted for the unary "neg" case.
    swap : bool, default: False
        Internally used to handle cases where the call is made reflexively,
        that is when the main IdaDataFrame/IdaSeries is not on the left.
        If swap is True, ``idadf`` and ``other`` are exchanged on entry and
        the operands of every generated expression are exchanged as well.

    Returns
    -------
    Aggregated IdaDataFrame or IdaSeries
        A clone of the input whose column definitions hold the new
        expressions. None is returned implicitly when, after the optional
        swap, ``idadf`` is neither an IdaDataFrame nor an IdaSeries.

    Raises
    ------
    ValueError
        Aggregation method not supported.
    TypeError
        Type not supported for aggregation.
    IdaDataBaseError
        ``other`` is an IdaDataFrame whose number of columns is neither
        equal to the number of columns of ``idadf`` nor equal to 1.

    Examples
    --------
    >>> idairis['SepalLength'] = idairis['SepalLength'] * 2

    Notes
    -----
    It is not possible to create aggregations between columns that are stored
    in different dashDB/DB2 tables.
    """
    def swap_manager(left, right, swap=False):
        # Exchange the two operands when the call is reflexive.
        if swap:
            left, right = right, left
        return (left, right)

    # Swap values in case of reflexive call
    # TODO : Override in IdaSeries instead of including the logic here.
    if swap:
        idadf, other = other, idadf

    simplemethod = {"add": " + ", "mul": " * ", "div": " / ", "sub": " - "}
    complexmethod = {"floordiv": " FLOOR(%s/%s) ",
                     "mod": " MOD(%s,%s) ",
                     "neg": " -%s%s ",
                     "pow": " POWER(%s,%s)"}  # overflow risk, to handle

    all_methods = list(simplemethod) + list(complexmethod)
    if method not in all_methods:
        raise ValueError("Admissible values for method argument are %s." % str(all_methods)[1:-1])

    def build_expression(left, right):
        # Render the expression text for the validated method.
        if method in simplemethod:
            return "(%s%s%s)" % (left, simplemethod[method], right)
        return "(%s)" % (complexmethod[method] % (left, right))

    columndict = OrderedDict()

    if isinstance(idadf, ibmdbpy.IdaDataFrame):
        # The column definitions of `other` do not change while looping, so
        # they are materialized once instead of once per column.
        other_values = None
        if isinstance(other, (ibmdbpy.IdaSeries, ibmdbpy.IdaDataFrame)):
            other_values = list(other.internal_state.columndict.values())

        for index, column in enumerate(idadf.internal_state.columndict.keys()):
            column_value = idadf.internal_state.columndict[column]
            if other is None:  # this is for now just the neg case
                left, right = swap_manager(column_value, '')
            elif isinstance(other, Number):
                left, right = swap_manager(column_value, other, swap)
            elif isinstance(other, ibmdbpy.IdaSeries):
                left, right = swap_manager(column_value, "%s" % other_values[0], swap)
            elif isinstance(other, ibmdbpy.IdaDataFrame):
                if len(idadf.columns) != len(other.columns):
                    if len(other.columns) != 1:
                        raise IdaDataBaseError("Number of columns of other " +
                                               "IdaDataFrame should be either " +
                                               "equal to aggregated IdaDataFrame " +
                                               "or equal to 1.")
                    # Single-column frame: reuse its only column everywhere.
                    left, right = swap_manager("%s" % column_value, "%s" % other_values[0], swap)
                else:
                    # Same width: combine the columns position by position.
                    left, right = swap_manager("%s" % column_value, "%s" % other_values[index], swap)
            else:
                raise TypeError("Aggregation method not supported. Unsupported type for aggregation: %s" % type(other))

            columndict[column] = build_expression(left, right)

        newidadf = idadf._clone()
        for key, value in columndict.items():
            newidadf.internal_state.columndict[key] = value

        # REMARK: Don't need to reset some attributes ?
        return newidadf

    if isinstance(idadf, ibmdbpy.IdaSeries):
        # list(...) is required: a Python 3 keys view cannot be subscripted.
        columnname = list(idadf.internal_state.columndict.keys())[0]
        quoted_name = "\"%s\"" % columnname
        if other is None:  # this is for now just the neg case
            left, right = swap_manager(quoted_name, '')
        elif isinstance(other, Number):
            left, right = swap_manager(quoted_name, other, swap)
        elif isinstance(other, ibmdbpy.IdaSeries):
            left, right = swap_manager(quoted_name, "\"%s\"" % other.columns[0], swap)
        else:
            raise TypeError("Type not supported for aggregation: " + str(type(other)))

        newidaseries = idadf._clone()
        newidaseries.internal_state.columndict[columnname] = build_expression(left, right)
        return newidaseries

    # Any other type of `idadf` falls through and yields None, as before.