Source code for ibmdbpy.indexing

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2015, IBM Corp.
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------

"""
Classes used for subsetting IdaDataFrames.
Indexer currently available : loc
"""
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from future import standard_library
standard_library.install_aliases()

import warnings

import six
from ibmdbpy.exceptions import IdaDataBaseError

#----------------------------------------------------------------------------
# Class for the loc object for IdaDataFrames

class Loc(object):
    """
    The Loc class is used to select and project IdaDataFrames.
    It implements a Pandas-like interface.

    Parameters
    ----------
    idadf : IdaDataFrame or IdaSeries
        The object that is subset when the indexer is used with ``[]``.
    """
    def __init__(self, idadf):
        self.idadf = idadf

    def __getitem__(self, key):
        """
        Use the loc object of an IdaDataFrame or IdaSeries instance to
        do projection or selection in a table.

        Parameters
        ----------
        key : object or tuple
            Either a row selection alone (``loc[index]``) or a 2-tuple
            ``(index, cols)``. ``cols`` is a single column name or an
            iterable of column names.

        Returns
        -------
        A new object obtained from ``_clone()`` (or ``_clone_as_serie()``
        when a single column name is given) whose internal state carries
        the requested index and, if given, only the requested columns.

        Raises
        ------
        ValueError
            If ``key`` is a tuple that does not hold exactly two items, or
            if a list of row numbers contains an out-of-range value while
            the object has no indexer.
        KeyError
            If an integer row number is out of range while the object has
            no indexer, or if a requested column does not exist.
        IdaDataBaseError
            If the object has no indexer and a list index contains
            something other than integers.

        Notes
        -----
        The determinism of the result is guaranteed only if the IdaDataFrame
        has a valid indexer. A warning is emitted when there is no indexer
        and the current state contains no " ORDER BY " clause.

        Examples
        --------
        >>> idairis.loc[0:49] # Select the first 50 rows
        >>> idairis.loc[2, "SepalLength"] # Select the 3rd rows and column "SepalLength"
        >>> idairis.loc[0:len(idairis), ["SepalLength", "SepalWidth"]] # Select all rows and columns "SepalLength", "SepalWidth"
        """
        index, cols = self._split_key(key)
        self._check_index(index)

        idadf = self.idadf
        indexer = idadf.indexer

        if cols is None:
            newidadf = idadf._clone()
            newidadf.internal_state.index = index
            newidadf.internal_state.update()
            newidadf._reset_attributes(["shape", "index"])
        else:
            if isinstance(cols, six.string_types):
                if cols not in idadf.columns:
                    raise KeyError("The label %s is not in the [columns]" % (cols,))
                # Wrap the name in a list so that the membership tests below
                # compare whole column names instead of doing substring
                # matching against the string.
                selected = [cols]
                newidadf = idadf._clone_as_serie(cols)
            else:
                selected = list(cols)
                not_existing = [col for col in selected
                                if col not in idadf.columns]
                if not_existing:
                    raise KeyError("The label [%s] is not in the [columns]" % (not_existing,))
                newidadf = idadf._clone()

            # Iterate over a snapshot of the keys: deleting entries while
            # iterating the dictionary itself fails on Python 3.
            for col in list(newidadf.internal_state.columndict.keys()):
                if col not in selected:
                    del newidadf.internal_state.columndict[col]

            # The indexer column is added for the first update() and taken
            # out again afterwards when it was not requested by the caller.
            temporary_indexer = indexer is not None and indexer not in selected
            if temporary_indexer:
                newidadf.internal_state.columndict[indexer] = "\"" + indexer + "\""

            newidadf.internal_state.index = index
            newidadf.internal_state.update()
            # NOTE: the cached attributes are deliberately not reset at this
            # point; the original author noted that doing so caused trouble.

            if temporary_indexer:
                del newidadf.internal_state.columndict[indexer]
                newidadf.internal_state.update()
                newidadf._reset_attributes(["shape", "dtypes", "index", "columns"])

        if indexer is None:
            if " ORDER BY " not in idadf.internal_state.get_state():
                warnings.warn("Row order is not guaranteed if no indexer" +
                              " was given and the dataset was not sorted")
        return newidadf

    @staticmethod
    def _split_key(key):
        """Split the subscript ``key`` into an ``(index, cols)`` pair."""
        if not isinstance(key, tuple):
            return key, None
        if len(key) > 2:
            raise ValueError("Too many indexer (expected 2)")
        if len(key) < 2:
            raise ValueError("Too few indexers (expected 2)")
        return key[0], key[1]

    def _check_index(self, index):
        """Validate integer row numbers when the object has no indexer."""
        if self.idadf.indexer is not None:
            return

        if isinstance(index, six.integer_types):
            # Rows are numbered from 0, so the row count itself is already
            # one past the last valid label.
            if index < 0 or index >= self.idadf.shape[0]:
                raise KeyError("The label [%s] is not in the [index]" % (index,))
        elif isinstance(index, list):
            if not all(isinstance(x, six.integer_types) for x in index):
                raise IdaDataBaseError("The IdaDataFrame has no indexer, so 'index' should be an integer or a list of integers")
            if index:
                nrows = self.idadf.shape[0]
                for x in index:
                    if x < 0 or x >= nrows:
                        raise ValueError("The index [%s] is out of range" % (x,))