Source code for libpgm.sampleaggregator

# Copyright (c) 2012, CyberPoint International, LLC
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the CyberPoint International, LLC nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL CYBERPOINT INTERNATIONAL, LLC BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
This module provides tools for collecting and managing sets of samples generated by the library's sampling functions. By averaging a series of samples, the progam can approximate a joint probability distribution without having to do the exact calculations, which may be useful in large networks. 

'''


[docs]class SampleAggregator(object):
    '''
    This class is a machine for aggregating data from sample sequences. It contains the method *aggregate*.
    
    '''
    def __init__(self):
        self.seq = None
        '''The sequence inputted.'''
        self.avg = None
        '''The average of all the entries in *seq*, represented as a dict where each vertex has an entry whose value is a dict of {key, value} pairs, where each key is a possible outcome of that vertex and its value is the approximate frequency.'''


[docs]    def aggregate(self, samplerstatement):
        '''
        Generate a sequence of samples using *samplerstatement* and return the average of its results. 
        
        Arguments:
            1. *samplerstatement* -- The statement of a function (with inputs) that would output a sequence of samples. For example: ``bn.randomsample(50)`` where ``bn`` is an instance of the :doc:`DiscreteBayesianNetwork <discretebayesiannetwork>` class.
        
        This function stores the output of *samplerstatement* in the attribute *seq*, and then averages *seq* and stores the approximate distribution found in the attribute *avg*. It then returns *avg*. 
       
        Usage example: this would print the average of 10 data points::

            import json

            from libpgm.nodedata import NodeData
            from libpgm.graphskeleton import GraphSkeleton
            from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
            from libpgm.sampleaggregator import SampleAggregator

            # load nodedata and graphskeleton
            nd = NodeData()
            skel = GraphSkeleton()
            nd.load("../tests/unittestdict.txt")
            skel.load("../tests/unittestdict.txt")

            # topologically order graphskeleton
            skel.toporder()

            # load bayesian network
            bn = DiscreteBayesianNetwork(skel, nd)

            # build aggregator
            agg = SampleAggregator()

            # average samples
            result = agg.aggregate(bn.randomsample(10))

            # output
            print json.dumps(result, indent=2)

        '''
        
        # get sequence
        seq = samplerstatement
        
        # denominator
        denom = len(seq)
        
        output = dict()
        for key in seq[0].keys():
            output[key] = dict()
            for trial in seq:
                keyss = output[key].keys()
                vall = trial[key]
                if (keyss.count(vall) > 0):
                    output[key][trial[key]] += 1
                else:
                    output[key][trial[key]] = 1
                    
            # normalize
            for entry in output[key].keys():
                output[key][entry] = output[key][entry] / float(denom)
        
        self.seq = seq
        self.avg = output
        
        return output
Navigation

Source code for libpgm.sampleaggregator

Quick search

Navigation