# Source code for lspi.sample

# -*- coding: utf-8 -*-
"""Contains class representing an LSPI sample."""


class Sample(object):

    """Represents an LSPI sample tuple ``(s, a, r, s', absorb)``.

    This class is just a dumb data holder: the constructor stores its
    arguments unchanged and performs no validation or copying, so the
    types of the different fields can be anything convenient for the
    problem domain. For states represented by vectors a numpy array
    works well.

    Parameters
    ----------
    state : numpy.array
        State of the environment at the start of the sample.
        ``s`` in the sample tuple.
        (The usual type is a numpy array.)
    action : int
        Index of action that was executed.
        ``a`` in the sample tuple.
    reward : float
        Reward received from the environment.
        ``r`` in the sample tuple.
    next_state : numpy.array
        State of the environment after executing the sample's action.
        ``s'`` in the sample tuple.
        (The type should match that of state.)
    absorb : bool, optional
        True if this sample ended the episode. False otherwise.
        ``absorb`` in the sample tuple.
        (The default is False, which implies that this is a
        non-episode-ending sample, as the vast majority of samples
        will be non-absorbing.)

    Attributes
    ----------
    state, action, reward, next_state, absorb
        The constructor arguments of the same names, stored as given.

    """

    def __init__(self, state, action, reward, next_state, absorb=False):
        """Initialize Sample instance by storing each argument as-is."""
        self.state = state
        self.action = action
        self.reward = reward
        self.next_state = next_state
        self.absorb = absorb

    def __repr__(self):
        """Create string representation of the sample tuple.

        Returns
        -------
        str
            ``'Sample(s, a, r, s', absorb)'`` where every field is
            rendered with ``%s`` (i.e. ``str()``), so the result is meant
            for display and is not guaranteed to be ``eval``-able.

        """
        # Field order mirrors the constructor signature.
        fields = (self.state,
                  self.action,
                  self.reward,
                  self.next_state,
                  self.absorb)
        return 'Sample(%s, %s, %s, %s, %s)' % fields