# Source code for learning

# -*- coding: utf-8 -*-
"""This module contains the Learning class

.. moduleauthor:: François Denis

"""

from __future__ import division, print_function
import numpy as np
import scipy.sparse as sps
import scipy.sparse.linalg as lin
import math
from sp2learn.sample import Sample
from sp2learn.hankel import Hankel
import sp2learn.automaton as AC

class Learning(object):
    """A learning instance: builds an automaton from the Hankel matrices
    of a sample.

    :Example:

    >>> from sp2learn import Learning, Sample
    >>> train_file = '0.spice.train'
    >>> pT = Sample(adr=train_file)
    >>> S_app = Learning(sample_instance=pT)

    - Input:

    :param Sample sample_instance: an instance of Sample
        (nbL, nbEx, and dictionaries)

    """

    def __init__(self, sample_instance):
        # The alphabet size (nbL), the number of examples (nbEx) and the
        # sample / prefix / suffix / factor dictionaries are all reached
        # through this single Sample object.
        self.sample_object = sample_instance

    @property
    def sample_object(self):
        """Sample object, contains dictionaries"""
        return self._sample_object

    @sample_object.setter
    def sample_object(self, sample_object):
        if not isinstance(sample_object, Sample):
            raise TypeError("sample_object should be a Sample")
        self._sample_object = sample_object

    @staticmethod
    def _spectral_parameters(lhankel, nbL, rank, sparse=False):
        """Compute the automaton parameters from the Hankel matrices.

        ``lhankel[0]`` is decomposed by a rank-``rank`` SVD; the
        pseudo-inverses of the two factors are then applied to the first
        row / first column of ``lhankel[0]`` and to each ``lhankel[x + 1]``.

        - Input:

        :param list lhankel: list of ``nbL + 1`` Hankel matrices
        :param int nbL: the number of letters
        :param int rank: number of singular values kept
        :param boolean sparse: True if the matrices are scipy sparse

        - Output:

        :returns: ``(init, term, trans)`` where ``trans`` is a list of
            ``nbL`` matrices
        :rtype: tuple
        :raises ValueError: if ``rank`` exceeds the smallest dimension of
            ``lhankel[0]``

        """
        hankel = lhankel[0]
        smallest_dim = min(hankel.shape)
        if rank > smallest_dim:
            # Without this guard the truncated factors would silently have
            # fewer than ``rank`` dimensions.
            raise ValueError("The number of rank " + str(rank) +
                             " should be <= to " +
                             "Hankel Matrix shape " + str(smallest_dim))

        if not sparse:
            # Only the leading ``rank`` singular vectors are used, so the
            # reduced SVD is enough and avoids building the full square
            # factors of a rectangular matrix.
            u, s, v = np.linalg.svd(hankel, full_matrices=False)
            u, s, v = u[:, :rank], s[:rank], v[:rank, :]
        else:
            u, s, v = lin.svds(hankel, k=rank)

        # Release each factor as soon as its pseudo-inverse is available.
        pis = np.linalg.pinv(v)
        del v
        pip = np.linalg.pinv(np.dot(u, np.diag(s)))
        del u, s

        if not sparse:
            init = np.dot(hankel[0, :], pis)
            term = np.dot(pip, hankel[:, 0])
            trans = [np.dot(pip, np.dot(lhankel[x + 1], pis))
                     for x in range(nbL)]
        else:
            # Sparse slices stay two-dimensional: flatten them explicitly.
            init = hankel[0, :].dot(pis)[0, :]
            term = np.dot(pip, hankel[:, 0].toarray())[:, 0]
            trans = [np.dot(pip, lhankel[x + 1].dot(pis))
                     for x in range(nbL)]
        return init, term, trans

    @staticmethod
    def BuildAutomatonFromHankel(lhankel, nbL, rank, sparse=False):
        """ Build an automaton from Hankel matrix

        - Input:

        :param list lhankel: list of Hankel matrix
        :param int nbL: the number of letters
        :param int rank: the ranking number
        :param boolean sparse: (default value = False) True if Hankel
            matrix is sparse

        - Output:

        :returns: An automaton instance
        :rtype: Automaton
        :raises ValueError: if ``rank`` exceeds the smallest dimension of
            ``lhankel[0]``

        """
        print("Start Build Automaton from Hankel matrix")
        init, term, trans = Learning._spectral_parameters(
            lhankel, nbL, rank, sparse)
        A = AC.Automaton(nbL, rank, init, term, trans)
        print("End of compute Automaton")
        return A

    def LearnAutomaton(self, rank, lrows=None, lcolumns=None,
                       version="classic", partial=False, sparse=False):
        """ Learn Automaton from sample

        - Input:

        :param int rank: the ranking number
        :param lrows: number or list of rows,
            a list of strings if partial=True;
            otherwise, based on self.pref if version="classic" or
            "prefix", self.fact otherwise. ``None`` (the default) is
            treated as an empty list.
        :type lrows: int or list of int
        :param lcolumns: number or list of columns
            a list of strings if partial=True ;
            otherwise, based on self.suff if version="classic" or
            "suffix", self.fact otherwise. ``None`` (the default) is
            treated as an empty list.
        :type lcolumns: int or list of int
        :param string version: (default = "classic") version name
        :param boolean partial: (default value = False) build of partial
            Hankel matrix
        :param boolean sparse: (default value = False) True if Hankel
            matrix is sparse

        - Output:

        :returns: An automaton instance
        :rtype: Automaton
        :raises ValueError: if ``rank`` is larger than the smallest
            dimension of the Hankel matrix

        """
        # Fresh lists per call: a shared mutable default could leak state
        # between calls if it were ever modified downstream.
        lrows = [] if lrows is None else lrows
        lcolumns = [] if lcolumns is None else lcolumns

        lhankel = Hankel(sample_instance=self.sample_object,
                         lrows=lrows, lcolumns=lcolumns,
                         version=version,
                         partial=partial, sparse=sparse).lhankel
        matrix_shape = min(lhankel[0].shape)
        if matrix_shape < rank:
            raise ValueError("The number of rank " + str(rank) +
                             " should be <= to " +
                             "Hankel Matrix shape " + str(matrix_shape))

        A = self.BuildAutomatonFromHankel(lhankel=lhankel,
                                          nbL=self.sample_object.nbL,
                                          rank=rank,
                                          sparse=sparse)
        # The initial vector is scaled by the number of examples.
        A.initial = A.initial / self.sample_object.nbEx
        if version in ("prefix", "factor"):
            A = A.transformation(source=version, target="classic")
        return A

    @staticmethod
    def Perplexity(A, adr):
        """ Perplexity calculation

        Loads the target automaton from ``"./" + adr + ".target"`` and the
        test sample from ``"./" + adr + ".test"``, then returns
        ``exp(-sum_w Cible(w)/sC * log(|A(w)|/sA))`` where ``sA`` and ``sC``
        are the sums of the absolute values given by ``A`` and by the
        target automaton over the words of the test sample.

        - Input:

        :param A: automaton to evaluate (its ``val`` method is used)
        :param string adr: base path of the ``.target`` / ``.test`` files

        - Output:

        :returns: the perplexity
        :rtype: float
        :raises ValueError: from ``math.log`` if ``A.val(w)`` is zero for
            a word of the test sample

        """
        Cible = AC.Automaton.load_Spice_Automaton("./" + adr + ".target")
        # Learning() only accepts a Sample instance, so the test file is
        # read through Sample directly.
        test_words = Sample(adr="./" + adr + ".test").sample

        # Evaluate each word once with both automata.
        # NOTE(review): assumes ``val`` has no side effects - confirm.
        values = [(A.val(w), Cible.val(w)) for w in test_words]
        sA = sum(abs(a_val) for a_val, _ in values)
        sC = sum(abs(c_val) for _, c_val in values)

        s = 0
        for a_val, c_val in values:
            s = s + c_val / sC * math.log(abs(a_val) / sA)
        return math.exp(-s)
# NOTE: stale example kept for reference; Learning.__init__ takes a Sample
# through ``sample_instance``, not the ``adr`` / ``type`` arguments used here.
# if __name__ == '__main__':
#     from skgilearn.datasets.get_dataset_path import get_dataset_path
#     adr = get_dataset_path("essai")
#     P = Learning(adr=adr, type='SPiCe')
#
#     print("nbL = " + str(P.nbL))
#     print("nbEx = " + str(P.nbEx))
#     print("samples = " + str(P.sample))
#     print("prefixes = " + str(P.pref))
#     print("suffixes = " + str(P.suff))
#     print("factors = " + str(P.fact))