# -*- coding: utf-8 -*-
"""This module contains the Hankel class
"""
from __future__ import division, print_function
import scipy.sparse as sps
import numpy as np
class Hankel(object):
    """Compute the list of Hankel matrices associated with a sample.

    The resulting list is stored in the ``lhankel`` attribute. It contains
    ``nbL + 1`` matrices of identical shape: ``lhankel[0]`` is indexed by
    (row word ``u``, column word ``v``) and holds the value stored for the
    word ``u + v``; ``lhankel[x + 1]`` holds the value stored for the word
    ``u + (x,) + v`` for each letter ``x``.

    Words must be hashable, sliceable sequences of integer letters
    (presumably tuples of ints -- confirm against the ``Sample`` class).

    :Example:

    >>> from sp2learn import Sample, Hankel
    >>> train_file = '0.spice.train'
    >>> pT = Sample(adr=train_file)
    >>> lhankel = Hankel(sample_instance=pT,
    ...                  lrows=6, lcolumns=6, version="classic",
    ...                  partial=True, sparse=True).lhankel

    - Input:

    :param sample_instance: object exposing the attributes ``nbL`` (number
        of letters), ``nbEx`` (number of examples) and the dictionaries
        ``sample``, ``pref``, ``suff`` and ``fact``
    :param lrows: only used if partial=True; either a maximal word length
        (int) or an explicit list of words. Candidates are taken from the
        prefix dictionary if version="classic" or "prefix", from the
        factor dictionary otherwise. ``None`` means an empty list.
    :type lrows: int or list of words
    :param lcolumns: only used if partial=True; either a maximal word
        length (int) or an explicit list of words. Candidates are taken
        from the suffix dictionary if version="classic" or "suffix", from
        the factor dictionary otherwise. ``None`` means an empty list.
    :type lcolumns: int or list of words
    :param string version: (default = "classic") version name; "classic",
        "prefix" or "suffix", any other value selects the factor version
    :param boolean partial: (default value = False) build partial
        matrices restricted to ``lrows`` and ``lcolumns``
    :param boolean sparse: (default value = False) True if the Hankel
        matrices must be sparse (DoK) matrices, numpy arrays otherwise
    """

    def __init__(self, sample_instance,
                 lrows=None, lcolumns=None,
                 version="classic", partial=False, sparse=False):
        # None replaces the former mutable ``[]`` defaults
        if lrows is None:
            lrows = []
        if lcolumns is None:
            lcolumns = []
        # Size of the alphabet
        self.nbL = sample_instance.nbL
        # Number of samples
        self.nbEx = sample_instance.nbEx
        self.version = version
        self.partial = partial
        self.sparse = sparse
        self.lhankel = self.build(sample=sample_instance.sample,
                                  pref=sample_instance.pref,
                                  suff=sample_instance.suff,
                                  fact=sample_instance.fact,
                                  lrows=lrows, lcolumns=lcolumns)

    @property
    def nbL(self):
        """Number of letters"""
        return self._nbL

    @nbL.setter
    def nbL(self, nbL):
        if not isinstance(nbL, int):
            raise TypeError("nbL should be an integer")
        if nbL < 0:
            raise ValueError("The size of the alphabet should be " +
                             "an integer >= 0")
        self._nbL = nbL

    @property
    def nbEx(self):
        """Number of examples"""
        return self._nbEx

    @nbEx.setter
    def nbEx(self, nbEx):
        if not isinstance(nbEx, int):
            raise TypeError("nbEx should be an integer")
        if nbEx < 0:
            raise ValueError("The number of examples should be " +
                             "an integer >= 0")
        self._nbEx = nbEx

    def build(self, sample, pref, suff, fact, lrows, lcolumns):
        """Create the list of Hankel matrices.

        - Input:

        :param dict sample: sample dictionary
        :param dict pref: prefix dictionary
        :param dict suff: suffix dictionary
        :param dict fact: factor dictionary
        :param lrows: maximal length or list of row words; ignored
            unless ``self.partial`` is True
        :type lrows: int or list of words
        :param lcolumns: maximal length or list of column words; ignored
            unless ``self.partial`` is True
        :type lcolumns: int or list of words

        - Output:

        :returns: list lhankel of ``self.nbL + 1`` Hankel matrices,
            DoK sparse matrices if ``self.sparse`` is True, numpy arrays
            otherwise
        :rtype: list of matrix
        """
        print("Start compute Hankel matrix")
        # Compute the row words (lrows) and the column words (lcolumns)
        if not self.partial:
            (lrows, lcolumns) = self._build_not_partial(
                pref=pref, suff=suff, fact=fact)
        else:
            (lrows, lcolumns) = self._build_partial(
                pref=pref, suff=suff, fact=fact,
                lrows=lrows, lcolumns=lcolumns)
        lhankel = self._create_hankel(sample=sample, pref=pref,
                                      suff=suff, fact=fact,
                                      lrows=lrows, lcolumns=lcolumns)
        print("End of compute Hankel matrix")
        return lhankel

    def _row_column_dicts(self, pref, suff, fact):
        """Return the (rows, columns) source dictionaries of the version."""
        version = self.version
        if version == "classic":
            return (pref, suff)
        if version == "prefix":
            return (pref, fact)
        if version == "suffix":
            return (fact, suff)
        # any other version name is handled as the factor version
        return (fact, fact)

    def _build_not_partial(self, pref, suff, fact):
        """Return every available row word and column word."""
        (dict_first, dict_second) = self._row_column_dicts(pref, suff, fact)
        return (dict_first.keys(), dict_second.keys())

    def _build_partial(self, pref, suff, fact, lrows, lcolumns):
        """Return the row and column words restricted by the user input."""
        (dict_first, dict_second) = self._row_column_dicts(pref, suff, fact)
        return self._construc_partial_lrows_lcolumns(
            dict_first=dict_first, dict_second=dict_second,
            lrows=lrows, lcolumns=lcolumns)

    @staticmethod
    def _select_words(candidates, wanted):
        """Filter ``candidates`` by a maximal length or a list of words."""
        if isinstance(wanted, int):
            # an int is a maximal word length
            return [w for w in candidates if len(w) <= wanted]
        # otherwise keep the requested words that are known candidates
        known = set(candidates)
        return [w for w in wanted if w in known]

    def _construc_partial_lrows_lcolumns(self, dict_first, dict_second,
                                         lrows, lcolumns):
        """Restrict rows to ``dict_first`` and columns to ``dict_second``.

        :returns: tuple (list of row words, list of column words)
        """
        return (self._select_words(dict_first, lrows),
                self._select_words(dict_second, lcolumns))

    def _create_hankel(self, sample, pref, suff, fact, lrows, lcolumns):
        """Allocate and fill the ``self.nbL + 1`` Hankel matrices."""
        (drows, dcolumns) = self._sorted_rows_columns(lrows, lcolumns)
        # The shape comes from the index dictionaries so that duplicated
        # words in lrows/lcolumns cannot create spurious empty rows/columns
        shape = (len(drows), len(dcolumns))
        if self.sparse:
            lhankel = [sps.dok_matrix(shape) for _ in range(self.nbL + 1)]
        else:
            lhankel = [np.zeros(shape) for _ in range(self.nbL + 1)]
        version = self.version
        if version == "classic":
            dsample = sample
        elif version == "prefix":
            dsample = pref
        elif version == "suffix":
            dsample = suff
        else:
            dsample = fact
        for w, value in dsample.items():
            length = len(w)
            # try every split w = u + v and w = u + (letter,) + v
            for i in range(length + 1):
                row = drows.get(w[:i])
                if row is None:
                    continue
                col = dcolumns.get(w[i:])
                if col is not None:
                    lhankel[0][row, col] = value
                if i < length:
                    col = dcolumns.get(w[i + 1:])
                    if col is not None:
                        # matrix 0 is the "no letter" matrix, hence + 1
                        lhankel[w[i] + 1][row, col] = value
        return lhankel

    def _sorted_rows_columns(self, lrows, lcolumns):
        """Map each distinct word to its index.

        Words are ordered by length first, then by natural order.

        :returns: tuple (dict word -> row index, dict word -> column index)
        """
        def by_length_then_value(word):
            return (len(word), word)

        ordered_rows = sorted(set(lrows), key=by_length_then_value)
        ordered_columns = sorted(set(lcolumns), key=by_length_then_value)
        drows = {w: i for i, w in enumerate(ordered_rows)}
        dcolumns = {w: i for i, w in enumerate(ordered_columns)}
        return (drows, dcolumns)