Source code for load

# -*- coding: utf-8 -*-
"""This module contains the Load class
   The Load class loads objects from a file
"""

import pickle

class Load(object):
    """Loader for sample files.

    :Example:

    >>> from sp2learn import Load
    >>> l = Load('adr').load_Spice_Sample()

    - Input:

    :param string adr: address and name of the loaded file
    :param string type: (default value = 'SPiCe') indicate the structure
        of the file
    """

    def __init__(self, adr, type='SPiCe'):
        # ``type`` shadows the builtin but is part of the public signature,
        # so the parameter name is kept.
        self._type = type
        self._adr = adr

    def load_Spice_Sample(self, lrows=(), lcolumns=(), version="classic",
                          partial=False):
        """Load a sample from a Spice file and return its count dictionaries.

        - Input:

        :param lrows: number or list of rows,
            a list of strings if partial=True;
            otherwise, based on pref if version="classic" or "prefix",
            fact otherwise
        :type lrows: int or list of int
        :param lcolumns: number or list of columns,
            a list of strings if partial=True;
            otherwise, based on suff if version="classic" or "suffix",
            fact otherwise
        :type lcolumns: int or list of int
        :param string version: (default = "classic") version name
        :param boolean partial: (default value = False) if True, partial
            dictionaries are loaded based on lrows and lcolumns

        - Output:

        :returns: nbL , nbEx , dsample , dpref , dsuff , dfact
            (``None`` when the instance type is not 'SPiCe')
        :rtype: int , int , dict , dict , dict , dict
        """
        if self._type != 'SPiCe':
            # No other file structure is handled; as before, nothing is
            # loaded and None is returned.
            return None
        if partial:
            return self._load_Spice_partial(
                adr=self._adr, lrows=lrows, lcolumns=lcolumns,
                version=version)
        return self._load_Spice_Notpartial(adr=self._adr)

    @staticmethod
    def _bump(counts, key, amount=1):
        """Add ``amount`` to ``counts[key]``, creating the entry if needed."""
        counts[key] = counts.get(key, 0) + amount

    @staticmethod
    def _read_header(f):
        """Read the first line of ``f`` and return ``(nbEx, nbL)``."""
        fields = f.readline().split()
        return int(fields[0]), int(fields[1])

    @staticmethod
    def _iter_words(f):
        """Yield one tuple of ints per remaining non-blank line of ``f``.

        The first field of a line is the word length (0 means the empty
        word); the remaining fields are the letters.
        """
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no word.
                continue
            if int(fields[0]) == 0:
                yield ()
            else:
                yield tuple(int(x) for x in fields[1:])

    def _load_Spice_partial(self, adr, lrows=(), lcolumns=(),
                            version='classic'):
        """Load only the dictionary entries selected by lrows and lcolumns.

        :param string adr: path of the file to read
        :param lrows: an int (maximal length) or a collection of words
        :param lcolumns: an int (maximal length) or a collection of words
        :param string version: "classic", "prefix", "suffix" or "factor"
        :returns: nbL , nbEx , dsample , dpref , dsuff , dfact

        The four dictionaries are also pickled next to ``adr``.
        """
        dsample = {}  # dictionary (word, count)
        dpref = {}
        dsuff = {}
        dfact = {}
        bump = self._bump
        with open(adr, "r") as f:
            nbEx, nbL = self._read_header(f)
            rows_int = isinstance(lrows, int)
            cols_int = isinstance(lcolumns, int)
            lrowsmax = lrows if rows_int else len(lrows)
            lcolumnsmax = lcolumns if cols_int else len(lcolumns)
            lmax = lrowsmax + lcolumnsmax
            any_int = rows_int or cols_int
            for w in self._iter_words(f):
                n = len(w)
                if version == "classic":
                    bump(dsample, w)
                # handle the empty word for prefixes, suffixes and factors
                if version == "prefix" or version == "classic":
                    bump(dpref, ())
                if version == "suffix" or version == "classic":
                    bump(dsuff, ())
                if version == "factor":
                    bump(dfact, (), n + 1)
                for i in range(n):
                    pref = w[:i + 1]
                    suff = w[-(i + 1):]
                    if version == "classic":
                        # dpref and dsuff are populated up to lrows and
                        # lcolumns respectively
                        if (i + 1 <= lrowsmax) if rows_int \
                                else (pref in lrows):
                            bump(dpref, pref)
                        if (i + 1 <= lcolumnsmax) if cols_int \
                                else (suff in lcolumns):
                            bump(dsuff, suff)
                    elif version == "prefix":
                        # dpref is populated up to lmax = lrows + lcolumns
                        if (any_int and i + 1 <= lmax) or \
                                (not rows_int and pref in lrows) or \
                                (not cols_int and pref in lcolumns):
                            bump(dpref, pref)
                        # factors are selected with lcolumns only
                        for j in range(i + 1, n + 1):
                            fact = w[i:j]
                            if (j - i <= lmax) if cols_int \
                                    else (fact in lcolumns):
                                bump(dfact, fact)
                    elif version == "suffix":
                        # NOTE(review): this bound is ``i <= lmax`` while the
                        # prefix branch uses ``i + 1 <= lmax``; kept as in
                        # the original -- confirm which one is intended.
                        if (any_int and i <= lmax) or \
                                (not rows_int and suff in lrows) or \
                                (not cols_int and suff in lcolumns):
                            bump(dsuff, suff)
                        # factors are selected with lrows only
                        for j in range(i + 1, n + 1):
                            fact = w[i:j]
                            if (j - i <= lmax) if rows_int \
                                    else (fact in lrows):
                                bump(dfact, fact)
                    elif version == "factor":
                        for j in range(i + 1, n + 1):
                            fact = w[i:j]
                            if (any_int and j - i <= lmax) or \
                                    (not rows_int and fact in lrows) or \
                                    (not cols_int and fact in lcolumns):
                                bump(dfact, fact)
        self._create_pickle_files(adr=adr, dsample=dsample, dpref=dpref,
                                  dsuff=dsuff, dfact=dfact)
        return nbL, nbEx, dsample, dpref, dsuff, dfact

    def _load_Spice_Notpartial(self, adr):
        """Load the complete sample, prefix, suffix and factor dictionaries.

        :param string adr: path of the file to read
        :returns: nbL , nbEx , dsample , dpref , dsuff , dfact

        The four dictionaries are also pickled next to ``adr``.
        """
        dsample = {}  # dictionary (word, count)
        dpref = {}
        dsuff = {}
        dfact = {}
        bump = self._bump
        with open(adr, "r") as f:
            nbEx, nbL = self._read_header(f)
            for w in self._iter_words(f):
                n = len(w)
                bump(dsample, w)
                # handle the empty word for prefixes, suffixes and factors
                bump(dpref, ())
                bump(dsuff, ())
                bump(dfact, (), n + 1)
                for i in range(n):
                    bump(dpref, w[:i + 1])
                    bump(dsuff, w[i:])
                    for j in range(i + 1, n + 1):
                        bump(dfact, w[i:j])
        self._create_pickle_files(adr=adr, dsample=dsample, dpref=dpref,
                                  dsuff=dsuff, dfact=dfact)
        return nbL, nbEx, dsample, dpref, dsuff, dfact

    def _create_pickle_files(self, adr, dsample, dpref, dsuff, dfact):
        """Pickle each dictionary to ``adr`` + a dedicated ``.pkl`` suffix."""
        outputs = (
            (".sample.pkl", dsample),
            (".pref.pkl", dpref),
            (".suff.pkl", dsuff),
            (".fact.pkl", dfact),
        )
        for suffix, data in outputs:
            with open(adr + suffix, "wb") as f:
                pickle.dump(data, f)