Source code for batchpy.run

#!/usr/bin/env python
################################################################################
#    Copyright (C) 2016 Brecht Baeten
#    This file is part of batchpy.
#    
#    batchpy is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#    
#    batchpy is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#    
#    You should have received a copy of the GNU General Public License
#    along with batchpy.  If not, see <http://www.gnu.org/licenses/>.
################################################################################

import os
import hashlib
import types
import inspect
import time

import numpy as np

class Run(object):
    """
    A batchpy run base class

    This class is intended as a base class from which custom user defined run
    objects can inherit. In the custom run class the
    :py:meth:`~batchpy.run.Run.run` method needs to be redefined to actually
    run the wanted computations and return the result as a dictionary.

    Parameters
    ----------
    batch : :py:meth:`~batchpy.batch.Batch` object
        The batch the run belongs to

    saveresult : boolean, optional
        Save the results to disk or not, if not the result is available in
        the _result attribute

    **parameters :
        Keyword parameters which modify the run instance

    Notes
    -----
    Batchpy runs should not be created directly but through the batch
    methods :py:meth:`~batchpy.batch.Batch.add_run` or
    :py:meth:`~batchpy.batch.Batch.add_factorial_runs`.

    Examples
    --------
    >>> class Myrun(batchpy.Run):
    ...     def run(self,mypar=5):
    ...         # some complicated computation
    ...         return {'val': 2*mypar}
    ...
    >>> batch = batchpy.Batch('mybatch')
    >>> run = Myrun(batch,saveresult=False,mypar=5)
    >>> run()
    {'val': 10}

    """

    def __init__(self, batch, saveresult=True, **parameters):
        """
        Creates a batchpy run

        See above

        """
        self.batch = batch

        self._id = None
        self._done = False
        self._runtime = None
        self._saveresult = saveresult
        self._result = None
        self._resultonly = False

        # start from the defaults declared on the run method, then let the
        # keyword arguments override them
        self._parameters = self._default_parameters()
        self._parameters.update(parameters)

        # create the run id
        self._id = self.generate_id(self._parameters)

        # check if there is a result saved
        self._check_result()

    def _default_parameters(self):
        """
        Collects the default keyword values declared by :code:`run`.

        Returns
        -------
        defaults : dict
            Maps every argument of :code:`run` that has a default value to
            that default. Empty when :code:`run` declares no defaults.

        """
        # getargspec was removed in Python 3.11; prefer getfullargspec and
        # only fall back on interpreters that lack it
        getspec = getattr(inspect, 'getfullargspec', None)
        if getspec is None:
            getspec = inspect.getargspec
        spec = getspec(self.run)

        # ``defaults`` is None (not an empty tuple) when no argument has a
        # default value
        defaults = spec.defaults or ()
        if not defaults:
            return {}
        return dict(zip(spec.args[-len(defaults):], defaults))

    def run(self):
        """
        Perform calculations and return the result

        This method should be overwritten in a user defined child class to
        perform the actual computations.

        Parameters
        ----------
        parameters
            parameters can be defined as named parameters. The use of
            ``**kwargs`` is not supported.

        Returns
        -------
        res : dict
            a dictionary with the results of the run

        Examples
        --------
        >>> class Myrun(batchpy.Run):
        ...     def run(self,mypar=5):
        ...         # some complicated computation
        ...         return {'val': 2*mypar}
        ...

        """
        return {}

    def __call__(self):
        """
        Checks if the run results are already computed and compute them if not.

        When a run is called the class checks if the results are available in
        memory or on the disk. When the result is available in memory, it is
        returned. When it is available on disk, it is loaded and returned.
        When the result is not available it is computed using the
        :code:`run` method and the results are stored on disk (if the
        :code:`_saveresult` attribute is true) or in the :code:`_result`
        attribute (otherwise). The computation is timed and the runtime is
        saved in the :code:`runtime` attribute.

        Returns
        -------
        res : anything
            Results.

        Examples
        --------
        >>> batch = batchpy.Batch('mybatch')
        >>> run = Myrun(batch,saveresult=False,mypar=5)
        >>> run()
        {'val': 10}

        """
        if self._done:
            return self.load()

        t_start = time.time()
        res = self.run(**self.parameters)
        t_end = time.time()
        # the runtime must be set before saving, _save stores it in the file
        self._runtime = t_end - t_start

        if self._saveresult:
            self._save(res)
        else:
            self._result = res

        self._done = True
        return res

    def load(self):
        """
        Checks if the run results are already computed and return them if so.

        When the result is available in memory, it is returned. When it is
        available on disk, it is loaded and returned. When the result is not
        computed yet this returns :code:`None`.

        Returns
        -------
        res : anything, :code:`None`
            Results, returns :code:`None` if the results are not available.

        Examples
        --------
        >>> run.load()
        {'val': 10}

        """
        if not self._saveresult:
            return self._result

        data = self._load()
        if data is None:
            return None

        res = data['res']

        # if statements for compatibility with older saved runs
        if 'runtime' in data:
            self._runtime = data['runtime']
        if 'parameters' not in data:
            print('The loaded data is in the old style, to add functionality run \'batchpy.convert_run_to_newstyle(run)\'')

        return res

    def clear(self):
        """
        Tries to erase the run results from the disk

        Returns
        -------
        success : bool
            :code:`True` if the run was deleted from the disk, :code:`False`
            otherwise.

        Examples
        --------
        >>> run.clear()
        True

        """
        try:
            os.remove(self.filename)
        except OSError:
            # missing file, permission problem, ...: nothing was removed
            return False

        self._done = False
        return True

    @property
    def id(self):
        """
        Property returning the id.

        """
        return self._id

    @property
    def parameters(self):
        """
        Property returning the run parameters.

        """
        return self._parameters

    @property
    def result(self):
        """
        Property alias to self.load().

        """
        return self.load()

    @property
    def done(self):
        """
        Property returning if the run is done or not.

        """
        return self._done

    @done.setter
    def done(self, value):
        self._done = value

    @property
    def runtime(self):
        """
        Property returning the computation time of a run

        """
        return self._runtime

    @property
    def index(self):
        """
        Property returning the run index in its batch.

        """
        return self.batch.run.index(self)

    @property
    def filename(self):
        """
        Property returning the filename of the run.

        """
        return os.path.join(
            self.batch.savepath,
            '{}_{}.npy'.format(self.batch.name, self._id)
        )

    def generate_id(self, parameters):
        """
        Generates an id hash from the parameters.

        The id hash is used to identify a run. It is hashed from the
        parameters used to create the run to ensure that when even a single
        parameter is changed the run ids are different. The id is stored in
        the :code:`id` attribute.

        Parameters
        ----------
        parameters : dict
            a dictionary with parameters from which to compute the hash.

        Returns
        -------
        id : string
            the id hash of this run

        Notes
        -----
        When classes, methods or functions are supplied as parameters, the
        hash is created using their name attribute. This avoids ids being
        different when python is restarted. A hash created from the function
        itself would be different each time python starts as the object
        resides in a different memory location.

        Examples
        --------
        >>> run.generate_id(run.parameters)
        '10ae24979c5028fa873651bca338152dc0484245'

        """
        id_dict = {}
        for key, val in parameters.items():
            # serialize once so the "can not be hashed" warning is printed
            # a single time per parameter
            serialized = self._serialize(val)
            if self._is_unhashable(serialized):
                continue
            id_dict[key] = serialized

        # the hash is taken over the values in the iteration order of the
        # dictionary; this is kept as is so existing ids stay valid
        text = str([id_dict[key] for key in id_dict])
        if not isinstance(text, bytes):
            # Python 3: hashlib only accepts bytes
            text = text.encode('utf-8')

        return hashlib.sha1(text).hexdigest()

    @staticmethod
    def _is_unhashable(serialized):
        """
        Tells whether a serialized value is the ``'__unhashable__'`` marker.

        The type is checked first because comparing arbitrary objects (for
        instance numpy arrays) with a string using ``==`` does not
        necessarily give a single boolean.

        """
        return isinstance(serialized, (str, type(u''))) and serialized == '__unhashable__'

    def _save(self, res):
        """
        Saves the result in res along with run identifiers.

        Parameters
        ----------
        res : dict
            The result dictionary

        """
        parameters = {
            key: self._serialize(val) for key, val in self.parameters.items()
        }
        np.save(
            self.filename,
            {
                'res': res,
                'id': self._id,
                'runtime': self._runtime,
                'parameters': parameters,
            }
        )

    def _load(self):
        """
        Loads all data from the file with the correct id if it exists,
        returns None otherwise.

        """
        if not os.path.isfile(self.filename):
            return None

        # The dictionary is stored as a pickled object array, recent numpy
        # versions refuse to read those unless allow_pickle is set.
        # NOTE(review): unpickling can execute arbitrary code, only load
        # result files from a trusted location.
        return np.load(self.filename, allow_pickle=True).item()

    def _serialize(self, val):
        """
        Serialize a parameter.

        Parameters
        ----------
        val : anything
            the parameter value to serialize

        Returns
        -------
        serialized : anything
            code objects give their byte code, files their name, functions,
            classes, methods, modules and generators their ``__name__``.
            Other values are returned unchanged.

        Notes
        -----
        Not all parameter types are serializable. If a parameter can not be
        serialized ``'__unhashable__'`` is returned.

        """
        serialized = '__unhashable__'

        # several of these names only exist on Python 2, keep whatever the
        # running interpreter provides; ``type`` covers classes everywhere
        nametype_names = [
            'BuiltinFunctionType',
            'BuiltinMethodType',
            'ClassType',
            'FunctionType',
            'GeneratorType',
            'InstanceType',
            'LambdaType',
            'MethodType',
            'ModuleType',
            'TypeType',
            'UnboundMethodType',
        ]
        nametypes = tuple(
            getattr(types, name) for name in nametype_names
            if hasattr(types, name)
        ) + (type,)

        # Python 3 has no types.FileType, file objects derive from io.IOBase
        filetype = getattr(types, 'FileType', None)
        if filetype is None:
            import io
            filetype = io.IOBase

        if isinstance(val, types.CodeType):
            serialized = val.co_code
        elif isinstance(val, filetype) and hasattr(val, 'name'):
            serialized = val.name
        elif isinstance(val, nametypes):
            serialized = val.__name__

        if self._is_unhashable(serialized):
            string = str(val)
            # a representation like '<... at 0x...>' contains a memory
            # address and would change every time python is started
            if len(string) > 0 and string[0] == '<' and string[-1] == '>' and 'at 0x' in string:
                print('WARNING: parameter {} can not be hashed and is not included in the id which could lead to loss of data and undesired results'.format(val) )
            else:
                serialized = val

        return serialized

    def _check_result(self):
        """
        Checks if a result file is stored on the disk

        Checks if a file with the correct name can be found. If so, it sets
        the :code:`done` attribute to True. If not, the :code:`done`
        attribute is set to false.

        Examples
        --------
        >>> run._check_result()
        >>> run.done
        False

        """
        # check if there are results saved with the same id
        self._done = os.path.isfile(self.filename)
class ResultRun(Run):
    """
    A class to load run results.

    Parameters
    ----------
    batch : :py:meth:`~batchpy.batch.Batch` object
        The batch the run belongs to.

    id : str
        An id specifying a previously saved run. It is used as is to build
        the name of the result file.

    Notes
    -----
    Batchpy result runs should not be created directly but through the
    batch methods :py:meth:`~batchpy.batch.Batch.add_resultrun`.

    Examples
    --------
    >>> batch = batchpy.Batch('mybatch')
    >>> run = batchpy.ResultRun(batch,'3ecc784a9d5cf26eb6420de2a43f04b310073925')

    """

    def __init__(self, batch, id):
        """
        Creates a batchpy result run

        See above

        """
        self.batch = batch

        self._runtime = None
        # a result run never computes anything, it is always considered done
        self._done = True
        self._parameters = None
        self._saveresult = True
        self._result = None
        self._id = id

        # only the meta data is kept, the result itself is loaded on demand
        data = self._load()
        if data is not None:
            if 'runtime' in data:
                self._runtime = data['runtime']
            if 'parameters' in data:
                self._parameters = data['parameters']

        del data

    def run(self, **kwargs):
        """
        Does nothing, a result run only loads previously saved results.

        """
        pass

    def __call__(self):
        """
        Loads the saved result.

        Returns
        -------
        res : anything, :code:`None`
            The saved result, :code:`None` if no result file is found.

        """
        # force the parent class down the loading branch
        self._done = True
        return super(ResultRun, self).__call__()

    def _save(self, res=None):
        """
        Does nothing, result runs are never written to disk.

        The ``res`` argument is accepted so the signature matches the one of
        the parent class.

        """
        pass

    def clear(self):
        """
        Does nothing, saved results are not removed through a result run.

        Returns
        -------
        success : bool
            Always :code:`False` as nothing is deleted.

        """
        return False
def convert_run_to_newstyle(run):
    """
    Converts a saved run result to include the parameters.

    The result is loaded through ``run.load()`` and, when a result is
    available, handed to ``run._save`` again. Nothing happens when no result
    is available.

    Parameters
    ----------
    run : :py:meth:`~batchpy.run.Run` object or child class
        The run to convert the result from.

    """
    res = run.load()
    if res is not None:
        run._save(res)