#!/usr/bin/env python
################################################################################
# Copyright (C) 2016 Brecht Baeten
# This file is part of batchpy.
#
# batchpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# batchpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with batchpy. If not, see <http://www.gnu.org/licenses/>.
################################################################################
import os
import sys
import re
import numpy as np
import itertools
import time
from . import run
class Batch(object):
    """
    The batchpy batch class

    A batch can contain several runs of computations. Using batchpy these
    batches can be easily defined using python files (to support version
    control) and run.

    The computation results can be stored in memory or saved to disk per run.
    When the result of a run is saved it is cleared from memory which allows for
    computations which would require more memory than available if all runs were
    to be executed at once.

    Parameters
    ----------
    name : string
        A name for the batch

    path : string, optional
        An optional path to store results, if not provided the current path is
        chosen.

    saveresult : boolean, optional
        Save the results to disk or not, this argument is passed to all runs.

    Examples
    --------
    >>> batch = batchpy.Batch('mybatch')

    """

    # comparison suffixes understood by :py:meth:`get_runs_with`
    _condition_suffixes = ('__eq','__ne','__ge','__le')

    def __init__(self,name,path='',saveresult=True):
        """
        Creates a batch

        See the class documentation for the meaning of the arguments.

        """
        self.name = name
        self.path = path
        self.run = []
        self._saveresult = saveresult

    def add_run(self,runclass,parameters):
        """
        Adds a run

        Parameters
        ----------
        runclass : :py:meth:`~batchpy.run.Run` subclass
            A class reference which creates an object when supplied the
            parameters. It is called as
            ``runclass(batch,saveresult=...,**parameters)``.

        parameters : dict
            A dictionary of parameters to be supplied to the
            :py:meth:`~batchpy.run.Run.run` method of the runclass.

        Examples
        --------
        >>> batch.add_run(Myrun,{'A':1,'B':[1,2,3],'C':'spam'})
        >>> batch()

        """
        # not named ``run`` to avoid shadowing the imported ``run`` module
        newrun = runclass(self,saveresult=self._saveresult,**parameters)
        self.run.append(newrun)

    def add_factorial_runs(self,runclass,parameters):
        """
        Adds a full factorial design of runs based on parameter lists

        Parameters
        ----------
        runclass : :py:meth:`batchpy.run.Run` subclass
            A class reference which creates an object when supplied the
            parameters.

        parameters : dict
            A dictionary of lists of parameters to be supplied to the
            :py:meth:`~batchpy.run.Run.run` method of the runclass.

        Examples
        --------
        >>> batch.add_factorial_runs(Myrun,{'par1':[0,1,2],'par2':[5.0,7.1]})
        >>>
        >>> # is equivalent with:
        >>> batch.add_run(Myrun,{'par1':0,'par2':5.0})
        >>> batch.add_run(Myrun,{'par1':0,'par2':7.1})
        >>> batch.add_run(Myrun,{'par1':1,'par2':5.0})
        >>> batch.add_run(Myrun,{'par1':1,'par2':7.1})
        >>> batch.add_run(Myrun,{'par1':2,'par2':5.0})
        >>> batch.add_run(Myrun,{'par1':2,'par2':7.1})

        """
        # fix the key order once so names and values stay aligned
        keys = list(parameters.keys())
        for vals in itertools.product(*[parameters[key] for key in keys]):
            self.add_run( runclass, dict(zip(keys,vals)) )

    def add_resultrun(self,id):
        """
        Adds saved runs by id

        Parameters
        ----------
        id : string or list of strings
            The id of the run, or several ids. Each id is wrapped in a
            ``run.ResultRun`` which is appended to the batch.

        Examples
        --------
        >>> batch.add_resultrun('3ecc784a9d5cf26eb6420de2a43f04b310073925')

        """
        # a single id must not be iterated character by character; python 3
        # strings do have ``__iter__`` so they are checked for explicitly
        if isinstance(id,(str,bytes)) or not hasattr(id,'__iter__'):
            ids = [id]
        else:
            ids = id

        for idi in ids:
            self.run.append( run.ResultRun(self,idi) )

    @staticmethod
    def _satisfies(operator,par,val):
        """
        Checks a single condition of :py:meth:`get_runs_with`

        Parameters
        ----------
        operator : string
            One of ``'__eq'``, ``'__ne'``, ``'__ge'``, ``'__le'``.

        par : anything
            The parameter value of a run.

        val : anything
            The reference value to compare with.

        Returns
        -------
        satisfied : boolean
            For array-like comparisons the condition must hold for all
            elements; ``'__ne'`` is the negation of ``'__eq'``.

        """
        if operator == '__ge':
            con = par >= val
        elif operator == '__le':
            con = par <= val
        else:
            try:
                # tolerant comparison so floating point parameters match
                con = np.isclose( par,val )
            except Exception:
                # deliberate best effort: values numpy can not compare
                # numerically (strings, dicts, ...) are compared with ==
                con = par == val

        # reduce elementwise results to a single boolean
        con = bool(np.all(con))

        if operator == '__ne':
            return not con
        return con

    def get_runs_with(self,**kwargs):
        """
        Returns a list of runs with the specified parameter values

        Parameters
        ----------
        kwargs : anything
            Keyword arguments of parameter values.
            Several conditions can be appended to a parameter:
            `__eq`: equal, same as appending nothing
            `__ne`: not equal
            `__ge`: greater or equal
            `__le`: less or equal

        Returns
        -------
        runs : list
            a list of runs. A condition on a parameter which a run does not
            have in its ``parameters`` is ignored for that run.

        Examples
        --------
        >>> batch = batchpy.Batch('mybatch')
        >>> batch.add_factorial_runs(Myrun,{'par1':[0,1,2],'par2':[5.0,7.1]})
        >>> runs = batch.get_runs_with(par1=0)
        >>> print(runs)
        >>> runs = batch.get_runs_with(par1__ge=1,par2=5.0)
        >>> print(runs)

        """
        # parse the conditions once instead of once per run
        conditions = []
        for key,val in kwargs.items():
            if key.endswith(self._condition_suffixes):
                conditions.append( (key[:-4],key[-4:],val) )
            else:
                conditions.append( (key,'__eq',val) )

        runs = []
        for candidate in self.run:
            add = True
            for parname,operator,val in conditions:
                if parname not in candidate.parameters:
                    continue
                if not self._satisfies(operator,candidate.parameters[parname],val):
                    add = False
                    break

            if add:
                runs.append(candidate)

        return runs

    def __call__(self,runs=-1,verbose=1):
        """
        Runs the remainder of the batch or a specified run

        Runs which report ``done`` are skipped.

        Parameters
        ----------
        runs : int or list of ints, optional
            Indices of the runs to be executed, -1 (or any negative integer)
            for all runs. A tuple or numpy array of indices is accepted too.

        verbose : int, optional
            Integer determining the amount of printed output 0/1/2.
            0 prints nothing, 1 prints a title line for at most about 50 of
            the runs and 2 prints a title line for every run.

        """
        title_width = 80

        # check which runs are to be done
        if isinstance(runs,(list,tuple,np.ndarray)):
            candidates = runs
        elif runs < 0:
            candidates = range(len(self.run))
        else:
            candidates = [runs]

        expandedruns = [ind for ind in candidates if not self.run[ind].done]

        # determine when to print the title string
        if verbose > 1:
            skip = 1
        else:
            # at least 1 so the modulo below is always defined
            skip = max( 1, int( np.ceil( len(expandedruns)/50. ) ) )

        starttime = time.time()
        for i,ind in enumerate(expandedruns):

            if verbose > 0 and i%skip==0:
                # print the run title string
                runtime = time.time()-starttime
                if i==0:
                    # no run has finished yet, so there is nothing to extrapolate
                    etastr = '/'
                else:
                    etastr = '{0:.1f} min'.format( runtime/i*(len(expandedruns)-i)/60 )

                title_str = '### run {0} in '.format(ind)
                title_str += strlist(expandedruns)
                title_str += (40-len(title_str))*' '
                title_str += 'runtime: {0:.1f} min'.format(runtime/60)
                title_str += 4*' '
                title_str += 'eta: '+etastr
                title_str += (title_width-len(title_str)-3)*' ' +'###'

                print(title_str)

                # flush the printing cue
                sys.stdout.flush()

            # run the run
            self.run[ind]()

        runtime = time.time()-starttime
        if verbose > 0:
            print('total runtime {0:.1f} min'.format(runtime/60))
            print('done')
            sys.stdout.flush()

    def save_ids(self,filename=None,format='npy'):
        """
        Saves all ids in the batch to a numpy file or to a python file with
        an ``ids`` list

        Parameters
        ----------
        filename : str, optional
            The filename of the output file. If no filename is supplied a
            file is created in the ``_res`` folder, named ``batchname_ids.npy``
            or ``batchname_ids.py`` depending on the format argument.
            If a filename is supplied, the format argument is ignored and the
            filename extension is used to determine the format.

        format : str, optional
            The format to save the ids to, 'npy'/'py'. By default, a .npy file
            is created, the ids can be retrieved with
            ``ids = np.load('batchname_ids.npy')``. If the 'py' format is
            supplied the ids are written to a python file in a list.

        Raises
        ------
        ValueError
            If the format is neither 'npy' nor 'py'.

        Examples
        --------
        >>> batch.save_ids()

        In another interpreter:

        >>> import numpy as np
        >>> ids = np.load('_res/mybatch_ids.npy')
        >>> print(ids)

        """
        if filename is not None:
            format = os.path.splitext(filename)[1][1:]

        # validate before touching the disk through ``savepath``
        if format not in ('npy','py'):
            raise ValueError('Format \'{}\' not recognized, should be \'npy\' or \'py\'.'.format(format))

        if filename is None:
            filename = os.path.join( self.savepath, '{}_ids.{}'.format(self.name,format) )

        ids = [r.id for r in self.run]

        if format == 'npy':
            np.save(filename,ids)
        else:
            with open(filename,'w') as f:
                f.write('ids = [\n')
                for idi in ids:
                    f.write(' \'{}\',\n'.format(idi))
                f.write(']')

    @property
    def savepath(self):
        """
        Property returning the path where files are saved

        The path is the ``_res`` folder inside the batch path. The folder and
        an empty ``__init__.py`` inside it are created when they do not exist.

        """
        dirname = os.path.join(self.path, '_res' )

        if not os.path.isdir(dirname):
            os.makedirs(dirname)

        initfile = os.path.join(dirname,'__init__.py')
        if not os.path.exists(initfile):
            # create an empty file
            with open(initfile,'w'):
                pass

        return dirname

    def _get_filenames(self):
        """
        Returns a list of found files which correspond to the batch

        Files in the save path whose name starts with ``<name>_run`` and
        contains ``.npy`` after that are returned with the save path prepended,
        sorted by file name.

        """
        dirname = self.savepath

        # the batch name is matched literally, not as a regular expression
        pattern = re.compile(re.escape(self.name)+r'_run.*\.npy')

        return [os.path.join(dirname,f) for f in sorted(os.listdir(dirname)) if pattern.match(f)]
# helper functions
def strlist(runs):
    """
    Formats a sequence of run indices as a compact string

    Parameters
    ----------
    runs : sequence
        The items to format, each item is converted with ``str``.

    Returns
    -------
    string : str
        ``'[a, b, c]'`` for up to 5 items. Longer sequences are abbreviated to
        the first two and last two items, ``'[a,b,...,y,z]'``.

    Examples
    --------
    >>> strlist([0,1,2])
    '[0, 1, 2]'
    >>> strlist([0,1,2,3,4,5])
    '[0,1,...,4,5]'

    """
    # str() per item in both branches, so numpy integers print as plain
    # numbers instead of their repr
    items = [str(item) for item in runs]

    if len(items) > 5:
        return '[' + ','.join(items[:2]) + ',...,' + ','.join(items[-2:]) + ']'

    return '[' + ', '.join(items) + ']'