Building Nests¶
Basic Nest¶
This simple, combinatorial example is taken from
examples/basic_nest/make_nest.py:
#!/usr/bin/env python
import glob
import math
import os
import os.path
from nestly import Nest
# Input files are looked up in an 'inputs' directory under the current
# working directory.
wd = os.getcwd()
input_dir = os.path.join(wd, 'inputs')
nest = Nest()
# Simplest case: levels are added with a name and an iterable.
nest.add('strategy', ('exhaustive', 'approximate'))
# Sometimes it's useful to add multiple keys to the nest in one operation, e.g.
# for grouping related data.
# This can be done by passing an iterable of dictionaries to the `Nest.add` call,
# each containing at least the named key, along with the `update=True` flag.
#
# Here, 'run_count' is the named key, and will be used to create a directory in the nest,
# and the value of 'power' will be added to each control dictionary as well.
# The resulting run_count values are 1, 10 and 100 (powers 0, 1 and 2).
nest.add('run_count', [{'run_count': 10**i, 'power': i}
for i in range(3)], update=True)
# label_func can be used to generate a meaningful name. Here, it strips all
# but the file name from the file path.
nest.add('input_file', glob.glob(os.path.join(input_dir, 'file*')),
label_func=os.path.basename)
# Items can be added that don't generate directories.
nest.add('base_dir', [os.getcwd()], create_dir=False)
# Any function taking one argument (control dictionary) and returning an
# iterable may also be used.
# This one just takes the base-10 logarithm of 'run_count'.
# Since the function only returns a single result, we don't create a new directory.
def log_run_count(c):
    """Return a one-element list holding the base-10 log of ``c['run_count']``.

    Args:
        c: Control dictionary; must contain a positive numeric 'run_count'.

    Returns:
        A list with a single float, so it can be used as the iterable of
        values for a nest level.

    Raises:
        KeyError: If 'run_count' is missing from ``c``.
        ValueError: If 'run_count' is not positive.
    """
    # math.log10 is preferred over math.log(x, 10): the two-argument form
    # divides two natural logs and can be off by a rounding error, e.g.
    # math.log(1000, 10) == 2.9999999999999996.
    return [math.log10(c['run_count'])]
# Add the derived 'run_count_log' value to each control dictionary without
# creating a directory level for it.
nest.add('run_count_log', log_run_count, create_dir=False)
# Build the nest under the 'runs' directory.
nest.build('runs')
|
This example is then run with the ../examples/basic_nest/run_example.sh
script.
#!/bin/sh
# Abort on the first failing command (-e), treat unset variables as errors
# (-u), and print each command before running it (-x).
set -e
set -u
set -x
# Build a nested directory structure
./make_nest.py
# Let's look at a sample control file:
cat runs/approximate/1/file1/control.json
# Run `echo.sh` using every control.json under the `runs` directory, 2
# processes at a time
nestrun --processes 2 --template-file echo.sh -d runs
# Merge the CSV files named '{strategy}.csv' (where strategy value is taken
# from the control file) into a single file, aggregated.csv
nestagg delim '{strategy}.csv' -d runs -o aggregated.csv
|
echo.sh
is the simple template script that nestrun
runs for every control file; each run writes a small CSV file named after
its strategy. nestagg then merges those results into the aggregated.csv
file:
#!/bin/sh
#
# Echo the value of two fake output variables: var1, which is always 13, and
# var2, which is 10 times the run_count (a literal 0 is appended to its digits).
# NOTE(review): the {run_count} and {strategy} placeholders are presumably
# filled in from each control.json when nestrun processes this template --
# confirm against the nestrun documentation.
echo "var1,var2
13,{run_count}0" > "{strategy}.csv"
|
Meal¶
This is a bit more complicated, with lookups on previous values of the control dictionary:
#!/usr/bin/env python
import glob
import os
import os.path
from nestly import Nest, stripext
# Input directories are resolved relative to the current working directory.
wd = os.getcwd()
startersdir = os.path.join(wd, "starters")
winedir = os.path.join(wd, "wine")
mainsdir = os.path.join(wd, "mains")
nest = Nest()
# Short alias, used both as a label function and inside the lambdas below.
bn = os.path.basename
# Start by mirroring the two directory levels in startersdir, and name those
# levels "ethnicity" and "dietary".
nest.add('ethnicity', glob.glob(os.path.join(startersdir, '*')),
label_func=bn)
# In the `dietary` key, the anonymous function `lambda ...` chooses as values
# the paths of the entries inside the current `ethnicity` directory.
nest.add('dietary', lambda c: glob.glob(os.path.join(c['ethnicity'], '*')),
label_func=bn)
## Now get all of the starters, i.e. every entry inside the current
## `dietary` directory.
nest.add('starter', lambda c: glob.glob(os.path.join(c['dietary'], '*')),
label_func=stripext)
## Then get the corresponding mains: a single file in mainsdir named
## "<ethnicity>_stirfry.txt".
nest.add('main', lambda c: [os.path.join(mainsdir, bn(c['ethnicity']) + "_stirfry.txt")],
label_func=stripext)
## Take only the tasty wines (files in winedir matching '*.tasty').
nest.add('wine', glob.glob(os.path.join(winedir, '*.tasty')),
label_func=stripext)
## The wineglasses should be chosen by the wine choice, but we don't want to
## make a directory for those.
nest.add('wineglass', lambda c: [stripext(c['wine']) + ' wine glasses'],
create_dir=False)
# Build the nest under the 'runs' directory.
nest.build('runs')
|