1 """classes to handle configuration files"""
2
3 import os, logging
4 from ConfigParser import SafeConfigParser
5 from collections import namedtuple
6
7 import numpy as np
8 import pandas as pd
9
10 from .. import archive
11
log = logging.getLogger(__name__)

def _open_cfg(path):
    cfg = SafeConfigParser()
    if not len(cfg.read([path])):
        raise ValueError("cannot load %s" % path)
    return cfg

def _check_file(s):
    if not os.path.isfile(s):
        import warnings
        warnings.warn("non-existent input file '%s'" % s)
    return s

def _check_get_pairs(s, tp=int):
    items = map(tp, s.split())
    p = []
    npairs = len(items) // 2
    for i in range(npairs):
        p.append((items[i * 2], items[i * 2 + 1]))
    return tuple(p)

def _check_get_singles(s, tp=int):
    return tuple(map(tp, s.split()))

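# Illustrative examples of how the two parsers above interpret
# whitespace-separated config values (values made up for illustration):
#
#   _check_get_singles("20 50 50")  -> (20, 50, 50)
#   _check_get_pairs("5 5 3 3")     -> ((5, 5), (3, 3))
#
# A trailing unpaired item in _check_get_pairs is silently dropped.
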
39 """abstract class with factory method from a config file
40
41 to subclass, need to define
42
43 _types: types for properties
44 _section: section on the confifg file"""
45
    @classmethod
    def from_settings(cls, path):
        'make a new object from a settings file'

        if not len(cls._fields) == len(cls._types):
            raise ValueError("a type (found %d) / key (found %d) needed for %s" %
                             (len(cls._types), len(cls._fields), str(cls)))

        cfg = _open_cfg(path)
        iterable = map(lambda (k, t): t(cfg.get(cls._section, k)),
                       zip(cls._fields, cls._types))
        result = tuple.__new__(cls, iterable)
        if len(result) != len(cls._fields):
            raise TypeError('expected %d arguments, got %d' % (len(cls._fields),
                                                               len(result)))
        result._check_consistency()
        return result

65 "check as much as you can that values of params make sense"
66
67 pass
68
69
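# The concrete specs below that mix in CfgFactory (ModelSpec, MtrainSpec,
# AESpec) reuse the namedtuple field names as option names in their config
# section, with one converter per field listed in _types.
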
class ModelSpec(namedtuple('MetaParams', "nkerns rfield pool lreg_size"), CfgFactory):
    __slots__ = ()

    _types = (_check_get_singles, _check_get_pairs, _check_get_pairs, int)
    _section = "model"

    def _check_consistency(self):
        if len(self.nkerns) != len(self.pool) or len(self.nkerns) != len(self.rfield):
            raise ValueError("len(self.nkerns) != len(self.pool) or len(self.nkerns) != len(self.rfield)")

    @property
    def geometry(self):
        return (self.nkerns, self.rfield, self.pool)

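# A hypothetical "[model]" section that ModelSpec.from_settings could parse
# (option values are made up for illustration):
#
#   [model]
#   nkerns = 20 50
#   rfield = 5 5 5 5
#   pool = 2 2 2 2
#   lreg_size = 500
#
# which would yield nkerns=(20, 50), rfield=((5, 5), (5, 5)),
# pool=((2, 2), (2, 2)) and lreg_size=500, e.g. via
#   spec = ModelSpec.from_settings("settings.cfg")
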
class MtrainSpec(namedtuple("MtrainSpec", "batch_size l1_rate l2_rate lr tau momentum_mult nepochs minepochs patience"), CfgFactory):
    __slots__ = ()

    _types = (int, float, float, float, int, float, int, int, int)
    _section = "modtrain"

    def _check_consistency(self):
        if self.minepochs > self.nepochs:
            raise ValueError("minepochs (%d) > nepochs (%d)" %
                             (self.minepochs, self.nepochs))

class AESpec(namedtuple("AutoEncoderSpec", "rec_error minepochs maxepochs batch_size noise"), CfgFactory):
    __slots__ = ()

    _types = (float, int, int, int, float)
    _section = "ae"

class DataSpec(namedtuple("DataSpec", "dataname tracks width train_s valid_s labels track_names label_names batch_size train_batches valid_batches")):
    __slots__ = ()

    @classmethod
    def batches_from_data(cls, tot_size, batch_s, valid_s, valid_idx, rng):
        if valid_s <= 0 or valid_s >= 1:
            raise ValueError("valid_s (%f) should be in (0, 1)" % valid_s)

        if batch_s > tot_size * min(valid_s, 1 - valid_s):
            raise ValueError("batch size (%d) too big > min(valid_s=%d, train_s=%d)" %
                             (batch_s, tot_size * valid_s, tot_size * (1 - valid_s)))

        all_batches = range(tot_size // batch_s)
        if not 0 <= valid_idx < len(all_batches):
            raise ValueError("valid_idx (%d) should be between 0 and %d" %
                             (valid_idx, len(all_batches) - 1))
        valid_batches = all_batches[valid_idx:valid_idx + int(len(all_batches) * valid_s)]
        train_batches = list(set(all_batches) - set(valid_batches))
        assert set(train_batches + valid_batches) == set(all_batches)
        assert len(set(train_batches) & set(valid_batches)) == 0
        rng.shuffle(train_batches)
        rng.shuffle(valid_batches)
        log.info("train batches: %s", str(train_batches))
        log.info("valid batches: %s", str(valid_batches))
        return (train_batches, valid_batches)

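    # Worked example (illustrative numbers): with tot_size=1000 samples and
    # batch_s=50 there are 20 batches; valid_s=0.25 selects
    # int(20 * 0.25) = 5 consecutive batch indices starting at valid_idx for
    # validation, and the remaining 15 (shuffled) become training batches.
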
    @classmethod
    def from_archive(cls, path, batch_s, train_s=.5, valid_s=.25, valid_idx=0, rng=np.random):
        """initialize from archive

        path: dataset specification path
        batch_s: batch size
        train_s: train size (default = 50%)
        valid_s: validation size (default 25%)"""

        objX = archive.load_object(archive.archname(path),
                                   archive.basename(path) + "/X")
        objT = archive.load_object(archive.archname(path),
                                   archive.basename(path) + "/T")

        gene_names = objX.items
        track_names = objX.major_axis
        width = objX.minor_axis.shape[0]
        label_names = np.unique(objT["label_name"].values).tolist()

        train_batches, valid_batches = cls.batches_from_data(len(gene_names),
                                                             batch_s, valid_s,
                                                             valid_idx, rng)

        return cls._make((archive.basename(path), len(track_names), width,
                          batch_s * len(train_batches), batch_s * len(valid_batches),
                          len(label_names), track_names, label_names, batch_s,
                          train_batches, valid_batches))

if __name__ == "__main__":
    pass
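    # Sketch of intended usage (hypothetical paths; the module only defines
    # the spec classes, so nothing runs here):
    #   model = ModelSpec.from_settings("experiment.cfg")
    #   mtrain = MtrainSpec.from_settings("experiment.cfg")
    #   data = DataSpec.from_archive("path/to/dataset", mtrain.batch_size)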