The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.ifelse.ifelse().
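For orientation, here is a minimal, self-contained sketch of the call pattern shared by the examples below (the variable names are purely illustrative). ifelse(condition, then_branch, else_branch) is Theano's lazy conditional: both branches must have the same type, and only the selected branch is evaluated at run time, unlike tensor.switch, which is element-wise and computes both branches.

import theano
import theano.tensor as T
from theano.ifelse import ifelse

a = T.scalar('a')   # scalar condition input
x = T.vector('x')
y = T.vector('y')

# Lazy conditional: only the branch selected by the condition is computed.
z = ifelse(T.gt(a, 0), x * 2, y + 1)
f = theano.function([a, x, y], z, allow_input_downcast=True)

print(f(1, [1.0, 2.0], [3.0, 4.0]))   # condition true  -> x branch: [2. 4.]
print(f(-1, [1.0, 2.0], [3.0, 4.0]))  # condition false -> y branch: [4. 5.]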
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """ compute the forward pass for a gate layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'liner', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) + tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
    return eval(activ)(X)
def concat_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """ compute the forward pass for a concat layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'liner', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               tensor.dot(tensor.concatenate([X_word, X_char], axis=2),
                          tparams[p_name(prefix, 'W')]) + tparams[p_name(prefix, 'b')])
    return eval(activ)(X)
def model(inputs, _is_training, params, batch_size, hidden_size, drop_i, drop_s, init_scale, init_H_bias, _theano_rng):
    noise_i_for_H = get_dropout_noise((batch_size, hidden_size), drop_i, _theano_rng)
    i_for_H = ifelse(_is_training, inputs * noise_i_for_H, inputs)
    i_for_H = linear.model(i_for_H, params, hidden_size, hidden_size, init_scale, bias_init=init_H_bias)

    # Dropout noise for recurrent hidden state.
    noise_s = get_dropout_noise((batch_size, hidden_size), drop_s, _theano_rng)

    def step(i_for_H_t, y_tm1, noise_s):
        s_lm1_for_H = ifelse(_is_training, y_tm1 * noise_s, y_tm1)
        return T.tanh(i_for_H_t + linear.model(s_lm1_for_H, params, hidden_size, hidden_size, init_scale))

    y_0 = shared_zeros((batch_size, hidden_size), name='h0')
    y, _ = theano.scan(step, sequences=i_for_H, outputs_info=[y_0], non_sequences=[noise_s])

    y_last = y[-1]
    sticky_state_updates = [(y_0, y_last)]

    return y, y_0, sticky_state_updates
def skip_connect(self, input, layer_index):
    if [] == self.noisy_z:
        raise ValueError('Error: noisy_z is an empty list, noisy_fprop must be run before skip_connect')
    MU = self.compute_mu(input, self.As[layer_index])
    V = self.compute_v(input, self.As[layer_index])

    reconstruction = (self.noisy_z[-1] - MU) * V + MU

    # # Non trainable Batchnormalisation
    # mean = reconstruction.mean(0)
    # std = reconstruction.std(0) + 1e-10
    #
    # # Only batchnormalise for a batchsize > 1
    # mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
    # std = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))
    # reconstruction = (reconstruction - mean) / std

    self.tmp = reconstruction  # To calculate the reconstruction error later
    self.reconstructions.append(reconstruction)
    self.noisy_z = self.noisy_z[0:-1]

    return reconstruction
def forward(self, x, seq):
    """
    :param x: (length, dim)
    :param seq: (length - 1, 3)
    :return:
    """
    # (length, dim) -> (2 * length - 1, dim)
    vector = T.concatenate([x, T.zeros_like(x)[:-1, :]], axis=0)
    # vector = theano.printing.Print()(vector)
    # scan length-1 times
    hs, _ = theano.scan(fn=self.encode,
                        sequences=seq,
                        outputs_info=[vector, shared_scalar(0)],
                        name="compose_phrase")
    comp_vec_init = hs[0][-1][-1]
    comp_rec_init = T.sum(hs[1])
    if self.normalize:
        hidden = x[0] / x[0].norm(2)
    else:
        hidden = x[0]
    comp_vec = ifelse(x.shape[0] > 1, comp_vec_init, hidden)
    comp_rec = ifelse(x.shape[0] > 1, comp_rec_init, shared_zero_scalar())
    return comp_vec, comp_rec
def forward(self, x):
    """
    :param x: (length, dim)
    :return: (hidden_dim, )
    """
    if self.padding_size > 0:
        # (padding_size + length + padding_size, dim)
        x = temporal_padding_2d(x, (self.padding_size, self.padding_size))
    safe_x = temporal_padding_2d(x, (0, self.kernel_size - x.shape[0]))
    # If Kernel Size is greater than sentence length, padding at the end of sentence
    x = ifelse(T.gt(self.kernel_size - x.shape[0], 0),
               safe_x,
               x)
    conv_result = self.forward_conv(x)
    pooling_result = get_pooling(conv_result, self.pooling)
    dropout_out = dropout_from_layer(pooling_result, self.dropout)
    return self.act.activate(dropout_out + self.b)
def test_lazy_if(self):
    # Tests that lazy if works .. even if the two results have different
    # shapes but the same type (i.e. both vectors, or matrices or
    # whatnot of same dtype)
    x = tensor.vector('x', dtype=self.dtype)
    y = tensor.vector('y', dtype=self.dtype)
    c = tensor.iscalar('c')
    f = theano.function([c, x, y], ifelse(c, x, y), mode=self.mode)
    self.assertFunctionContains1(f, self.get_ifelse(1))
    rng = numpy.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)

    vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

    assert numpy.allclose(vx, f(1, vx, vy))
    assert numpy.allclose(vy, f(0, vx, vy))
def test_pushout3(self):
    raise SkipTest("Optimization temporarily disabled")
    x1 = tensor.scalar('x1')
    y1 = tensor.scalar('x2')
    y2 = tensor.scalar('y2')
    c = tensor.iscalar('c')
    two = numpy.asarray(2, dtype=theano.config.floatX)
    x, y = ifelse(c, (x1, y1), (two, y2), name='f1')
    o3 = numpy.asarray(0.3, dtype=theano.config.floatX)
    o2 = numpy.asarray(0.2, dtype=theano.config.floatX)
    z = ifelse(c, o3, o2, name='f2')
    out = x * z * y

    f = theano.function([x1, y1, y2, c], out,
                        allow_input_downcast=True)
    assert isinstance(f.maker.fgraph.toposort()[-1].op, IfElse)
    rng = numpy.random.RandomState(utt.fetch_seed())
    vx1 = rng.uniform()
    vy1 = rng.uniform()
    vy2 = rng.uniform()

    assert numpy.allclose(f(vx1, vy1, vy2, 1), vx1 * vy1 * 0.3)
    assert numpy.allclose(f(vx1, vy1, vy2, 0), 2 * vy2 * 0.2)
def test_c_thunks():
    a = tensor.scalars('a')
    b, c = tensor.vectors('bc')
    cases = [False]
    if theano.config.cxx:
        cases.append(True)
    for c_thunks in cases:
        f = function([a, b, c], ifelse(a, a * b, b * c),
                     mode=Mode(
                         optimizer=None,
                         linker=vm.VM_Linker(c_thunks=c_thunks,
                                             use_cloop=False)))
        f(1, [2], [3, 2])
        from nose.tools import assert_raises
        assert_raises(ValueError, f, 0, [2], [3, 4])
        assert any([hasattr(t, 'cthunk') for t in f.fn.thunks]) == c_thunks
def remove_adjdup(x):
    """
    Remove adjacent duplicate items of a vector
    x: vector
    return a vector with adjacent duplicate items removed,
    for example [1,2,2,2,3,3,4] -> [1,2,3,4]
    """
    def update(x, nondup, idx):
        nondup = tensor.switch(tensor.eq(nondup[idx], x), nondup,
                               tensor.set_subtensor(nondup[idx + 1], x))  # tensor.switch is much faster than ifelse
        idx = tensor.switch(tensor.eq(nondup[idx], x), idx, idx + 1)
        return nondup, idx
    nondup = x
    idx = tensor.as_tensor_variable(0)
    idx = tensor.cast(idx, 'int32')
    result, updates = theano.scan(fn=update, sequences=x, outputs_info=[nondup, idx], name='remove_adjdup')
    nondup = result[0][-1]
    idx = result[1][-1]
    return nondup[0:idx+1]
def _remove_adjdup(x):
    """
    Remove adjacent duplicate items of a vector
    x: vector
    return a vector with adjacent duplicate items removed,
    for example [1,2,2,2,3,3,4] -> [1,2,3,4]
    """
    def update(x, nondup, idx):
        nondup = tensor.switch(tensor.eq(nondup[idx], x), nondup,
                               tensor.set_subtensor(nondup[idx + 1], x))  # tensor.switch is much faster than ifelse
        idx = tensor.switch(tensor.eq(nondup[idx], x), idx, idx + 1)
        return nondup, idx
    nondup = x
    idx = tensor.as_tensor_variable(0)
    idx = tensor.cast(idx, 'int32')
    result, updates = theano.scan(fn=update, sequences=x, outputs_info=[nondup, idx], name='remove_adjdup')
    nondup = result[0][-1]
    idx = result[1][-1]
    return nondup[0:idx+1]
def _editdist(s, t):
    """
    Levenshtein's edit distance function
    :param s: vector, source string
    :param t: vector, target string
    :return: edit distance, scalar
    """
    def update(x, previous_row):
        current_row = previous_row + 1
        current_row = tensor.set_subtensor(
            current_row[1:],
            tensor.minimum(current_row[1:],
                           tensor.add(previous_row[:-1], tensor.neq(target, x))))
        current_row = tensor.set_subtensor(
            current_row[1:],
            tensor.minimum(current_row[1:], current_row[0:-1] + 1))
        return current_row
    source, target = ifelse(tensor.lt(s.shape[0], t.shape[0]), (t, s), (s, t))
    previous_row = tensor.arange(target.size + 1, dtype=theano.config.floatX)
    result, updates = theano.scan(fn=update, sequences=source, outputs_info=previous_row, name='editdist')
    return result[-1, -1]
def encode(self, x, shape=None):
    if shape is None:
        xp = create_shared_variable(np.zeros((0, )*x.ndim), name='xp')
        delta = ifelse(xp.size > 0, x - xp, x)
    else:
        xp = create_shared_variable(np.zeros(shape), name='xp{}'.format(shape))
        delta = x - xp
    add_update(xp, x)
    y = self.kp*x + self.kd*delta
    if self.quantization is None:
        return y
    elif self.quantization == 'herd':
        return herd(y, shape=shape)
    else:
        raise Exception('No quantizer: {}'.format(self.quantization))
def geoseries_sum(r, t_end, t_start):
    """
    Sum of r**t from t=t_start to t=t_end, inclusive
    :param r:
    :param t_end:
    :param t_start:
    :return:
    """
    # return ifelse(tt.eq(r, 1), (t_end-t_start+1).astype(theano.config.floatX), (r**(t_end+1)-r**t_start)/(r-1))
    return ifelse(tt.eq(r, 1), (t_end-t_start+1).astype(theano.config.floatX), (r**(t_end+1)-r**t_start)/(r-1))
def connect(self, inputs, is_train):
    """ Trick to speed up model compiling at decoding time.
        (Avoids building a complicated CG.)
    """
    if not self.fix_mask:
        self.generate_mask(inputs.shape, is_train)
    if self.fast_predict:
        return inputs * (1 - self.dropout_prob)
    return ifelse(is_train,
                  inputs * self.dropout_mask,
                  inputs * (1 - self.dropout_prob))
def scale(X, max_norm):
    curr_norm = T.sum(T.abs_(X))
    return ifelse(T.lt(curr_norm, max_norm), X, max_norm * (X / curr_norm))
def fprop(self, x):
    if_longer = x[:self.required]
    padding = ReplicateLayer(TT.max([1, self.required - x.shape[0]]))(x[-1]).out
    if_shorter = TT.concatenate([x, padding])
    diff = x.shape[0] - self.required
    self.out = ifelse(diff < 0, if_shorter, if_longer)
    return self.out
def apply_dropout(self, x, noise):
    return ifelse(self._is_training, noise * x, x)
def fit(self, X, y=None):
    self.n_features = y.shape[0]
    self.weights['input'] = theano.shared(value=np.zeros((
        self.n_features, X.shape[1], self.spatial[0], self.spatial[1]),
        dtype=theano.config.floatX), name='w', borrow=True)

    input = T.tensor4(name='input')
    target = T.tensor4(name='target')
    decay = T.scalar(name='decay')
    xy = T.nnet.conv2d(input.transpose(1, 0, 2, 3), target.transpose(1, 0, 2, 3),
                       border_mode=self.pad, subsample=self.stride)
    xx = T.sum(T.power(input, 2), axis=(0, 2, 3))
    k = ifelse(self.hidden_matrices['input'] is None, )
    lam = theano.shared(value=self._C, name='constrain', borrow=True)
    prediction = T.nnet.conv2d(input, self.weights['input'],
                               border_mode=self.pad, subsample=self.stride)
    weights, _ = theano.scan(
        fn=lambda a, k, c: a/(k+c),
        outputs_info=None,
        sequences=[self.hidden_matrices['A'].transpose(1, 0, 2, 3), self.hidden_matrices['K']],
        non_sequences=lam)
    new_weights = weights.transpose(1, 0, 2, 3)
    updates = [(self.hidden_matrices['K'], self.hidden_matrices['K'].dot(decay) + xx),
               (self.hidden_matrices['A'], self.hidden_matrices['A'].dot(decay) + xy),
               (self.weights['input'], new_weights)]
    self.conv_fct['train'] = theano.function([input, target, decay], prediction, updates=updates)
    self.conv_fct['predict'] = theano.function([input], prediction)
    return self.conv_fct['train'](X, y, 1)
def inner_fn_sample(stm1):

    prior_stmu = T.tanh( T.dot(Wl_stmu_stm1, stm1) + bl_stmu )
    prior_stsig = T.nnet.softplus( T.dot(Wl_stsig_stm1, stm1) + bl_stsig ) + sig_min_states

    # Set explicit prior on score during last time step
    #prior_stmu = ifelse(T.lt(t,n_run_steps - 5),prior_stmu, T.set_subtensor(prior_stmu[0,:],0.1))
    #prior_stsig = ifelse(T.lt(t,n_run_steps - 5),prior_stsig, T.set_subtensor(prior_stsig[0,:],0.001))

    st = prior_stmu + theano_rng.normal((n_s,n_samples))*prior_stsig

    ost = T.nnet.relu( T.dot(Wl_ost_st,st) + bl_ost )
    ost2 = T.nnet.relu( T.dot(Wl_ost2_ost,ost) + bl_ost2 )
    ost3 = T.nnet.relu( T.dot(Wl_ost3_ost2,ost2) + bl_ost3 )

    otmu = T.dot(Wl_otmu_st, ost3) + bl_otmu
    otsig = T.nnet.softplus(T.dot(Wl_otsig_st, ost3) + bl_otsig) + sig_min_obs

    ohtmu = T.dot(Wl_ohtmu_st, ost3) + bl_ohtmu
    ohtsig = T.nnet.softplus( T.dot(Wl_ohtsig_st, ost3) + bl_ohtsig ) + sig_min_obs

    oatmu = T.dot(Wl_oatmu_st, ost3) + bl_oatmu
    oatsig = T.nnet.softplus( T.dot(Wl_oatsig_st, ost3) + bl_oatsig ) + sig_min_obs

    ot = otmu + theano_rng.normal((n_o,n_samples))*otsig
    oht = ohtmu + theano_rng.normal((n_oh,n_samples))*ohtsig
    oat = oatmu + theano_rng.normal((n_oa,n_samples))*oatsig

    return st, ohtmu, ohtsig, ot, oht, oat, prior_stmu, prior_stsig

# Define initial state and action
def get_train_function(self):
    # specify the computational graph
    num_param_vecs = T.scalar('num_param_vecs')
    # weight = theano.shared(np.random.randn(len(self.feature_map), self.num_param_vecs), name='weight')
    weight = theano.shared(np.zeros((len(self.feature_map), self.num_param_vecs)), name='weight')
    feat_mat = sparse.csr_matrix(name='feat_mat')

    pred = T.nnet.sigmoid( sparse.dot(feat_mat, weight) )  # one-vs-rest

    o_pred = ifelse(T.gt(self.num_param_vecs, 1),
                    pred / pred.sum(axis=1).reshape((pred.shape[0], 1)),
                    T.concatenate( [pred, 1-pred], axis=1 ) )

    f_target = T.matrix('f_target')
    f_mask_mat = sparse.csr_matrix(name='f_mask_mat')

    f_sum_pred = sparse.dot( f_mask_mat, o_pred )
    f_pred = f_sum_pred / f_sum_pred.sum(axis=1).reshape((f_sum_pred.shape[0], 1))

    i_target = T.matrix('i_target')
    i_mask_mat = sparse.csr_matrix(name='l_mask_mat')

    i_pred = sparse.dot( i_mask_mat, pred )

    # objective = self.param.feature_lambda * T.nnet.categorical_crossentropy(f_pred, f_target).sum() + T.nnet.binary_crossentropy(i_pred, i_target).sum() + self.param.l2_lambda * (weight ** 2).sum() / 2
    objective = 0.0 * T.nnet.categorical_crossentropy(f_pred, f_target).sum() + T.nnet.binary_crossentropy(i_pred, i_target).sum() + self.param.l2_lambda * (weight ** 2).sum() / 2

    grad_weight = T.grad(objective, weight)

    # print 'Compiling function ...'
    # compile the function
    train = theano.function(inputs=[feat_mat, f_mask_mat, f_target, i_mask_mat, i_target],
                            outputs=[objective, weight],
                            updates=[(weight, weight - 0.1*grad_weight)] )

    return train
def Recurrence(processed_frames, h0, reset):
    """
    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape:               (batch size, N_GRUS, DIM)
    reset.shape:            ()
    output.shape:           (batch size, n frames, DIM)
    """
    # print "warning no recurrence"
    # return T.zeros_like(processed_frames), h0

    learned_h0 = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX)
    )
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    gru0 = lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM, processed_frames, h0=h0[:, 0])
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('Recurrence.GRU'+str(i), DIM, DIM, grus[-1], h0=h0[:, i])
        grus.append(gru)

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (grus[-1], last_hidden)
def _forward(self):
    eps = self.eps
    param_size = (1, 1, self.n_output, 1, 1)
    self.gamma = self.declare(param_size)
    self.beta = self.declare(param_size)

    mean = self.inpt.mean(axis=[0, 1, 3, 4], keepdims=False)
    std = self.inpt.std(axis=[0, 1, 3, 4], keepdims=False)

    self._setup_running_metrics(self.n_output)
    self.running_mean.default_update = ifelse(
        self.training,
        (1.0 - self.alpha) * self.running_mean + self.alpha * mean,
        self.running_mean
    )
    self.running_std.default_update = ifelse(
        self.training,
        (1.0 - self.alpha) * self.running_std + self.alpha * std,
        self.running_std
    )

    # This will be optimized away, but ensures the running mean and the running std get updated.
    # Reference: https://gist.github.com/f0k/f1a6bd3c8585c400c190#file-batch_norm-py-L86
    mean += 0 * self.running_mean
    std += 0 * self.running_std

    use_mean = ifelse(self.training, mean, self.running_mean)
    use_std = ifelse(self.training, std, self.running_std)
    use_mean = use_mean.dimshuffle('x', 'x', 0, 'x', 'x')
    use_std = use_std.dimshuffle('x', 'x', 0, 'x', 'x')
    norm_inpt = (self.inpt - use_mean) / (use_std + eps)
    self.output = self.gamma * norm_inpt + self.beta
def process_pre_post_w(padding_arr, zeros_arr):
    argmax = T.argmax(padding_arr)
    zeros_arr = ifelse(T.eq(padding_arr[argmax], 0),
                       zeros_arr,
                       T.set_subtensor(zeros_arr[argmax-2:argmax+3],
                                       1.5 / (T.sum(padding_arr[argmax-2:argmax+3]))))
    return_arr = (zeros_arr * padding_arr)[2: -2]
    return return_arr
def __call__(self, input):
    mean = input.mean(self.axes, keepdims=True)
    std = input.std(self.axes, keepdims=True) + self.epsilon

    # Don't batchnormalise a single data point
    mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
    std = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))

    return (input - mean) * T.addbroadcast((self.gamma / std) + self.beta, *self.axes)
def forward_batch(self, x, mask):
    """
    :param x: (batch, length, dim)
    :param mask: (batch, length, )
    :return: (batch, length, hidden_dim)
    """
    # conv_after_length = length - kernel + 2 * padding_size + 1
    new_x = x
    if self.padding_size > 0:
        # (padding_size + length + padding_size, dim)
        new_x = temporal_padding_3d(x, (self.padding_size, self.padding_size))
        # (batch, conv_after_length)
        mask = temporal_padding_mask(mask, kernel_size=self.kernel_size, padding_size=self.padding_size)
    elif self.padding_size == 0:
        # (batch, conv_after_length)
        mask = temporal_padding_mask(mask, kernel_size=self.kernel_size, padding_size=0)
    else:
        raise RuntimeError("Dilation Rate >= 0")
    # safe_x = temporal_padding_3d(x, (0, self.kernel_size - x.shape[1]))
    # safe_mask = T.ones((x.shape[0], ), dtype=theano.config.floatX).dimshuffle([0, 'x'])
    # !!! convert safe_mask from col to matrix
    # safe_mask = T.unbroadcast(safe_mask, 1)
    # x, mask = ifelse(T.gt(self.kernel_size - x.shape[1], 0),
    #                  (safe_x, safe_mask),
    #                  (new_x, mask))
    # (batch, conv_after_length, hidden_dim)
    conv_result = self.forward_conv_batch(new_x)
    # new_x = Print(new_x)
    # mask = Print()(mask)
    pooling_result = get_pooling_batch(conv_result, mask, self.pooling)
    dropout_out = dropout_from_layer(pooling_result, self.dropout)
    return self.act.activate(dropout_out + self.b)
def test_not_lazy_if_inplace(self):
    # Tests that if the outputs are scalars and the graph is big,
    # we disable the inplace opt to speed up optimization
    x = tensor.vector('x', dtype=self.dtype)
    y = tensor.vector('y', dtype=self.dtype)
    c = tensor.iscalar('c')
    mode = theano.compile.get_mode(self.mode).excluding(
        # Disable many opt to keep the graph big enough to disable
        # the opt.
        'fusion', 'local_add_canonizer', 'inplace', 'constant_folding',
        'constant_folding')
    y2 = reduce(lambda x, y: x + y, [y] + list(range(200)))
    f = theano.function([c, x, y], ifelse(c, x, y2), mode=mode)
    # For not inplace ifelse
    ifnode = [n for n in f.maker.fgraph.toposort()
              if isinstance(n.op, IfElse)]
    assert len(ifnode) == 1
    assert not ifnode[0].op.as_view
    rng = numpy.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)

    vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

    assert numpy.allclose(vx, f(1, vx, vy))
    assert numpy.allclose(vy + sum(range(200)), f(0, vx, vy))
def test_mixed_dtype(self):
    x1 = tensor.vector('x1', dtype='int32')
    x2 = tensor.vector('x2', dtype=self.dtype)
    y1 = tensor.vector('y1', dtype='int32')
    y2 = tensor.vector('y2', dtype=self.dtype)
    c = tensor.iscalar('c')
    f = theano.function([c, x1, x2, y1, y2],
                        ifelse(c, (x1, x2), (y1, y2)), mode=self.mode)
    self.assertFunctionContains1(f, self.get_ifelse(2))
    rng = numpy.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)

    vx1 = numpy.asarray(rng.uniform(size=(xlen,)) * 3, 'int32')
    vx2 = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy1 = numpy.asarray(rng.uniform(size=(ylen,)) * 3, 'int32')
    vy2 = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)

    o1, o2 = f(1, vx1, vx2, vy1, vy2)
    assert numpy.allclose(vx1, o1)
    assert numpy.allclose(vx2, o2)
    o1, o2 = f(0, vx1, vx2, vy1, vy2)
    assert numpy.allclose(vy1, o1)
    assert numpy.allclose(vy2, o2)
def test_lazy_if_on_generics(self):
    x = theano.generic()
    y = theano.generic()
    c = tensor.iscalar('c')
    f = theano.function([c, x, y], ifelse(c, x, y))
    vx = ['testX']
    vy = ['testY']
    assert f(1, vx, vy) == vx
    assert f(0, vx, vy) == vy
def test_grad_lazy_if(self):
    # Tests that we can compute the gradients through lazy if
    x = tensor.vector('x', dtype=self.dtype)
    y = tensor.vector('y', dtype=self.dtype)
    c = tensor.iscalar('c')
    z = ifelse(c, x, y)
    gx, gy = tensor.grad(z.sum(), [x, y])

    f = theano.function([c, x, y], [self.cast_output(gx),
                                    self.cast_output(gy)],
                        mode=self.mode)
    # There is only 2 of the 3 ifelse that are moved on the GPU.
    # The one that stay on the CPU is for the shape.
    self.assertFunctionContains(f, self.get_ifelse(1), min=2, max=3)
    rng = numpy.random.RandomState(utt.fetch_seed())

    xlen = rng.randint(200)
    ylen = rng.randint(200)

    vx = numpy.asarray(rng.uniform(size=(xlen,)), self.dtype)
    vy = numpy.asarray(rng.uniform(size=(ylen,)), self.dtype)
    gx0, gy0 = f(1, vx, vy)
    assert numpy.allclose(gx0.shape, vx.shape)
    assert numpy.allclose(gy0.shape, vy.shape)
    assert numpy.all(numpy.asarray(gx0) == 1.)
    assert numpy.all(numpy.asarray(gy0) == 0.)

    gx0, gy0 = f(0, vx, vy)
    assert numpy.allclose(gx0.shape, vx.shape)
    assert numpy.allclose(gy0.shape, vy.shape)
    assert numpy.all(numpy.asarray(gx0) == 0.)
    assert numpy.all(numpy.asarray(gy0) == 1.)
def test_multiple_out_grad(self):
    # Tests that we can compute the gradients through lazy if
    x1 = tensor.vector('x1')
    x2 = tensor.vector('x2')
    y1 = tensor.vector('y1')
    y2 = tensor.vector('y2')
    c = tensor.iscalar('c')
    z = ifelse(c, (x1, x2), (y1, y2))
    grads = tensor.grad(z[0].sum() + z[1].sum(),
                        [x1, x2, y1, y2])

    f = theano.function([c, x1, x2, y1, y2], grads)
    rng = numpy.random.RandomState(utt.fetch_seed())

    lens = [rng.randint(200) for i in range(4)]
    values = [numpy.asarray(rng.uniform(size=(l,)), theano.config.floatX)
              for l in lens]
    outs_1 = f(1, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_1, lens)])
    assert numpy.all(outs_1[0] == 1.)
    assert numpy.all(outs_1[1] == 1.)
    assert numpy.all(outs_1[2] == 0.)
    assert numpy.all(outs_1[3] == 0.)

    outs_0 = f(0, *values)
    assert all([x.shape[0] == y for x, y in zip(outs_1, lens)])
    assert numpy.all(outs_0[0] == 0.)
    assert numpy.all(outs_0[1] == 0.)
    assert numpy.all(outs_0[2] == 1.)
    assert numpy.all(outs_0[3] == 1.)
def test_dtype_mismatch(self):
    rng = numpy.random.RandomState(utt.fetch_seed())
    data = rng.rand(5).astype(self.dtype)
    x = self.shared(data)
    y = tensor.cast(x * 10, 'int8')
    cond = theano.tensor.iscalar('cond')
    self.assertRaises(TypeError, ifelse, cond, x, y)
    self.assertRaises(TypeError, ifelse, cond, y, x)
def test_ndim_mismatch(self):
    rng = numpy.random.RandomState(utt.fetch_seed())
    data = rng.rand(5).astype(self.dtype)
    x = self.shared(data)
    y = tensor.col('y', self.dtype)
    cond = theano.tensor.iscalar('cond')
    self.assertRaises(TypeError, ifelse, cond, x, y)
    self.assertRaises(TypeError, ifelse, cond, y, x)
def test_broadcast_mismatch(self):
    rng = numpy.random.RandomState(utt.fetch_seed())
    data = rng.rand(5).astype(self.dtype)
    x = self.shared(data)
    # print x.broadcastable
    y = tensor.row('y', self.dtype)
    # print y.broadcastable
    cond = theano.tensor.iscalar('cond')
    self.assertRaises(TypeError, ifelse, cond, x, y)
    self.assertRaises(TypeError, ifelse, cond, y, x)
def test_merge(self):
    raise SkipTest("Optimization temporarily disabled")
    x = tensor.vector('x')
    y = tensor.vector('y')
    c = tensor.iscalar('c')
    z1 = ifelse(c, x + 1, y + 1)
    z2 = ifelse(c, x + 2, y + 2)
    z = z1 + z2
    f = theano.function([c, x, y], z)
    assert len([n for n in f.maker.fgraph.toposort()
                if isinstance(n.op, IfElse)]) == 1
def test_remove_useless_inputs1(self):
    raise SkipTest("Optimization temporarily disabled")
    x = tensor.vector('x')
    y = tensor.vector('y')
    c = tensor.iscalar('c')
    z = ifelse(c, (x, x), (y, y))
    f = theano.function([c, x, y], z)

    ifnode = [n for n in f.maker.fgraph.toposort()
              if isinstance(n.op, IfElse)][0]
    assert len(ifnode.inputs) == 3
def test_remove_useless_inputs2(self):
    raise SkipTest("Optimization temporarily disabled")
    x1 = tensor.vector('x1')
    x2 = tensor.vector('x2')
    y1 = tensor.vector('y1')
    y2 = tensor.vector('y2')
    c = tensor.iscalar('c')
    z = ifelse(c, (x1, x1, x1, x2, x2), (y1, y1, y2, y2, y2))
    f = theano.function([c, x1, x2, y1, y2], z)

    ifnode = [x for x in f.maker.fgraph.toposort()
              if isinstance(x.op, IfElse)][0]
    assert len(ifnode.outputs) == 3
def test_pushout1(self):
    raise SkipTest("Optimization temporarily disabled")
    x1 = tensor.scalar('x1')
    x2 = tensor.scalar('x2')
    y1 = tensor.scalar('y1')
    y2 = tensor.scalar('y2')
    w1 = tensor.scalar('w1')
    w2 = tensor.scalar('w2')
    c = tensor.iscalar('c')
    x, y = ifelse(c, (x1, y1), (x2, y2), name='f1')
    z = ifelse(c, w1, w2, name='f2')
    out = x * z * y
    f = theano.function([x1, x2, y1, y2, w1, w2, c], out,
                        allow_input_downcast=True)
    assert isinstance(f.maker.fgraph.toposort()[-1].op, IfElse)
    rng = numpy.random.RandomState(utt.fetch_seed())
    vx1 = rng.uniform()
    vx2 = rng.uniform()
    vy1 = rng.uniform()
    vy2 = rng.uniform()
    vw1 = rng.uniform()
    vw2 = rng.uniform()
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 1),
                          vx1 * vy1 * vw1)
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 0),
                          vx2 * vy2 * vw2)
def test_merge_ifs_true_false(self):
    raise SkipTest("Optimization temporarily disabled")
    x1 = tensor.scalar('x1')
    x2 = tensor.scalar('x2')
    y1 = tensor.scalar('y1')
    y2 = tensor.scalar('y2')
    w1 = tensor.scalar('w1')
    w2 = tensor.scalar('w2')
    c = tensor.iscalar('c')

    out = ifelse(c,
                 ifelse(c, x1, x2) + ifelse(c, y1, y2) + w1,
                 ifelse(c, x1, x2) + ifelse(c, y1, y2) + w2)
    f = theano.function([x1, x2, y1, y2, w1, w2, c], out,
                        allow_input_downcast=True)
    assert len([x for x in f.maker.fgraph.toposort()
                if isinstance(x.op, IfElse)]) == 1

    rng = numpy.random.RandomState(utt.fetch_seed())
    vx1 = rng.uniform()
    vx2 = rng.uniform()
    vy1 = rng.uniform()
    vy2 = rng.uniform()
    vw1 = rng.uniform()
    vw2 = rng.uniform()
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 1),
                          vx1 + vy1 + vw1)
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 0),
                          vx2 + vy2 + vw2)
def test_grad_test_values(self):
    """
    Regression test for test values of `ifelse` gradient.
    """
    backup = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    try:
        x = tensor.scalar('x')
        x.tag.test_value = 1
        # Used to crash due to undefined test value.
        tensor.grad(ifelse(0, x, x), x)
    finally:
        theano.config.compute_test_value = backup
def test_grad_int_value(self):
    w = theano.shared(numpy.random.rand(10))
    b = theano.shared(numpy.random.rand())
    params = [w, b]

    x = tensor.vector()
    y = tensor.scalar()

    score = w.dot(x) + b
    correct = (score * y > 0)

    loss = ifelse(correct, 0, 1)
    [(param, param - 0.5 * tensor.grad(cost=loss, wrt=param))
     for param in params]
def test_ifelse(self):
    config1 = theano.config.profile
    config2 = theano.config.profile_memory

    try:
        theano.config.profile = True
        theano.config.profile_memory = True

        a, b = T.scalars('a', 'b')
        x, y = T.scalars('x', 'y')

        z = ifelse(T.lt(a, b), x * 2, y * 2)

        p = theano.ProfileStats(False)

        if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
            m = "FAST_RUN"
        else:
            m = None

        f_ifelse = theano.function([a, b, x, y], z, profile=p,
                                   name="test_ifelse", mode=m)

        val1 = 0.
        val2 = 1.
        big_mat1 = 10
        big_mat2 = 11

        f_ifelse(val1, val2, big_mat1, big_mat2)

    finally:
        theano.config.profile = config1
        theano.config.profile_memory = config2
def test_ifelse():
    a = T.scalar()
    b = generic()
    c = generic()
    notimpl = NotImplementedOp()

    lazys = [True]
    # We need lazy to end up being True for this test.
    if theano.config.vm.lazy in [True, None]:
        lazys = [True, None]

    cloops = [True, False]

    if theano.config.cxx == "":
        cloops = [False]

    for cloop in cloops:
        for lazy in lazys:
            linker = theano.gof.vm.VM_Linker(use_cloop=cloop, lazy=lazy)
            f = function([a, b, c], ifelse(a, notimpl(b), c),
                         mode=Mode(linker=linker, optimizer='fast_run'))

            try:
                # print "case 1"
                f(1, 'a', 'b')
                assert False
            except NotImplementedOp.E:
                pass
            # print "... passed"

            # print "case 2"
            # print f(0, 'a', 'b')
            assert f(0, 'a', 'b') == 'b'
            # print "... passed"
def test_callback_with_ifelse(self):
    a, b, c = tensor.scalars('abc')
    f = function([a, b, c], ifelse(a, 2 * b, 2 * c),
                 mode=Mode(
                     optimizer=None,
                     linker=vm.VM_Linker(callback=self.callback)))

    f(1, 2, 3)
    assert self.n_callbacks['IfElse'] == 2
def test_no_leak_many_call_lazy():
    # Verify no memory leaks when calling a function a lot of times

    # This isn't really a unit test, you have to run it and look at top to
    # see if there's a leak

    def build_graph(x, depth=5):
        z = x
        for d in range(depth):
            z = ifelse(z.mean() > 0.5, -z, z)
        return z

    def time_linker(name, linker):
        steps_a = 10
        x = tensor.dvector()
        a = build_graph(x, steps_a)

        f_a = function([x], a,
                       mode=Mode(optimizer=None,
                                 linker=linker()))
        inp = numpy.random.rand(1000000)
        for i in xrange(100):
            f_a(inp)
            if 0:  # this doesn't seem to work, prints 0 for everything
                import resource

                pre = resource.getrusage(resource.RUSAGE_SELF)
                post = resource.getrusage(resource.RUSAGE_SELF)
                print(pre.ru_ixrss, post.ru_ixrss)
                print(pre.ru_idrss, post.ru_idrss)
                print(pre.ru_maxrss, post.ru_maxrss)
    print(1)
    time_linker('vmLinker_C',
                lambda: vm.VM_Linker(allow_gc=False, use_cloop=True))
    print(2)
    time_linker('vmLinker',
                lambda: vm.VM_Linker(allow_gc=False, use_cloop=False))
def ifelse(condition, op1, op2):
    return theano.ifelse.ifelse(condition, op1, op2)
def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    print 'AdaMax2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2, 'n_accum:', n_accum
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(new[it], n_accum), 0)
    update = T.eq(T.mod(new[it], n_accum), n_accum-1)

    for i in range(len(w)):
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        g_sum = G.sharedf(w[i].get_value() * 0.)

        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        new[g_sum] = ifelse(reset, g[i], g_sum + g[i])
        new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
        new[w[i]] = ifelse(update, w[i] + alpha * new[mom1] / new[_max], w[i])

    return new

# AdaMax that keeps running average of parameter
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3,'n_accum:',n_accum

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it, n_accum), 0)
    update = T.eq(T.mod(it, n_accum), n_accum-1)

    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)

            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg