The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.function().
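Before the project examples, here is a minimal, self-contained sketch of the basic theano.function() workflow (variable names are illustrative): declare symbolic inputs, build an expression, then compile the graph into an ordinary Python callable.

import theano
import theano.tensor as T

# declare symbolic scalar inputs
a = T.dscalar('a')
b = T.dscalar('b')

# compile the symbolic expression a + b into a callable
f = theano.function([a, b], a + b)

print(f(2, 3))  # 5.0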
def prepare_style(self, scale=1.0):
    """Called at each phase of the optimization; process the style image according to the scale,
    then run it through the model to extract intermediate outputs (e.g. sem4_1) and turn them
    into patches.
    """
    style_img = self.rescale_image(self.style_img_original, scale)
    self.style_img = self.model.prepare_image(style_img)

    style_map = self.rescale_image(self.style_map_original, scale)
    self.style_map = style_map.transpose((2, 0, 1))[np.newaxis].astype(np.float32)

    # Compile a function to run on the GPU to extract patches for all layers at once.
    layer_outputs = zip(self.style_layers, self.model.get_outputs('sem', self.style_layers))
    extractor = self.compile([self.model.tensor_img, self.model.tensor_map],
                             self.do_extract_patches(layer_outputs))
    result = extractor(self.style_img, self.style_map)

    # Store all the style patches layer by layer, resized to match slice size and cast to 16-bit for size.
    self.style_data = {}
    for layer, *data in zip(self.style_layers, result[0::3], result[1::3], result[2::3]):
        patches = data[0]
        l = self.model.network['nn' + layer]
        l.num_filters = patches.shape[0] // args.slices
        self.style_data[layer] = [d[:l.num_filters * args.slices].astype(np.float16) for d in data] \
                               + [np.zeros((patches.shape[0],), dtype=np.float16)]
        print(' - Style layer {}: {} patches in {:,}kb.'.format(layer, patches.shape, patches.size // 1000))
def compile(self, s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0,
            fetches_=None, updates_=None, givens_=None, trunc_grad_=None, profile_=False):
    if type(s_inputs_) not in (list, tuple):
        s_inputs_ = [s_inputs_]
    if isinstance(updates_, dict):
        updates_ = list(updates_.items())
    super(VanillaSGD, self).compile(
        s_inputs_, s_loss_, v_params_,
        s_reg_=s_reg_, s_grads_=s_grads_, trunc_grad_=trunc_grad_)
    apply_grad = [(p, p - g * self.s_lr) for p, g in zip(v_params_, self.s_grads)]
    self.fn_train = th.function(
        [self.s_lr] + s_inputs_,
        fetches_,
        updates=apply_grad + (updates_ if updates_ else []),
        givens=givens_,
        on_unused_input='warn',
        profile=profile_)
    return self.fn_train
def compile(self, s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0,
            fetches_=None, updates_=None, givens_=None, trunc_grad_=None, profile_=False):
    def get_shared_shape(v):
        return v.get_value(borrow=True, return_internal_type=True).shape
    if type(s_inputs_) not in (list, tuple):
        s_inputs_ = [s_inputs_]
    if isinstance(updates_, dict):
        updates_ = list(updates_.items())
    super(AdamSGD, self).compile(
        s_inputs_, s_loss_, v_params_,
        s_reg_=s_reg_, s_grads_=s_grads_, trunc_grad_=trunc_grad_)
    self.v_m = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX),
                          name='adam_m_' + p.name if p.name is not None else None)
                for p in v_params_]
    self.v_v = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX),
                          name='adam_v_' + p.name if p.name is not None else None)
                for p in v_params_]
    s_b1 = T.scalar('adam_b1'); s_b2 = T.scalar('adam_b2')
    s_b1s = T.scalar('adam_b1s'); s_b2s = T.scalar('adam_b2s')
    update_m = [(m, (m * s_b1 + (1. - s_b1) * g)) for m, g in zip(self.v_m, self.s_grads)]
    update_v = [(v, (v * s_b2 + (1. - s_b2) * g * g)) for v, g in zip(self.v_v, self.s_grads)]
    apply_grad = [(p, p - (s_b1s * m * self.s_lr) / (T.sqrt(s_b2s * v) + self.eps))
                  for p, m, v in zip(v_params_, self.v_m, self.v_v)]
    self.fn_train = th.function(
        inputs=[self.s_lr] + s_inputs_ + [s_b1, s_b2, s_b1s, s_b2s],
        outputs=fetches_,
        updates=update_m + update_v + apply_grad + (updates_ if updates_ else []),
        on_unused_input='warn',
        givens=givens_,
        profile=profile_)
    self.fn_rst = th.function(
        inputs=[],
        updates=[(v, T.zeros_like(v)) for v in self.v_m + self.v_v],
        profile=profile_)
    return self.fn_train
def setupTrain(self):
    # train_model is a function that updates the model parameters by SGD
    opt = Optimizer(self.grads, self.params)
    updates = opt.RMSProp(self.learning_rate, 0.9, 1.0 / 100.)

    batch_size = self.cfgParams.batch_size
    givens_train = {self.x: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    givens_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size]

    print("compiling train_model() ... ")
    self.train_model = theano.function(inputs=[self.index, self.learning_rate],
                                       outputs=self.cost,
                                       updates=updates,
                                       givens=givens_train)
    print("done.")

    print("compiling test_model_on_train() ... ")
    batch_size = self.cfgParams.batch_size
    givens_test_on_train = {self.x: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    givens_test_on_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size]
    self.test_model_on_train = theano.function(inputs=[self.index],
                                               outputs=self.errors,
                                               givens=givens_test_on_train)
    print("done.")
def setupValidate(self):
    batch_size = self.cfgParams.batch_size
    givens_val = {self.x: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    givens_val[self.y] = self.val_data_y[self.index * batch_size:(self.index + 1) * batch_size]

    print("compiling validation_error() ... ")
    self.validation_error = theano.function(inputs=[self.index],
                                            outputs=self.errors,
                                            givens=givens_val)
    print("done.")

    print("compiling validation_cost() ... ")
    self.validation_cost = theano.function(inputs=[self.index],
                                           outputs=self.cost,
                                           givens=givens_val)
    print("done.")

    # debug and so
    print("compiling compute_val_descr() ... ")
    givens_val_descr = {self.x: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    self.compute_val_descr = theano.function(inputs=[self.index],
                                             outputs=self.poseNet.output,
                                             givens=givens_val_descr)
    print("done.")
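Several of the examples above (and below) use the standard Theano minibatch idiom: the whole dataset lives in a shared variable on the device, the compiled function takes only a minibatch index, and givens substitutes the matching slice. A minimal standalone sketch of the idiom, with illustrative names:

import numpy as np
import theano
import theano.tensor as T

batch_size = 4
# whole "dataset" stored once in a shared variable
data = theano.shared(np.arange(20, dtype=theano.config.floatX).reshape(10, 2))

index = T.lscalar('index')
x = T.matrix('x')
mean_x = x.mean()

# the compiled function receives only the minibatch index;
# givens maps x to the corresponding slice of the shared data
f = theano.function(inputs=[index], outputs=mean_x,
                    givens={x: data[index * batch_size:(index + 1) * batch_size]})

print(f(0))  # 3.5, the mean of the first four rows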
def compile_sample(self):
    # # for Typical Auto-encoder, only conditional generation is useful.
    # inputs = T.imatrix()  # padded input word sequence (for training)
    # if self.config['mode'] == 'RNN':
    #     context = alloc_zeros_matrix(inputs.shape[0], self.config['enc_contxt_dim'])
    # elif self.config['mode'] == 'NTM':
    #     context = T.repeat(self.memory[None, :, :], inputs.shape[0], axis=0)
    # else:
    #     raise NotImplementedError
    # pass

    # sample the memorybook
    p_dis = self.Prior()
    l = T.iscalar()
    u = self.rng.uniform((l, p_dis.shape[-2], p_dis.shape[-1]))
    binarybook = T.cast(u <= p_dis, dtype=theano.config.floatX)
    memorybook = self.Trans(binarybook)

    self.take = theano.function([l], [binarybook, memorybook], name='take_action')

    # compile the sampler.
    self.decoder.build_sampler()
    logger.info('sampler function compile done.')
def compile_inference(self):
    """
    build the hidden action prediction.
    """
    inputs = T.imatrix()  # padded input word sequence (for training)
    if self.config['mode'] == 'RNN':
        context = alloc_zeros_matrix(inputs.shape[0], self.config['enc_contxt_dim'])
    elif self.config['mode'] == 'NTM':
        context = T.repeat(self.memory[None, :, :], inputs.shape[0], axis=0)
    else:
        raise NotImplementedError

    # encoding
    memorybook = self.encoder.build_encoder(inputs, context)

    # get Q(a|y) = sigmoid(.|Posterior * encoded)
    q_dis = self.Post(memorybook)
    p_dis = self.Prior()

    self.inference_ = theano.function([inputs], [memorybook, q_dis, p_dis])
    logger.info("inference function compile done.")
def compile_inference(self):
    """
    build the hidden action prediction.
    """
    inputs = T.imatrix()  # padded input word sequence (for training)
    if self.config['mode'] == 'RNN':
        context = alloc_zeros_matrix(inputs.shape[0], self.config['enc_contxt_dim'])
    elif self.config['mode'] == 'NTM':
        context = T.repeat(self.memory[None, :, :], inputs.shape[0], axis=0)
    else:
        raise NotImplementedError

    # encoding
    memorybook = self.encoder.build_encoder(inputs, context)

    # get Q(a|y) = sigmoid(.|Posterior * encoded)
    q_dis = memorybook
    p_dis = self.Prior()

    self.inference_ = theano.function([inputs], [memorybook, q_dis, p_dis])
    logger.info("inference function compile done.")
def compile_sample(self):
    # for Typical Auto-encoder, only conditional generation is useful.
    inputs = T.imatrix()  # padded input word sequence (for training)
    if self.config['mode'] == 'RNN':
        context = alloc_zeros_matrix(inputs.shape[0], self.config['enc_contxt_dim'])
    elif self.config['mode'] == 'NTM':
        context = T.repeat(self.memory[None, :, :], inputs.shape[0], axis=0)
    else:
        raise NotImplementedError

    # encoding
    memorybook = self.encoder.build_encoder(inputs, context)
    self.memorize = theano.function([inputs], memorybook, name='memorize')

    # compile the sampler.
    self.decoder.build_sampler()
    logger.info('sampler function compile done.')
def compile_encoder(self, with_context=False, return_embed=False, return_sequence=False):
    source = T.imatrix()
    self.return_embed = return_embed
    self.return_sequence = return_sequence
    if with_context:
        context = T.matrix()
        self.encode = theano.function([source, context],
                                      self.build_encoder(source, context,
                                                         return_embed=return_embed,
                                                         return_sequence=return_sequence))
    else:
        self.encode = theano.function([source],
                                      self.build_encoder(source, None,
                                                         return_embed=return_embed,
                                                         return_sequence=return_sequence))
def sgd_optimizer(model, lr=0.001, momentum=0.9):
    lr = theano.shared(np.array(lr).astype(theano.config.floatX))
    # Make sure momentum is a sane value
    assert momentum < 1 and momentum >= 0
    # the updates of SGD with momentum
    updates = []
    grads = T.grad(model.costs[0], model.params)
    for param, grad in zip(model.params, grads):
        param_update = theano.shared(param.get_value() * 0.)
        updates.append((param, param - lr * param_update))
        updates.append((param_update, momentum * param_update + (1. - momentum) * grad))
    train_func = theano.function(model.inputs, model.costs, updates=updates)
    valid_func = theano.function(model.inputs, model.costs)
    return train_func, valid_func
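A hypothetical wiring for the helper above; the ToyModel class is an assumption made only to show the interface sgd_optimizer expects (inputs, params, and costs attributes). The same symbolic costs are compiled twice: once with the momentum updates attached for training, once without for validation.

import numpy as np
import theano
import theano.tensor as T

class ToyModel(object):
    # hypothetical minimal model exposing the attributes sgd_optimizer reads
    def __init__(self):
        x = T.matrix('x')
        y = T.vector('y')
        w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')
        cost = T.mean((T.dot(x, w) - y) ** 2)
        self.inputs = [x, y]
        self.params = [w]
        self.costs = [cost]

train_func, valid_func = sgd_optimizer(ToyModel(), lr=0.01, momentum=0.9)

x_np = np.random.rand(5, 3).astype(theano.config.floatX)
y_np = np.random.rand(5).astype(theano.config.floatX)
print(train_func(x_np, y_np))  # cost, with parameter/velocity updates applied
print(valid_func(x_np, y_np))  # cost only, no updates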
def get_eval_fn(model, in3D=False, use_dice=False):
    """Compile the evaluation function of the model."""
    if use_dice:
        insec = T.sum(model.trg * model.output, axis=1)
        tmp = 1 - 2.0 * insec / (T.sum(model.trg, axis=1) + T.sum(model.output, axis=1))
        error = T.mean(tmp)
    else:
        error = T.mean(T.mean(T.power(model.output - model.trg, 2), axis=1))
    if in3D:
        x = T.tensor4('x')
    else:
        x = T.fmatrix("x")
    y = T.fmatrix("y")
    theano_arg_vl = [x, y]
    output_fn_vl = [error, model.output]

    eval_fn = theano.function(
        theano_arg_vl, output_fn_vl,
        givens={model.x: x, model.trg: y})

    return eval_fn
def adadelta(tparams, grads, x, y, mask, lengths, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function([x, y, mask, lengths], cost,
                                    updates=zgup + rg2up,
                                    name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]

    f_update = theano.function([], [], updates=ru2up + param_up,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update
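The helper above (and the variants that follow) split training into the common two-phase Theano pattern: the first compiled function evaluates the cost and stores the gradients in shared variables, and the second applies the parameter step from those stored gradients. A minimal standalone sketch of that split, with an illustrative scalar cost in place of a real model:

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.asarray(0., dtype=theano.config.floatX), name='w')
g_w = theano.shared(np.asarray(0., dtype=theano.config.floatX), name='w_grad')

x = T.scalar('x')
cost = (w - x) ** 2

# phase 1: compute the cost and stash the gradient in a shared variable
f_grad_shared = theano.function([x], cost, updates=[(g_w, T.grad(cost, w))])
# phase 2: apply the parameter update from the stored gradient
f_update = theano.function([], [], updates=[(w, w - 0.1 * g_w)])

print(f_grad_shared(np.asarray(1., dtype=theano.config.floatX)))
f_update()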
def adadelta(tparams, grads, weightVector, iVector, jVector, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function([weightVector, iVector, jVector], cost,
                                    updates=zgup + rg2up,
                                    name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]

    f_update = theano.function([], [], updates=ru2up + param_up,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update
def get_output_for(self, input, deterministic=False, **kwargs):
    def _phase_shift(input, r):
        bsize, c, a, b = input.shape[0], 1, self.output_shape[2] // r, self.output_shape[3] // r
        X = T.reshape(input, (bsize, r, r, a, b))
        X = T.transpose(X, (0, 3, 4, 1, 2))  # bsize, a, b, r2, r1
        X = T.split(x=X, splits_size=[1] * a, n_splits=a, axis=1)  # a, [bsize, b, r, r]
        X = [T.reshape(x, (bsize, b, r, r)) for x in X]
        X = T.concatenate(X, axis=2)  # bsize, b, a*r, r
        X = T.split(x=X, splits_size=[1] * b, n_splits=b, axis=1)  # b, [bsize, a*r, r]
        X = [T.reshape(x, (bsize, a * r, r)) for x in X]
        X = T.concatenate(X, axis=2)  # bsize, a*r, b*r
        return X.dimshuffle(0, 'x', 1, 2)

    Xc = T.split(x=input, splits_size=[input.shape[1] // self.c] * self.c, n_splits=self.c, axis=1)
    return T.concatenate([_phase_shift(xc, self.r) for xc in Xc], axis=1)

# Multiscale Dilated Convolution Block
# This function (not a layer in and of itself, though you could make it one) returns a set of
# concatenated conv2d and dilatedconv2d layers. Each layer uses the same basic filter W, operating
# at a different dilation factor (or taken as the mean of W for the 1x1 conv). The channel-wise
# output of each layer is weighted by a set of coefficients, which are initialized to
# 1 / the total number of dilation scales, meaning that we're starting by taking an elementwise
# mean. These should be learnable parameters.
# NOTES: - I'm considering changing the variable names to be more descriptive, and look less like
#          ridiculous academic code. It's on the to-do list.
#        - I keep the bias and nonlinearity out of the default definition for this layer, as I
#          expect it to be batch-normed and nonlinearized in the model config.
def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """
    # load the saved model
    classifier = pickle.load(open('best_model.pkl'))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print("Predicted values for the first 10 examples in test set:")
    print(predicted_values)
def content_loss(self):
    """Return a list of Theano expressions for the error function, measuring how different the
    current image is from the reference content that was loaded.
    """
    content_loss = []
    if args.content_weight == 0.0:
        return content_loss

    # First extract all the features we need from the model; these are the results after convolution.
    extractor = theano.function([self.model.tensor_img],
                                self.model.get_outputs('conv', self.content_layers))
    result = extractor(self.content_img)

    # Build a list of loss components that compute the mean squared error by comparing the current
    # result to the desired one.
    for l, ref in zip(self.content_layers, result):
        layer = self.model.tensor_outputs['conv' + l]
        loss = T.mean((layer - ref) ** 2.0)
        content_loss.append(('content', l, args.content_weight * loss))
        print(' - Content layer conv{}: {} features in {:,}kb.'.format(l, ref.shape[1], ref.size // 1000))
    return content_loss
def _build_validate_function(self):
    print 'building validate function'
    t1 = datetime.datetime.now()
    data = self.val_data
    captions = self.val_data_captions
    self._index_im_val = T.vector(dtype='int32')   # index to the minibatch
    self._index_cap_val = T.vector(dtype='int32')
    self._cap_len_val = T.scalar(dtype='int32')
    self._validate_function = theano.function(
        inputs=[self._index_im_val, self._index_cap_val, self._cap_len_val, self._run_steps],
        outputs=[self._kl_final, self._logpxz, self._log_likelihood],
        updates=self._updates_train,
        givens={
            self._x: data[self._index_im_val],
            self._y: captions[self._index_cap_val, 0:self._cap_len_val]
        })
    t2 = datetime.datetime.now()
    print (t2 - t1)
def _build_validate_function(self, isVal=True):
    print 'building validate function'
    t1 = datetime.datetime.now()
    if isVal:
        data = self.val_data
    else:
        data = self.test_data
    self._index_val = T.scalar(dtype='int32')   # index to the minibatch
    self._validate_function = theano.function(
        inputs=[self._index_val, self._run_steps],
        outputs=[self._kl_final, self._logpxz, self._log_likelihood],
        updates=self._updates_train,
        givens={
            self._x: data[(self._index_val * batch_size):((self._index_val + 1) * batch_size)].astype(floatX)
        })
    t2 = datetime.datetime.now()
    print (t2 - t1)
def _build_validate_function(self):
    print 'building validate function'
    t1 = datetime.datetime.now()
    data = self.val_data
    captions = self.val_captions
    self._index_im_val = T.vector(dtype='int32')   # index to the minibatch
    self._index_cap_val = T.vector(dtype='int32')
    self._validate_function = theano.function(
        inputs=[self._index_im_val, self._index_cap_val, self._run_steps],
        outputs=[self._kl_final, self._logpxz, self._log_likelihood],
        updates=self._updates_train,
        givens={
            self._x: data[self._index_im_val],
            self._y: captions[self._index_cap_val]
        })
    t2 = datetime.datetime.now()
    print (t2 - t1)
def get_corrupted_input(self, input, corruption_level):
    """This function keeps ``1 - corruption_level`` entries of the inputs the same and zeroes out
    a randomly selected subset of size ``corruption_level``.

    Note : the first argument of theano.rng.binomial is the shape (size) of the random numbers
           it should produce; the second argument is the number of trials; the third argument is
           the probability of success of any trial.

           This will produce an array of 0s and 1s where 1 has a probability of
           ``1 - corruption_level`` and 0 has a probability of ``corruption_level``.

           The binomial function returns int64 by default. int64 multiplied by the input
           type (floatX) always returns float64. To keep all data in floatX when floatX is
           float32, we set the dtype of the binomial to floatX. As in our case the value of
           the binomial is always 0 or 1, this doesn't change the result. This is needed to
           allow the GPU to work correctly, as it only supports float32 for now.
    """
    return self.theano_rng.binomial(size=input.shape, n=1,
                                    p=1 - corruption_level,
                                    dtype=theano.config.floatX) * input
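The corruption idea above can be exercised in isolation. The following is a minimal standalone sketch (variable names are illustrative) that compiles the binomial mask with theano.function() and zeroes roughly 30% of the entries:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

theano_rng = RandomStreams(seed=123)
x = T.matrix('x')
corruption_level = 0.3

# binomial mask of 0s and 1s; a 1 keeps the entry, a 0 zeroes it out
corrupted = theano_rng.binomial(size=x.shape, n=1,
                                p=1 - corruption_level,
                                dtype=theano.config.floatX) * x
corrupt_fn = theano.function([x], corrupted)

print(corrupt_fn(np.ones((2, 8), dtype=theano.config.floatX)))  # ~30% of entries are zero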
def test_infer():
    data_iter = Euclidean(batch_size=27, dim_in=17)
    gbn = test_vae.test_build_GBN(dim_in=data_iter.dims[data_iter.name])

    inference_args = dict(
        n_inference_steps=7,
        pass_gradients=True
    )

    gdir = test_build_gdir(gbn, **inference_args)

    X = T.matrix('x', dtype=floatX)

    rval, constants, updates = gdir.inference(X, X)

    f = theano.function([X], rval.values(), updates=updates)

    x = data_iter.next()[data_iter.name]

    results, samples, full_results, updates = gdir(X, X)

    f = theano.function([X], results.values(), updates=updates)
    print f(x)
def test_sample(n_steps=3, dim_v=13, batch_size=7):
    data_iter = euclidean.Euclidean(dims=dim_v, batch_size=batch_size)
    x = data_iter.next()[data_iter.name]

    model = test_build(dim_v=dim_v)

    X = T.matrix('X', dtype=floatX)
    ph0 = model.ph_v(X)
    r = model.trng.uniform(size=(X.shape[0], model.dim_h))
    h_p = (r <= ph0).astype(floatX)

    outs, updates = model.sample(h_p, n_steps=n_steps)
    keys = outs.keys()
    f = theano.function([X], outs.values(), updates=updates)
    values = f(x)

    outs = model(X, n_chains=batch_size, n_steps=n_steps)
    results, samples, updates, constants = outs
    f = theano.function([X], results.values(), updates=updates)
    f(x)
def sgd(lr, tparams, grads, inp, cost, extra_ups=[], extra_outs=[], exclude_params=set([])):
    '''Stochastic gradient descent'''
    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(
        inp, [cost] + extra_outs, updates=gsup + extra_ups, profile=profile)

    pup = [(p, p - lr * g) for p, g in zip(tools.itemlist(tparams), gshared)
           if p.name not in exclude_params]

    if not isinstance(lr, list):
        lr = [lr]
    f_update = theano.function(lr, [], updates=pup, profile=profile)

    return f_grad_shared, f_update
def init_weights(model, weight_noise=False, weight_scale=0.001, dropout=False, **kwargs):
    '''Initialization function for weights.

    Args:
        model (Layer).
        weight_noise (bool): noise the weights.
        weight_scale (float): scale for weight initialization.
        dropout (bool): use dropout.
        **kwargs: extra kwargs.

    Returns:
        dict: extra kwargs.

    '''
    model.weight_noise = weight_noise
    model.weight_scale = weight_scale
    model.dropout = dropout
    return kwargs
def init_rngs(model, rng=None, trng=None, **kwargs):
    '''Initialization function for RNGs.

    Args:
        model (Layer).
        rng (np.randomStreams).
        trng (theano.randomStreams).
        **kwargs: extra kwargs.

    Returns:
        dict: extra kwargs.

    '''
    if rng is None:
        rng = rng_
    model.rng = rng
    if trng is None:
        model.trng = RandomStreams(random.randint(0, 10000))
    else:
        model.trng = trng
    return kwargs
def _slice2(_x, start, end):
    '''Slightly different slice function than above.

    Args:
        _x (T.tensor).
        start (int).
        end (int).

    Returns:
        T.tensor.

    '''
    if _x.ndim == 1:
        return _x[start:end]
    elif _x.ndim == 2:
        return _x[:, start:end]
    elif _x.ndim == 3:
        return _x[:, :, start:end]
    elif _x.ndim == 4:
        return _x[:, :, :, start:end]
    else:
        raise ValueError('Number of dims (%d) not supported'
                         ' (but can add easily here)' % _x.ndim)
def test_get_output_for(self):
    keys_var = T.ftensor3()
    values_var = T.ftensor3()
    mask_var = T.fmatrix()
    queries_var = T.ftensor3()

    keys_layer = L.InputLayer((None, None, 3), input_var=keys_var)
    values_layer = L.InputLayer((None, None, 5), input_var=values_var)
    mask_layer = L.InputLayer((None, None), input_var=mask_var)
    queries_layer = L.InputLayer((None, None, 7), input_var=queries_var)

    attention_layer = BahdanauKeyValueAttentionLayer(
        [keys_layer, values_layer, mask_layer, queries_layer], 9)
    attention_outputs = L.get_output(attention_layer)

    fn = theano.function([keys_var, values_var, mask_var, queries_var],
                         attention_outputs, on_unused_input='warn')

    keys = np.random.rand(32, 13, 3).astype(np.float32)
    values = np.random.rand(32, 13, 5).astype(np.float32)
    mask = np.random.rand(32, 13).astype(np.float32)
    queries = np.random.rand(32, 17, 7).astype(np.float32)

    _att = fn(keys, values, mask, queries)
    self.assertEqual((32, 17, 5), _att.shape)
def compile(self):
    x_train = T.tensor4('x_train')
    actions_train = T.matrix('actions_train')
    y_train = T.matrix('y_train')

    cost_function = self.squared_error(x_train, actions_train, y_train)
    self.train_function = theano.function([x_train, actions_train, y_train],
                                          cost_function,
                                          updates=self.sgd(cost_function, self.params),
                                          on_unused_input='ignore',
                                          allow_input_downcast=True)

    x_pred = T.tensor3('x_pred')
    actions_pred = T.vector('actions_pred')
    output_function = self.output(x_pred, actions_pred)
    self.predict_function = theano.function([x_pred, actions_pred],
                                            output_function,
                                            on_unused_input='ignore',
                                            allow_input_downcast=True)
    return self
def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """
    # load the saved model
    classifier = cPickle.load(open('best_model.pkl'))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print ("Predicted values for the first 10 examples in test set:")
    print predicted_values
def _generate_train_model_function(self, scores):
    u = T.lvector('u')
    i = T.lvector('i')
    j = T.lvector('j')

    self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W')
    self.S = theano.shared(scores, name='S')

    x_ui = T.dot(self.W, self.S[u, i, :].T)
    x_uj = T.dot(self.W, self.S[u, j, :].T)
    x_uij = x_ui - x_uj

    obj = T.sum(
        T.log(T.nnet.sigmoid(x_uij)).sum() -
        self._lambda_w * 0.5 * (self.W ** 2).sum()
    )
    cost = -obj

    g_cost_W = T.grad(cost=cost, wrt=self.W)

    updates = [(self.W, self.W - self._learning_rate * g_cost_W)]

    self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)
def compile_function(inputs=None, outputs=None, updates=None, givens=None,
                     log_name=None, **kwargs):
    import theano
    if log_name:
        msg = Message("Compiling function %s" % log_name)
        msg.__enter__()
    ret = theano.function(
        inputs=inputs,
        outputs=outputs,
        updates=updates,
        givens=givens,
        on_unused_input='ignore',
        allow_input_downcast=True,
        **kwargs
    )
    if log_name:
        msg.__exit__(None, None, None)
    return ret
def hamiltonian(pos, vel, energy_fn):
    """
    Returns the Hamiltonian (sum of potential and kinetic energy) for the given
    velocity and position.

    Parameters
    ----------
    pos: theano matrix
        Symbolic matrix whose rows are position vectors.
    vel: theano matrix
        Symbolic matrix whose rows are velocity vectors.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.

    Returns
    -------
    return: theano vector
        Vector whose i-th entry is the Hamiltonian at position pos[i] and
        velocity vel[i].
    """
    # assuming mass is 1
    return energy_fn(pos) + kinetic_energy(vel)
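To make the Hamiltonian concrete, here is a minimal sketch that compiles it with theano.function(), reusing hamiltonian() from the example above. The quadratic potential and the unit-mass kinetic_energy helper are assumptions chosen for illustration:

import numpy as np
import theano
import theano.tensor as T

def kinetic_energy(vel):
    # unit mass: 0.5 * ||v||^2 per row
    return 0.5 * (vel ** 2).sum(axis=1)

def toy_potential(pos):
    # illustrative quadratic potential: 0.5 * ||x||^2 per row
    return 0.5 * (pos ** 2).sum(axis=1)

pos = T.matrix('pos')
vel = T.matrix('vel')
h = theano.function([pos, vel], hamiltonian(pos, vel, toy_potential))

positions = np.ones((3, 2), dtype=theano.config.floatX)
velocities = np.ones((3, 2), dtype=theano.config.floatX)
print(h(positions, velocities))  # [2. 2. 2.]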
def draw(self, **kwargs):
    """
    Returns a new position obtained after `n_steps` of HMC simulation.

    Parameters
    ----------
    kwargs: dictionary
        The `kwargs` dictionary is passed to the shared variable
        (self.positions) `get_value()` function. For example, to avoid
        copying the shared variable value, consider passing `borrow=True`.

    Returns
    -------
    rval: numpy matrix
        Numpy matrix whose dimensions are similar to `initial_position`.
    """
    self.simulate()
    return self.positions.get_value(borrow=False)
def build_model(model_):
    global fn_predict, fn_record
    global g_ozer, g_mdl

    g_ozer = dict(simple=VanillaSGD, adam=AdamSGD)[OZER]()
    g_ozer.lr = LEARN_RATE

    s_x = T.tensor4('x')
    s_y = T.ivector('y')
    s_pdpo = T.scalar()
    s_out = model_(s_x, s_pdpo)

    s_y_onehot = T.extra_ops.to_one_hot(s_y, len(g_dataset.label_map))
    s_loss = T.mean(-s_y_onehot * T.log(s_out + 1e-3))
    s_accr = T.mean(T.switch(
        T.eq(T.argmax(s_out, axis=1), T.argmax(s_y_onehot, axis=1)), 1, 0))

    no_dropout = [(s_pdpo, T.constant(0., dtype=th.config.floatX))]
    fn_predict = th.function(
        [s_x, s_y],
        {'pred': s_out, 'accr': s_accr, 'loss': s_loss},
        givens=no_dropout, profile=PROFILE)

    rec_fetches = {
        'x': s_x, 'y': s_y,
        'pred': s_out}
    rec_fetches.update(g_mdl.params_di)
    fn_record = th.function(
        [s_x, s_y], rec_fetches, givens=no_dropout, profile=PROFILE)

    g_ozer.compile(
        [s_x, s_y],
        s_loss,
        g_mdl.params_di.values(),
        fetches_={'pred': s_out, 'loss': s_loss, 'accr': s_accr},
        givens_=[(s_pdpo, T.constant(TRAIN_PDPO, dtype=th.config.floatX))],
        profile_=PROFILE)
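Note that fn_predict above passes a dict as outputs; theano.function then returns a dict with the same keys. A minimal standalone sketch of that feature, with illustrative names:

import theano
import theano.tensor as T

v = T.dscalar('v')
f = theano.function([v], {'double': 2 * v, 'square': v ** 2})
print(f(3.0))  # {'double': 6.0, 'square': 9.0} (values as 0-d arrays)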
def _get_p_from_g(self, cg_id, g, params):
    """
    Utility function to pick the parameter given its gradient.
    """
    p_name = re.search('\(dcost_' + cg_id + '/d(.+?)\)', g.name).group(1)
    return params[p_name]
def f_log_probs(self, probs, x, x_mask, y, y_mask,
                src_selector, trg_selector, cg=None):
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * self.vocab_size + y_flat
    cost = -tensor.log(probs.flatten()[y_flat_idx])
    cost = cost.reshape([y.shape[0], y.shape[1]])
    cost = (cost * y_mask).sum(0)
    func_inps = [x, x_mask, y, y_mask, src_selector, trg_selector]
    return theano.function(
        inputs=func_inps,
        outputs=cost, on_unused_input='warn')
def load_data(data_feeder):
    """
    Helper function to deal with interface of different datasets.
    `data_feeder` should be `train_feeder`, `valid_feeder`, or `test_feeder`.
    """
    return data_feeder(BATCH_SIZE,
                       SEQ_LEN,
                       OVERLAP,
                       Q_LEVELS,
                       Q_ZERO,
                       Q_TYPE)

### Creating computation graph ###
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print('Loading model parameters...')
    with open('%s.pkl' % path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print('Compiling encoders...')
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print('Loading tables...')
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2
    return model
def get_eval_function(self):
    """ We should feed in non-dimshuffled inputs x0, mask0 and y0.
        Used for tracking Dev loss at training time.
    """
    loss = CrossEntropyLoss().connect(self.scores, self.mask, self.y)
    return theano.function([self.x0, self.mask0, self.y0],
                           [self.pred0, loss],
                           name='f_eval',
                           allow_input_downcast=True,
                           on_unused_input='warn',
                           givens=({self.is_train: numpy.cast['int8'](0)}))
def get_distribution_function(self):
    """ Return predictions and scores of shape
        [batch_size, time_steps, label space size].
        Used at test time.
    """
    scores0 = self.scores.reshape([self.x.shape[0], self.x.shape[1],
                                   self.label_space_size]).dimshuffle(1, 0, 2)
    return theano.function([self.x0, self.mask0],
                           [self.pred0, scores0],
                           name='f_pred',
                           allow_input_downcast=True,
                           on_unused_input='warn',
                           givens=({self.is_train: numpy.cast['int8'](0)}))
def get_loss_function(self):
    """ We should feed in non-dimshuffled inputs x0, mask0 and y0.
    """
    loss = CrossEntropyLoss().connect(self.scores, self.mask, self.y)
    grads = gradient_clipping(tensor.grad(loss, self.params),
                              self.max_grad_norm)
    updates = adadelta(self.params, grads)
    return theano.function([self.x0, self.mask0, self.y0],
                           loss,
                           name='f_loss',
                           updates=updates,
                           on_unused_input='warn',
                           givens=({self.is_train: numpy.cast['int8'](1)}))
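The three compiled functions above share one pattern: the symbolic is_train flag is never fed as an input; instead, givens pins it to a constant at compile time, yielding separate train-mode and test-mode functions from one graph. A minimal standalone sketch of that pattern, with illustrative names:

import numpy
import theano
import theano.tensor as T

x = T.vector('x')
is_train = T.bscalar('is_train')  # int8 flag, matching numpy.cast['int8'] above
out = T.switch(is_train, x * 2, x)

# pin the flag at compile time instead of passing it at call time
f_train = theano.function([x], out, givens={is_train: numpy.cast['int8'](1)})
f_test = theano.function([x], out, givens={is_train: numpy.cast['int8'](0)})

xs = numpy.array([1.0, 2.0], dtype=theano.config.floatX)
print(f_train(xs))  # [2. 4.]
print(f_test(xs))   # [1. 2.]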
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print 'Loading model parameters...'
    with open('%s.pkl' % path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print 'Compiling encoders...'
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print 'Loading tables...'
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2
    return model
def setupDebugFunctions(self):
    batch_size = self.cfgParams.batch_size

    print("compiling compute_train_descr() ... ")
    givens_train_descr = {self.x: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    self.compute_train_descr = theano.function(inputs=[self.index],
                                               outputs=self.poseNet.output,
                                               givens=givens_train_descr)
    print("done.")
def setupTrain(self):
    # train_model is a function that updates the model parameters by SGD
    opt = Optimizer(self.grads, self.params)
    self.updates = opt.RMSProp(self.learning_rate, 0.9, 1.0 / 100.)

    batch_size = self.cfgParams.batch_size
    givens_train = {self.x[0]: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    for i in range(1, self.poseNet.cfgParams.numInputs):
        givens_train[self.x[i]] = getattr(self, 'train_data_x' + str(i))[self.index * batch_size:(self.index + 1) * batch_size]
    givens_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size]

    print("compiling train_model() ... ")
    self.train_model = theano.function(inputs=[self.index, self.learning_rate],
                                       outputs=self.cost,
                                       updates=self.updates,
                                       givens=givens_train)
    print("done.")

    print("compiling test_model_on_train() ... ")
    batch_size = self.cfgParams.batch_size
    givens_test_on_train = {self.x[0]: self.train_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    for i in range(1, self.poseNet.cfgParams.numInputs):
        givens_test_on_train[self.x[i]] = getattr(self, 'train_data_x' + str(i))[self.index * batch_size:(self.index + 1) * batch_size]
    givens_test_on_train[self.y] = self.train_data_y[self.index * batch_size:(self.index + 1) * batch_size]
    self.test_model_on_train = theano.function(inputs=[self.index],
                                               outputs=self.errors,
                                               givens=givens_test_on_train)
    print("done.")
def setupValidate(self):
    batch_size = self.cfgParams.batch_size
    givens_val = {self.x[0]: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    for i in range(1, self.poseNet.cfgParams.numInputs):
        givens_val[self.x[i]] = getattr(self, 'val_data_x' + str(i))[self.index * batch_size:(self.index + 1) * batch_size]
    givens_val[self.y] = self.val_data_y[self.index * batch_size:(self.index + 1) * batch_size]

    givens_val_cost = {self.x[0]: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    for i in range(1, self.poseNet.cfgParams.numInputs):
        givens_val_cost[self.x[i]] = getattr(self, 'val_data_x' + str(i))[self.index * batch_size:(self.index + 1) * batch_size]
    givens_val_cost[self.y] = self.val_data_y[self.index * batch_size:(self.index + 1) * batch_size]

    print("compiling validation_cost() ... ")
    self.validation_cost = theano.function(inputs=[self.index],
                                           outputs=self.cost,
                                           givens=givens_val_cost)
    print("done.")

    print("compiling validation_error() ... ")
    self.validation_error = theano.function(inputs=[self.index],
                                            outputs=self.errors,
                                            givens=givens_val)
    print("done.")

    # debug and so
    print("compiling compute_val_descr() ... ")
    givens_val_descr = {self.x[0]: self.val_data_x[self.index * batch_size:(self.index + 1) * batch_size]}
    for i in range(1, self.poseNet.cfgParams.numInputs):
        givens_val_descr[self.x[i]] = getattr(self, 'val_data_x' + str(i))[self.index * batch_size:(self.index + 1) * batch_size]
    self.compute_val_descr = theano.function(inputs=[self.index],
                                             outputs=self.poseNet.output,
                                             givens=givens_val_descr)
    print("done.")
def compile_sample(self):
    """
    build the sampler function here <:::>
    """
    # context vectors (as)
    self.decoder.build_sampler()

    l = T.iscalar()
    logger.info("compiling the computational graph :: action sampler")
    self.action_sampler = theano.function([l], self.rng.normal((l, self.config['action_dim'])))

    action = T.matrix()
    logger.info("compiling the computational graph :: transform function")
    self.transform = theano.function([action], self.context_trans(action))
    logger.info("display functions compile done.")