The following code examples, extracted from open-source Python projects, illustrate how to use lasagne.layers.get_all_params().
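Before the project examples, a quick orientation: get_all_params() walks the graph beneath the layer(s) you pass it and collects every Theano shared variable (weights, biases, and so on), optionally filtered by parameter tags such as trainable=True or regularizable=True. The returned list is what the examples below hand to the lasagne.updates functions to build their training updates. A minimal sketch (the toy network and its sizes here are illustrative, not taken from any of the examples below):

import lasagne.layers as L
from lasagne.nonlinearities import softmax

# toy network: input -> hidden dense layer -> softmax output
l_in = L.InputLayer(shape=(None, 100))
l_hid = L.DenseLayer(l_in, num_units=50)
l_out = L.DenseLayer(l_hid, num_units=10, nonlinearity=softmax)

# all shared variables below l_out (two W matrices, two b vectors)
all_params = L.get_all_params(l_out)

# only parameters tagged as trainable -- typically what is passed to an update rule
trainable_params = L.get_all_params(l_out, trainable=True)
print([p.name for p in trainable_params])  # e.g. ['W', 'b', 'W', 'b']
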
def fit(
    self, docs, y, max_epochs, epoch_size=None, val_docs=None, val_y=None,
    update_params_iter=itertools.repeat([]), save_best=True
):
    has_val = val_docs is not None
    with log_time('training...', 'training took {:.0f}s'):
        params = get_all_params(self.network)
        best_perf, best_params = None, None
        epoch_iter = EpochIterator(
            self.gen_batches, (docs, y),
            (epoch_size + self.batch_size - 1) // self.batch_size if epoch_size else None
        )
        for i, batches, update_params in zip(range(max_epochs), epoch_iter, update_params_iter):
            train_res = [self._train(*batch, *update_params) for batch in batches]
            val_res = np.concatenate(
                [self._test(*batch[:-1]) for batch in self.gen_batches(val_docs)], axis=0
            )[:len(val_y)] if has_val else None
            perf = self.perf(i, train_res, val_y, val_res)
            if (has_val and save_best) and (best_perf is None or perf >= best_perf):
                best_perf = perf
                best_params = {param: param.get_value() for param in params}
        if has_val and save_best:
            for param, value in best_params.items():
                param.set_value(value)

def __build_loss_train__fn__(self):
    # create loss function
    prediction = layers.get_output(self.net)
    loss = objectives.categorical_crossentropy(prediction, self.__target_var__)
    loss = loss.mean() + 1e-4 * regularization.regularize_network_params(self.net, regularization.l2)

    val_acc = T.mean(T.eq(T.argmax(prediction, axis=1), self.__target_var__),
                     dtype=theano.config.floatX)

    # create parameter update expressions
    params = layers.get_all_params(self.net, trainable=True)
    self.eta = theano.shared(sp.array(sp.float32(0.05), dtype=sp.float32))
    update_rule = updates.nesterov_momentum(loss, params, learning_rate=self.eta, momentum=0.9)

    # compile training function that updates parameters and returns training loss
    self.__train_fn__ = theano.function([self.__input_var__, self.__target_var__], loss,
                                        updates=update_rule)
    self.__predict_fn__ = theano.function([self.__input_var__],
                                          layers.get_output(self.net, deterministic=True))
    self.__val_fn__ = theano.function([self.__input_var__, self.__target_var__], [loss, val_acc])

def build_train_func(rank=0, **kwargs):
    print("rank: {} Building model".format(rank))
    resnet = build_resnet()

    print("Building training function")
    x = T.ftensor4('x')
    y = T.imatrix('y')

    prob = L.get_output(resnet['prob'], x, deterministic=False)
    loss = T.nnet.categorical_crossentropy(prob, y.flatten()).mean()
    params = L.get_all_params(resnet.values(), trainable=True)
    sgd_updates = updates.sgd(loss, params, learning_rate=1e-4)

    # make a function to compute and store the raw gradient
    f_train = theano.function(inputs=[x, y],
                              outputs=loss,  # (assumes this is an avg)
                              updates=sgd_updates)
    return f_train, "original"

def get_params_internal(self, **tags):  # this gives ALL the vars (not the params values)
    return L.get_all_params(  # this lasagne function also returns all var below the passed layers
        L.concat(self._output_layers),
        **tags
    )

def prep_train(alpha=0.0002, nz=100):
    E, D = build_net(nz=nz)

    x = T.tensor4('x')

    # Get outputs z=E(x), x_hat=D(z)
    encoding = get_output(E, x)
    decoding = get_output(D, encoding)

    # Get parameters of E and D
    params_e = get_all_params(E, trainable=True)
    params_d = get_all_params(D, trainable=True)
    params = params_e + params_d

    # Calc cost and updates
    cost = T.mean(squared_error(x, decoding))
    grad = T.grad(cost, params)
    updates = adam(grad, params, learning_rate=alpha)

    train = theano.function(inputs=[x], outputs=cost, updates=updates)
    rec = theano.function(inputs=[x], outputs=decoding)
    test = theano.function(inputs=[x], outputs=cost)

    return train, test, rec, E, D

def create_train_func(layers):
    Xa, Xb = T.tensor4('Xa'), T.tensor4('Xb')
    Xa_batch, Xb_batch = T.tensor4('Xa_batch'), T.tensor4('Xb_batch')

    Tp = get_output(
        layers['trans'],
        inputs={
            layers['inputa']: Xa, layers['inputb']: Xb,
        },
        deterministic=False,
    )

    # transforms: ground-truth, predicted
    Tg = T.fmatrix('Tg')
    Tg_batch = T.fmatrix('Tg_batch')
    theta_gt = Tg.reshape((-1, 2, 3))
    theta_pr = Tp.reshape((-1, 2, 3))

    # grids: ground-truth, predicted
    Gg = T.dot(theta_gt, _meshgrid(20, 20))
    Gp = T.dot(theta_pr, _meshgrid(20, 20))

    train_loss = T.mean(T.sqr(Gg - Gp))

    params = get_all_params(layers['trans'], trainable=True)
    updates = nesterov_momentum(train_loss, params, 1e-3, 0.9)

    corr_func = theano.function(
        inputs=[theano.In(Xa_batch), theano.In(Xb_batch), theano.In(Tg_batch)],
        outputs=[Tp, train_loss],
        updates=updates,
        givens={
            Xa: Xa_batch, Xb: Xb_batch,  # Ia, Ib
            Tg: Tg_batch,                # transform Ia --> Ib
        }
    )

    return corr_func

def __init__(self, K, vocab_size, num_chars, W_init, nhidden, embed_dim, dropout, train_emb,
             char_dim, use_feat, gating_fn, save_attn=False):
    self.nhidden = nhidden
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.train_emb = train_emb
    self.char_dim = char_dim
    self.learning_rate = LEARNING_RATE
    self.num_chars = num_chars
    self.use_feat = use_feat
    self.save_attn = save_attn
    self.gating_fn = gating_fn

    self.use_chars = self.char_dim != 0
    if W_init is None:
        W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

    doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
        T.wtensor3('cand')
    docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
        T.bmatrix('c_mask')
    target_var = T.ivector('ans')
    feat_var = T.imatrix('feat')
    doc_toks, qry_toks = T.imatrix('dchars'), T.imatrix('qchars')
    tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
    cloze_var = T.ivector('cloze')
    self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var, docmask_var,
                 qmask_var, tok_var, tok_mask, candmask_var, feat_var, cloze_var]

    self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
        self.build_network(K, vocab_size, W_init))

    self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean()
    self.eval_fn = lasagne.objectives.categorical_accuracy(self.predicted_probs,
                                                           target_var).mean()

    loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean()
    eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val,
                                                          target_var).mean()

    self.params = L.get_all_params(self.network, trainable=True)

    updates = lasagne.updates.adam(self.loss_fn, self.params, learning_rate=self.learning_rate)

    self.train_fn = theano.function(self.inps,
                                    [self.loss_fn, self.eval_fn, self.predicted_probs],
                                    updates=updates,
                                    on_unused_input='warn')
    self.validate_fn = theano.function(self.inps,
                                       [loss_fn_val, eval_fn_val, predicted_probs_val] + attentions,
                                       on_unused_input='warn')

def __init__(self, conf):
    self.conf = conf
    if self.conf.act == "linear":
        self.conf.act = linear
    elif self.conf.act == "sigmoid":
        self.conf.act = sigmoid
    elif self.conf.act == "relu":
        self.conf.act = rectify
    elif self.conf.act == "tanh":
        self.conf.act = tanh
    else:
        raise ValueError("Unknown activation function", self.conf.act)

    input_var_first = T.matrix('inputs1')
    input_var_second = T.matrix('inputs2')
    target_var = T.matrix('targets')

    # create network
    self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first,
                                                                              input_var_second)

    self.out = get_output(self.autoencoder)

    loss = squared_error(self.out, target_var)
    loss = loss.mean()

    params = get_all_params(self.autoencoder, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr,
                                momentum=self.conf.momentum)

    # training function
    self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss,
                                    updates=updates)

    # function to reconstruct
    test_reconstruction = get_output(self.autoencoder, deterministic=True)
    self.reconstruction_fn = theano.function([input_var_first, input_var_second],
                                             test_reconstruction)

    # encoding function
    test_encode = get_output([encoder_first, encoder_second], deterministic=True)
    self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)

    # utils
    blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
    self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
    self.blas_scal = blas('scal', np.array([], dtype=float))

    # load weights if necessary
    if self.conf.load_model is not None:
        self.load_model()

def __init__(self, K, vocab_size, num_chars, W_init, regularizer, rlambda, nhidden, embed_dim,
             dropout, train_emb, subsample, char_dim, use_feat, feat_cnt, save_attn=False):
    self.nhidden = nhidden
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.train_emb = train_emb
    self.subsample = subsample
    self.char_dim = char_dim
    self.learning_rate = LEARNING_RATE
    self.num_chars = num_chars
    self.use_feat = use_feat
    self.feat_cnt = feat_cnt
    self.save_attn = save_attn

    norm = lasagne.regularization.l2 if regularizer == 'l2' else lasagne.regularization.l1
    self.use_chars = self.char_dim != 0
    if W_init is None:
        W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

    doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
        T.wtensor3('cand')
    docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
        T.bmatrix('c_mask')
    target_var = T.ivector('ans')
    feat_var = T.imatrix('feat')
    doc_toks, qry_toks = T.imatrix('dchars'), T.imatrix('qchars')
    tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
    cloze_var = T.ivector('cloze')
    match_feat_var = T.itensor3('match_feat')
    use_char_var = T.tensor3('use_char')
    use_char_q_var = T.tensor3('use_char_q')
    self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var, docmask_var,
                 qmask_var, tok_var, tok_mask, candmask_var, feat_var, cloze_var, match_feat_var,
                 use_char_var, use_char_q_var]

    if rlambda > 0.:
        W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
    else:
        W_pert = W_init
    self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
        self.build_network(K, vocab_size, W_pert))

    self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean() + \
        rlambda * norm(W_emb - W_init)
    self.eval_fn = lasagne.objectives.categorical_accuracy(self.predicted_probs,
                                                           target_var).mean()

    loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean() + \
        rlambda * norm(W_emb - W_init)
    eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val,
                                                          target_var).mean()

    self.params = L.get_all_params(self.network, trainable=True)

    updates = lasagne.updates.adam(self.loss_fn, self.params, learning_rate=self.learning_rate)

    self.train_fn = theano.function(self.inps,
                                    [self.loss_fn, self.eval_fn, self.predicted_probs],
                                    updates=updates,
                                    on_unused_input='ignore')
    self.validate_fn = theano.function(self.inps,
                                       [loss_fn_val, eval_fn_val, predicted_probs_val] + attentions,
                                       on_unused_input='ignore')

def _compile(self, ddqn):
    a = self.inputs["A"]
    r = self.inputs["R"]
    nonterminal = self.inputs["Nonterminal"]

    q = ls.get_output(self.network, deterministic=True)

    if ddqn:
        q2 = ls.get_output(self.network, deterministic=True,
                           inputs=self.alternate_input_mappings)
        q2_action_ref = tensor.argmax(q2, axis=1)

        q2_frozen = ls.get_output(self.frozen_network, deterministic=True)
        q2_max = q2_frozen[tensor.arange(q2_action_ref.shape[0]), q2_action_ref]
    else:
        q2_max = tensor.max(ls.get_output(self.frozen_network, deterministic=True), axis=1)

    target_q = r + self.gamma * nonterminal * q2_max
    predicted_q = q[tensor.arange(q.shape[0]), a]

    loss = self.build_loss_expression(predicted_q, target_q).sum()

    params = ls.get_all_params(self.network, trainable=True)

    # updates = lasagne.updates.rmsprop(loss, params, self._learning_rate, rho=0.95)
    updates = deepmind_rmsprop(loss, params, self.learning_rate)

    # TODO does FAST_RUN speed anything up?
    mode = None  # "FAST_RUN"

    s0_img = self.inputs["S0"]
    s1_img = self.inputs["S1"]

    if self.misc_state_included:
        s0_misc = self.inputs["S0_misc"]
        s1_misc = self.inputs["S1_misc"]
        print "Compiling the training function..."
        self._learn = theano.function([s0_img, s0_misc, s1_img, s1_misc, a, r, nonterminal],
                                      loss,
                                      updates=updates, mode=mode, name="learn_fn")
        print "Compiling the evaluation function..."
        self._evaluate = theano.function([s0_img, s0_misc], q, mode=mode, name="eval_fn")
    else:
        print "Compiling the training function..."
        self._learn = theano.function([s0_img, s1_img, a, r, nonterminal], loss,
                                      updates=updates, mode=mode, name="learn_fn")
        print "Compiling the evaluation function..."
        self._evaluate = theano.function([s0_img], q, mode=mode, name="eval_fn")
    print "Network compiled."

def _init_model(self, in_size, out_size, n_hid=10, learning_rate_sl=0.005,
                learning_rate_rl=0.005, batch_size=32, ment=0.1):
    # 2-layer MLP
    self.in_size = in_size  # x and y coordinate
    self.out_size = out_size  # up, down, right, left
    self.batch_size = batch_size
    self.learning_rate = learning_rate_rl
    self.n_hid = n_hid

    input_var, turn_mask, act_mask, reward_var = T.ftensor3('in'), T.imatrix('tm'), \
        T.itensor3('am'), T.fvector('r')
    in_var = T.reshape(input_var, (input_var.shape[0] * input_var.shape[1], self.in_size))

    l_mask_in = L.InputLayer(shape=(None, None), input_var=turn_mask)

    pol_in = T.fmatrix('pol-h')
    l_in = L.InputLayer(shape=(None, None, self.in_size), input_var=input_var)
    l_pol_rnn = L.GRULayer(l_in, n_hid, hid_init=pol_in, mask_input=l_mask_in)  # B x H x D
    pol_out = L.get_output(l_pol_rnn)[:, -1, :]
    l_den_in = L.ReshapeLayer(l_pol_rnn, (turn_mask.shape[0] * turn_mask.shape[1], n_hid))  # BH x D
    l_out = L.DenseLayer(l_den_in, self.out_size, nonlinearity=lasagne.nonlinearities.softmax)

    self.network = l_out
    self.params = L.get_all_params(self.network)

    # rl
    probs = L.get_output(self.network)  # BH x A
    out_probs = T.reshape(probs, (input_var.shape[0], input_var.shape[1], self.out_size))  # B x H x A
    log_probs = T.log(out_probs)
    act_probs = (log_probs * act_mask).sum(axis=2)  # B x H
    ep_probs = (act_probs * turn_mask).sum(axis=1)  # B
    H_probs = -T.sum(T.sum(out_probs * log_probs, axis=2), axis=1)  # B
    self.loss = 0. - T.mean(ep_probs * reward_var + ment * H_probs)

    updates = lasagne.updates.rmsprop(self.loss, self.params, learning_rate=learning_rate_rl,
                                      epsilon=1e-4)

    self.inps = [input_var, turn_mask, act_mask, reward_var, pol_in]
    self.train_fn = theano.function(self.inps, self.loss, updates=updates)
    self.obj_fn = theano.function(self.inps, self.loss)
    self.act_fn = theano.function([input_var, turn_mask, pol_in], [out_probs, pol_out])

    # sl
    sl_loss = 0. - T.mean(ep_probs)
    sl_updates = lasagne.updates.rmsprop(sl_loss, self.params, learning_rate=learning_rate_sl,
                                         epsilon=1e-4)

    self.sl_train_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss,
                                       updates=sl_updates)
    self.sl_obj_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss)

def build_instrument_model(self, n_vars, **kwargs):

    targets = TT.vector()
    instrument_vars = TT.matrix()

    instruments = layers.InputLayer((None, n_vars), instrument_vars)
    instruments = layers.DropoutLayer(instruments, p=0.2)

    dense_layer = layers.DenseLayer(instruments, kwargs['dense_size'],
                                    nonlinearity=nonlinearities.tanh)
    dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

    for _ in xrange(kwargs['n_dense_layers'] - 1):
        dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'],
                                        nonlinearity=nonlinearities.tanh)
        dense_layer = layers.DropoutLayer(dense_layer, p=0.5)

    self.instrument_output = layers.DenseLayer(dense_layer, 1,
                                               nonlinearity=nonlinearities.linear)
    init_params = layers.get_all_param_values(self.instrument_output)
    prediction = layers.get_output(self.instrument_output, deterministic=False)
    test_prediction = layers.get_output(self.instrument_output, deterministic=True)

    # flexible here, endog variable can be categorical, continuous, etc.
    l2_cost = regularization.regularize_network_params(self.instrument_output,
                                                       regularization.l2)
    loss = objectives.squared_error(prediction.flatten(), targets.flatten()).mean() + 1e-4 * l2_cost
    loss_total = objectives.squared_error(prediction.flatten(), targets.flatten()).mean()

    params = layers.get_all_params(self.instrument_output, trainable=True)
    param_updates = updates.adadelta(loss, params)

    self._instrument_train_fn = theano.function(
        [
            targets,
            instrument_vars,
        ],
        loss,
        updates=param_updates
    )

    self._instrument_loss_fn = theano.function(
        [
            targets,
            instrument_vars,
        ],
        loss_total
    )

    self._instrument_output_fn = theano.function([instrument_vars], test_prediction)

    return init_params

def build_treatment_model(self, n_vars, **kwargs):

    input_vars = TT.matrix()
    instrument_vars = TT.matrix()
    targets = TT.vector()

    inputs = layers.InputLayer((None, n_vars), input_vars)
    inputs = layers.DropoutLayer(inputs, p=0.2)

    dense_layer = layers.DenseLayer(inputs, 2 * kwargs['dense_size'],
                                    nonlinearity=nonlinearities.rectify)
    dense_layer = layers.batch_norm(dense_layer)
    dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

    for _ in xrange(kwargs['n_dense_layers'] - 1):
        dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'],
                                        nonlinearity=nonlinearities.rectify)
        dense_layer = layers.batch_norm(dense_layer)

    self.treatment_output = layers.DenseLayer(dense_layer, 1,
                                              nonlinearity=nonlinearities.linear)
    init_params = layers.get_all_param_values(self.treatment_output)

    prediction = layers.get_output(self.treatment_output, deterministic=False)
    test_prediction = layers.get_output(self.treatment_output, deterministic=True)

    l2_cost = regularization.regularize_network_params(self.treatment_output,
                                                       regularization.l2)
    loss = gmm_loss(prediction, targets, instrument_vars) + 1e-4 * l2_cost

    params = layers.get_all_params(self.treatment_output, trainable=True)
    param_updates = updates.adadelta(loss, params)

    self._train_fn = theano.function(
        [
            input_vars,
            targets,
            instrument_vars,
        ],
        loss,
        updates=param_updates
    )

    self._loss_fn = theano.function(
        [
            input_vars,
            targets,
            instrument_vars,
        ],
        loss,
    )

    self._output_fn = theano.function(
        [
            input_vars,
        ],
        test_prediction,
    )

    return init_params

def train_resnet(
        batch_size=64,  # batch size on each GPU
        validFreq=1,
        do_valid=False,
        learning_rate=1e-3,
        update_rule=updates.sgd,  # updates.nesterov_momentum,
        n_epoch=3,
        **update_kwargs):

    # Initialize single GPU.
    theano.gpuarray.use("cuda")

    t_0 = time.time()
    print("Loading data (synthetic)")
    train, valid, test = load_data()
    x_train, y_train = train
    x_valid, y_valid = valid
    x_test, y_test = test

    print("Building model")
    resnet = build_resnet()
    params = L.get_all_params(resnet.values(), trainable=True)

    f_train_minibatch, f_predict = build_training(resnet, params, update_rule,
                                                  learning_rate=learning_rate, **update_kwargs)

    t_last = t_1 = time.time()
    print("Total setup time: {:,.1f} s".format(t_1 - t_0))
    print("Starting training")

    for ep in range(n_epoch):
        train_loss = 0.
        i = 0
        for mb_idxs in iter_mb_idxs(batch_size, len(x_train), shuffle=True):
            train_loss += f_train_minibatch(x_train[mb_idxs], y_train[mb_idxs])
            i += 1
        train_loss /= i

        print("\nEpoch: ", ep)
        print("Training Loss: {:.3f}".format(train_loss))

        if do_valid and ep % validFreq == 0:
            valid_loss = valid_mc = 0.
            i = 0
            for mb_idxs in iter_mb_idxs(batch_size, len(x_valid), shuffle=False):
                mb_loss, mb_mc = f_predict(x_valid[mb_idxs], y_valid[mb_idxs])
                valid_loss += mb_loss
                valid_mc += mb_mc
                i += 1
            valid_loss /= i
            valid_mc /= i
            print("Validation Loss: {:3f}, Accuracy: {:3f}".format(valid_loss, 1 - valid_mc))

        t_2 = time.time()
        print("(epoch total time: {:,.1f} s)".format(t_2 - t_last))
        t_last = t_2

    print("\nTotal training time: {:,.1f} s".format(t_last - t_1))

def train_resnet(
        batch_size=64,  # batch size on each GPU
        validFreq=1,
        do_valid=False,
        learning_rate=1e-3,
        update_rule=updates.sgd,  # updates.nesterov_momentum,
        n_epoch=3,
        n_gpu=None,  # later get this from synk.fork
        **update_kwargs):

    n_gpu = synk.fork(n_gpu)  # (n_gpu==None will use all)

    t_0 = time.time()
    print("Loading data (synthetic)")
    train, valid, test = load_data()
    x_train, y_train = [synk.data(d) for d in train]
    x_valid, y_valid = [synk.data(d) for d in valid]
    x_test, y_test = [synk.data(d) for d in test]

    full_mb_size = batch_size * n_gpu
    learning_rate = learning_rate * n_gpu  # (one technique for larger minibatches)
    num_valid_slices = len(x_valid) // n_gpu // batch_size
    print("Will compute validation using {} slices".format(num_valid_slices))

    print("Building model")
    resnet = build_resnet()
    params = L.get_all_params(resnet.values(), trainable=True)

    f_train_minibatch, f_predict = build_training(resnet, params, update_rule,
                                                  learning_rate=learning_rate, **update_kwargs)

    synk.distribute()
    synk.broadcast(params)  # (ensure all GPUs have same values)

    t_last = t_1 = time.time()
    print("Total setup time: {:,.1f} s".format(t_1 - t_0))
    print("Starting training")

    for ep in range(n_epoch):
        train_loss = 0.
        i = 0
        for mb_idxs in iter_mb_idxs(full_mb_size, len(x_train), shuffle=True):
            train_loss += f_train_minibatch(x_train, y_train, batch=mb_idxs)
            i += 1
        train_loss /= i

        print("\nEpoch: ", ep)
        print("Training Loss: {:.3f}".format(train_loss))

        if do_valid and ep % validFreq == 0:
            valid_loss, valid_mc = f_predict(x_valid, y_valid, num_slices=num_valid_slices)
            print("Validation Loss: {:3f}, Accuracy: {:3f}".format(
                float(valid_loss), float(1 - valid_mc)))

        t_2 = time.time()
        print("(epoch total time: {:,.1f} s)".format(t_2 - t_last))
        t_last = t_2

    print("\nTotal training time: {:,.1f} s".format(t_last - t_1))