The following 19 code examples, extracted from open-source Python projects, illustrate how to use the lasagne.updates module.
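Before the extracted examples, here is a minimal, self-contained sketch of the typical workflow around lasagne.updates: build a network, define a loss, collect the trainable parameters, ask an update rule for the parameter updates, and compile them into a Theano function. The tiny network, loss, and variable names below are illustrative assumptions, not taken from any of the projects quoted here.

import theano
import theano.tensor as T
import lasagne

# Symbolic inputs: a batch of 10-dimensional features and integer class labels.
input_var = T.matrix('x')
target_var = T.ivector('y')

# A deliberately small network, just to have trainable parameters.
network = lasagne.layers.InputLayer((None, 10), input_var=input_var)
network = lasagne.layers.DenseLayer(network, num_units=3,
                                    nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()

# Every rule in lasagne.updates takes (loss_or_grads, params, ...) and returns
# an OrderedDict of parameter updates suitable for theano.function.
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params,
                                            learning_rate=0.01, momentum=0.9)

train_fn = theano.function([input_var, target_var], loss, updates=updates)

Each call to train_fn(X_batch, y_batch) then performs one gradient step; the extracted examples below follow the same pattern with project-specific networks and losses.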
def create_updates(loss, network, opt, learning_rate, momentum, beta1, beta2):
    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = theano.grad(loss, params)
    # if max_norm:
    #     names = ['crf.U', 'crf.W_h', 'crf.W_c', 'crf.b']
    #     constraints = [grad for param, grad in zip(params, grads) if param.name in names]
    #     assert len(constraints) == 4
    #     scaled_grads = total_norm_constraint(constraints, max_norm=max_norm)
    #     counter = 0
    #     for i in xrange(len(params)):
    #         param = params[i]
    #         if param.name in names:
    #             grads[i] = scaled_grads[counter]
    #             counter += 1
    #     assert counter == 4
    if opt == 'adam':
        updates = adam(grads, params=params, learning_rate=learning_rate,
                       beta1=beta1, beta2=beta2)
    elif opt == 'momentum':
        updates = nesterov_momentum(grads, params=params,
                                    learning_rate=learning_rate, momentum=momentum)
    else:
        raise ValueError('unknown optimization algorithm: %s' % opt)

    return updates
def create_optimiser(optimiser):
    """
    Creates a function that returns an optimiser and (optional)
    a learn rate schedule
    """
    if optimiser['schedule'] is not None:
        # if we have a learn rate schedule, create a theano shared
        # variable and a corresponding update
        lr = theano.shared(np.float32(optimiser['params']['learning_rate']))

        # create a copy of the optimiser config dict so we do not change it
        from copy import deepcopy
        optimiser = deepcopy(optimiser)
        optimiser['params']['learning_rate'] = lr
        lrs = nn.LearnRateSchedule(learning_rate=lr, **optimiser['schedule'])
    else:
        lrs = None

    return partial(getattr(lnn.updates, optimiser['name']),
                   **optimiser['params']), lrs
def deepmind_rmsprop(loss_or_grads, params, learning_rate=0.00025,
                     rho=0.95, epsilon=0.01):
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)

        acc_grad = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=param.broadcastable)
        acc_grad_new = rho * acc_grad + (1 - rho) * grad

        acc_rms = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                broadcastable=param.broadcastable)
        acc_rms_new = rho * acc_rms + (1 - rho) * grad ** 2

        updates[acc_grad] = acc_grad_new
        updates[acc_rms] = acc_rms_new

        updates[param] = (param - learning_rate *
                          (grad / T.sqrt(acc_rms_new - acc_grad_new ** 2 + epsilon)))

    return updates
def __init__(self, params, params_task, X, model, policy):
    self.rng = np.random.RandomState()
    self.model = model
    self.policy = policy
    self.params = params
    self.params_task = params_task
    self.x = T.matrix('x')

    cost = self.control(self.x)
    self.fwpass = theano.function(inputs=[self.x], outputs=cost,
                                  allow_input_downcast=True)
    self.train_func = theano.function(
        inputs=[self.x], outputs=[cost],
        updates=self.adam(cost,
                          lasagne.layers.get_all_params(self.policy, trainable=True),
                          learning_rate=self.params['learning_rate']))
    self.policy_network = theano.function(inputs=[self.x],
                                          outputs=self.predict(self.x))
def define_updates(output_layer, X, Y):
    output_train = lasagne.layers.get_output(output_layer)
    output_test = lasagne.layers.get_output(output_layer, deterministic=True)

    # set up the loss that we aim to minimize; when using categorical cross
    # entropy our Y should be ints, not one-hot
    loss = lasagne.objectives.categorical_crossentropy(
        T.clip(output_train, 0.000001, 0.999999), Y)
    loss = loss.mean()

    acc = T.mean(T.eq(T.argmax(output_train, axis=1), Y),
                 dtype=theano.config.floatX)

    # if using ResNet use L2 regularization
    all_layers = lasagne.layers.get_all_layers(output_layer)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        all_layers, lasagne.regularization.l2) * P.L2_LAMBDA
    loss = loss + l2_penalty

    # set up loss functions for validation dataset
    test_loss = lasagne.objectives.categorical_crossentropy(
        T.clip(output_test, 0.000001, 0.999999), Y)
    test_loss = test_loss.mean()
    test_loss = test_loss + l2_penalty

    test_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y),
                      dtype=theano.config.floatX)

    # get parameters from network and set up sgd with nesterov momentum to
    # update parameters; l_r is a shared variable so it can be changed
    l_r = theano.shared(np.array(LR_SCHEDULE[0], dtype=theano.config.floatX))
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=l_r, momentum=P.MOMENTUM)
    #updates = adam(loss, params, learning_rate=l_r)

    prediction_binary = T.argmax(output_train, axis=1)
    test_prediction_binary = T.argmax(output_test, axis=1)

    # set up training and prediction functions
    train_fn = theano.function(
        inputs=[X, Y],
        outputs=[loss, l2_penalty, acc, prediction_binary, output_train[:, 1]],
        updates=updates)
    valid_fn = theano.function(
        inputs=[X, Y],
        outputs=[test_loss, l2_penalty, test_acc, test_prediction_binary, output_test[:, 1]])

    return train_fn, valid_fn, l_r
def __init__(self, isTrain, isNN):
    super(RegressionNN, self).__init__(isTrain, isNN)
    # data preprocessing
    #self.dataPreprocessing()

    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            #('hidden2', layers.DenseLayer),
            #('hidden3', layers.DenseLayer),
            ('output', layers.DenseLayer),
            ],
        # layer parameters:
        input_shape=(None, 13),  # input dimension is 13
        hidden_num_units=6,      # number of units in hidden layer
        #hidden2_num_units=8,    # number of units in hidden layer
        #hidden3_num_units=4,    # number of units in hidden layer
        output_nonlinearity=None,  # output layer uses identity (linear) activation
        output_num_units=1,        # output dimension is 1

        # objective function
        objective_loss_function=lasagne.objectives.squared_error,

        # optimization method:
        update=lasagne.updates.nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.4,

        # use 20% as validation
        train_split=TrainSplit(eval_size=0.2),

        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=100,   # we want to train this many epochs
        verbose=0,
        )
def setup(dim, criterion='mmd', biased=True, streaming_est=False, opt_log=True,
          linear_kernel=False, opt_sigma=False, init_log_sigma=0,
          net_version='basic', hotelling_reg=0,
          strat='nesterov_momentum', learning_rate=0.01, **opt_args):
    input_p = T.matrix('input_p')
    input_q = T.matrix('input_q')

    mmd2_pq, obj, rep_p, net_p, net_q, log_sigma = make_network(
        input_p, input_q, dim,
        criterion=criterion, biased=biased, streaming_est=streaming_est,
        opt_log=opt_log, linear_kernel=linear_kernel, log_sigma=init_log_sigma,
        hotelling_reg=hotelling_reg, net_version=net_version)

    params = lasagne.layers.get_all_params([net_p, net_q], trainable=True)
    if opt_sigma:
        params.append(log_sigma)

    fn = getattr(lasagne.updates, strat)
    updates = fn(obj, params, learning_rate=learning_rate, **opt_args)

    print("Compiling...", file=sys.stderr, end='')
    train_fn = theano.function(
        [input_p, input_q], [mmd2_pq, obj], updates=updates)
    val_fn = theano.function([input_p, input_q], [mmd2_pq, obj])
    get_rep = theano.function([input_p], rep_p)
    print("done", file=sys.stderr)

    return params, train_fn, val_fn, get_rep, log_sigma
def updater(self):
    return getattr(lasagne.updates, self.learning_algorithm)
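This getattr lookup is the same pattern used in the setup and std_opt examples elsewhere in this collection: the update rule is selected by its string name, typically from a configuration value. A minimal self-contained sketch of the idea follows; the names get_update_rule and w are illustrative and not part of any of the projects quoted here.

import numpy as np
import theano
import theano.tensor as T
import lasagne.updates

def get_update_rule(name):
    # Resolve a rule such as 'adam', 'rmsprop' or 'nesterov_momentum' by name.
    return getattr(lasagne.updates, name)

# A single shared parameter and a toy quadratic loss.
w = theano.shared(np.zeros(3, dtype='float32'), name='w')
loss = T.sum((w - 1.0) ** 2)

updates = get_update_rule('adam')(loss, [w], learning_rate=0.1)
step = theano.function([], loss, updates=updates)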
def control(self, st):
    srng = T.shared_randomstreams.RandomStreams(self.rng.randint(999999))

    # do n roll-outs for each starting state
    n = self.params['samples']
    st_s = T.tile(st, [n, 1])

    onoise = srng.normal(size=(st_s.shape[0], 1, self.params['T']))
    inoise = T.sqrt(st.shape[1]) * srng.normal(size=(n, st.shape[0], self.params['T']))

    ([_, _, R], updates) = theano.scan(fn=self._step,
                                       outputs_info=[st_s, T.as_tensor_variable(0), None],
                                       n_steps=self.params['T'],
                                       non_sequences=[onoise, inoise])
    return R.mean()
def adam(self, cost, params, learning_rate=0.001, beta1=0.9,
         beta2=0.999, epsilon=1e-8):
    all_grads = T.grad(cost=cost, wrt=params)
    all_grads = total_norm_constraint(all_grads, 10)

    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), all_grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))

    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    t = t_prev + 1
    a_t = learning_rate * T.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)

    for param, g_t in zip(params, all_grads):
        g_t = T.switch(not_finite, 0.1 * param, g_t)
        value = param.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        m_t = beta1 * m_prev + (1 - beta1) * g_t
        v_t = beta2 * v_prev + (1 - beta2) * g_t ** 2
        step = a_t * m_t / (T.sqrt(v_t) + epsilon)

        updates[m_prev] = m_t
        updates[v_prev] = v_t
        updates[param] = param - step

    updates[t_prev] = t
    return updates
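The custom Adam above relies on lasagne.updates.total_norm_constraint, which rescales a list of gradient expressions so that their joint norm stays below a threshold, and on the fact that Lasagne's update rules accept either a loss or a precomputed list of gradients. Below is a minimal sketch of that clipping step in isolation; the shared variable x and the toy loss are illustrative assumptions, not taken from the project above.

import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import total_norm_constraint, sgd

x = theano.shared(np.ones(5, dtype='float32'), name='x')
loss = T.sum(x ** 2)

grads = T.grad(loss, [x])
# Rescale the gradients so that their total norm is at most 10,
# mirroring the total_norm_constraint(all_grads, 10) call above.
clipped_grads = total_norm_constraint(grads, max_norm=10)

# Update rules accept a list of gradients in place of a loss expression.
updates = sgd(clipped_grads, [x], learning_rate=0.1)
train = theano.function([], loss, updates=updates)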
def omniglot():
    input_var = T.tensor3('input')    # input_var has dimensions (batch_size, time, input_dim)
    target_var = T.imatrix('target')  # target_var has dimensions (batch_size, time) (label indices)

    # Load data
    generator = OmniglotGenerator(data_folder='./data/omniglot', batch_size=16,
                                  nb_samples=5, nb_samples_per_class=10,
                                  max_rotation=0., max_shift=0, max_iter=None)

    output_var, output_var_flatten, params = memory_augmented_neural_network(
        input_var, target_var,
        batch_size=generator.batch_size, nb_class=generator.nb_samples,
        memory_shape=(128, 40), controller_size=200, input_size=20 * 20, nb_reads=4)

    cost = T.mean(T.nnet.categorical_crossentropy(output_var_flatten, target_var.flatten()))
    updates = lasagne.updates.adam(cost, params, learning_rate=1e-3)

    accuracies = accuracy_instance(T.argmax(output_var, axis=2), target_var,
                                   batch_size=generator.batch_size)

    print('Compiling the model...')
    train_fn = theano.function([input_var, target_var], cost, updates=updates)
    accuracy_fn = theano.function([input_var, target_var], accuracies)
    print('Done')

    print('Training...')
    t0 = time.time()
    all_scores, scores, accs = [], [], np.zeros(generator.nb_samples_per_class)
    try:
        for i, (example_input, example_output) in generator:
            score = train_fn(example_input, example_output)
            acc = accuracy_fn(example_input, example_output)
            all_scores.append(score)
            scores.append(score)
            accs += acc
            if i > 0 and not (i % 100):
                print('Episode %05d: %.6f' % (i, np.mean(score)))
                print(accs / 100.)
                scores, accs = [], np.zeros(generator.nb_samples_per_class)
    except KeyboardInterrupt:
        print(time.time() - t0)
        pass
def test_sa(self):
    self.precheck()

    train = nn.updates.sa(
        self.inputs, self.loss, self.params,
        outputs=[self.loss / 2],
        iters=2014,
        initial_temperature=2.0e-1,
        learning_rate=5.0e-1
    )

    ret = train(*self.get_inputs())
    assert len(ret) == 1, 'Optimization function should return output!'

    self.check('Simulated Annealing')
def test_adastep(self):
    self.precheck()

    train = nn.updates.adastep(
        self.inputs, self.loss, self.params,
        outputs=[self.loss / 2],
        max_iter=8, rho=0.9,
        initial_learning_rate=1.0e-1, momentum=0.9,
        max_learning_rate=1.0e-1, max_delta=0.1
    )

    for i in range(128):
        ret = train(*self.get_inputs())
        assert len(ret) == 1

    self.check('AdaStep')
def std_opt(self, method, learning_rate=1.0e-3, *args, **kwargs):
    if not callable(method):
        import lasagne.updates as updates
        method = getattr(updates, method)

    self.precheck()

    upd = method(self.loss, self.params, learning_rate=learning_rate, *args, **kwargs)
    train = theano.function(self.inputs, outputs=self.loss, updates=upd)

    #path = []

    for i in range(2048):
        train(*self.get_inputs())
        #path.append(self.params[0].get_value())

    # path = np.array(path)
    #
    # Xs, Ys = np.meshgrid(np.linspace(-1, 2, num=50), np.linspace(-1, 2, num=50))
    # Zs = np.zeros(shape=(50, 50))
    #
    # for i in range(50):
    #     for j in range(50):
    #         Zs[i, j] = self.get_loss(np.array([Xs[i, j], Ys[i, j]]).astype('float32'), *self.get_inputs())
    #
    # import matplotlib.pyplot as plt
    #
    # plt.figure()
    # plt.contourf(Xs, Ys, Zs)
    # plt.colorbar()
    # plt.scatter(path[:, 0], path[:, 1],
    #             color=[plt.cm.Greys(x) for x in np.linspace(0, 1, num=2048)], s=5)
    # plt.show()

    self.check(method)
def test_pseudograd(self):
    self.std_opt(nn.updates.pseudograd, temperature=1.0e-3, learning_rate=1.0e-2)
def test_adastep(self):
    self.precheck()

    train = nn.updates.adastep(
        self.inputs, self.loss, self.params,
        outputs=[self.loss / 2],
        max_iter=8, rho=0.9,
        initial_learning_rate=1.0e-3, momentum=0.9,
        max_learning_rate=1.0e+6, max_delta=1.0e-1, eps=1.0e-6
    )

    for i in range(512):
        ret = train()
        assert len(ret) == 1

    self.check('AdaStep')
def __init__(self, isTrain):
    super(RegressionUniformBlending, self).__init__(isTrain)
    # data preprocessing
    #self.dataPreprocessing()

    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            #('hidden2', layers.DenseLayer),
            #('hidden3', layers.DenseLayer),
            ('output', layers.DenseLayer),
            ],
        # layer parameters:
        input_shape=(None, 13),  # input dimension is 13
        hidden_num_units=6,      # number of units in hidden layer
        #hidden2_num_units=8,    # number of units in hidden layer
        #hidden3_num_units=4,    # number of units in hidden layer
        output_nonlinearity=None,  # output layer uses identity (linear) activation
        output_num_units=1,        # output dimension is 1

        # objective function
        objective_loss_function=lasagne.objectives.squared_error,

        # optimization method:
        update=lasagne.updates.nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.4,

        # use 20% as validation
        train_split=TrainSplit(eval_size=0.2),

        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=100,   # we want to train this many epochs
        verbose=0,
        )

    # Create linear regression object
    self.linRegr = linear_model.LinearRegression()

    # Create KNN regression object
    self.knn = neighbors.KNeighborsRegressor(86, weights='distance')

    # Create Decision Tree regression object
    self.decisionTree = DecisionTreeRegressor(max_depth=7, max_features=None)

    # Create AdaBoost regression object
    decisionReg = DecisionTreeRegressor(max_depth=10)
    rng = np.random.RandomState(1)
    self.adaReg = AdaBoostRegressor(decisionReg,
                                    n_estimators=400,
                                    random_state=rng)

    # Create Random Forest regression object
    self.model = RandomForestRegressor(max_features='sqrt',
                                       n_estimators=32,
                                       max_depth=39)
def _compile(self, ddqn):
    a = self.inputs["A"]
    r = self.inputs["R"]
    nonterminal = self.inputs["Nonterminal"]

    q = ls.get_output(self.network, deterministic=True)

    if ddqn:
        q2 = ls.get_output(self.network, deterministic=True,
                           inputs=self.alternate_input_mappings)
        q2_action_ref = tensor.argmax(q2, axis=1)

        q2_frozen = ls.get_output(self.frozen_network, deterministic=True)
        q2_max = q2_frozen[tensor.arange(q2_action_ref.shape[0]), q2_action_ref]
    else:
        q2_max = tensor.max(ls.get_output(self.frozen_network, deterministic=True), axis=1)

    target_q = r + self.gamma * nonterminal * q2_max
    predicted_q = q[tensor.arange(q.shape[0]), a]

    loss = self.build_loss_expression(predicted_q, target_q).sum()

    params = ls.get_all_params(self.network, trainable=True)

    # updates = lasagne.updates.rmsprop(loss, params, self._learning_rate, rho=0.95)
    updates = deepmind_rmsprop(loss, params, self.learning_rate)

    # TODO does FAST_RUN speed anything up?
    mode = None  # "FAST_RUN"

    s0_img = self.inputs["S0"]
    s1_img = self.inputs["S1"]

    if self.misc_state_included:
        s0_misc = self.inputs["S0_misc"]
        s1_misc = self.inputs["S1_misc"]

        print "Compiling the training function..."
        self._learn = theano.function([s0_img, s0_misc, s1_img, s1_misc, a, r, nonterminal],
                                      loss,
                                      updates=updates, mode=mode, name="learn_fn")
        print "Compiling the evaluation function..."
        self._evaluate = theano.function([s0_img, s0_misc], q,
                                         mode=mode, name="eval_fn")
    else:
        print "Compiling the training function..."
        self._learn = theano.function([s0_img, s1_img, a, r, nonterminal], loss,
                                      updates=updates, mode=mode, name="learn_fn")
        print "Compiling the evaluation function..."
        self._evaluate = theano.function([s0_img], q, mode=mode, name="eval_fn")

    print "Network compiled."