Python lasagne module: updates() example source code

We extracted the following 19 code examples from open-source Python projects to illustrate how to use lasagne.updates().
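Before the project excerpts, here is a minimal sketch of the pattern most of these snippets share, assuming a toy two-layer classifier; the layer sizes, learning rate, and variable names are illustrative assumptions, not taken from any of the projects below.

import theano
import theano.tensor as T
import lasagne

# Symbolic inputs and a tiny network (sizes are arbitrary, for illustration only).
x = T.matrix('x')
y = T.ivector('y')
l_in = lasagne.layers.InputLayer(shape=(None, 10), input_var=x)
l_out = lasagne.layers.DenseLayer(l_in, num_units=3,
                                  nonlinearity=lasagne.nonlinearities.softmax)

# Scalar training loss.
output = lasagne.layers.get_output(l_out)
loss = lasagne.objectives.categorical_crossentropy(output, y).mean()

# Every rule in lasagne.updates takes a loss (or precomputed gradients) plus the
# list of trainable parameters and returns an OrderedDict of update expressions.
params = lasagne.layers.get_all_params(l_out, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)

# Passing the update dictionary to theano.function makes each call perform one step.
train_fn = theano.function([x, y], loss, updates=updates)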

Project: NeuroNLP    Author: XuezheMax    | project source | file source
def create_updates(loss, network, opt, learning_rate, momentum, beta1, beta2):
    params = lasagne.layers.get_all_params(network, trainable=True)
    grads = theano.grad(loss, params)
    # if max_norm:
    #     names = ['crf.U', 'crf.W_h', 'crf.W_c', 'crf.b']
    #     constraints = [grad for param, grad in zip(params, grads) if param.name in names]
    #     assert len(constraints) == 4
    #     scaled_grads = total_norm_constraint(constraints, max_norm=max_norm)
    #     counter = 0
    #     for i in xrange(len(params)):
    #         param = params[i]
    #         if param.name in names:
    #             grads[i] = scaled_grads[counter]
    #             counter += 1
    #     assert counter == 4
    if opt == 'adam':
        updates = adam(grads, params=params, learning_rate=learning_rate, beta1=beta1, beta2=beta2)
    elif opt == 'momentum':
        updates = nesterov_momentum(grads, params=params, learning_rate=learning_rate, momentum=momentum)
    else:
        raise ValueError('unknown optimization algorithm: %s' % opt)

    return updates
Project: chordrec    Author: fdlm    | project source | file source
def create_optimiser(optimiser):
    """
    Creates a function that returns an optimiser and, optionally, a
    learning rate schedule.
    """

    if optimiser['schedule'] is not None:
        # if we have a learn rate schedule, create a theano shared
        # variable and a corresponding update
        lr = theano.shared(np.float32(optimiser['params']['learning_rate']))

        # create a copy of the optimiser config dict so we do not change
        # it
        from copy import deepcopy
        optimiser = deepcopy(optimiser)
        optimiser['params']['learning_rate'] = lr
        lrs = nn.LearnRateSchedule(learning_rate=lr, **optimiser['schedule'])
    else:
        lrs = None

    return partial(getattr(lnn.updates, optimiser['name']),
                   **optimiser['params']), lrs
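A hedged sketch of how the returned pair might be consumed; the config dict, variable names, and the per-epoch call are illustrative assumptions, not taken from the chordrec code.

# hypothetical usage of create_optimiser; all config values are assumptions
optimiser_cfg = {'name': 'adam',
                 'params': {'learning_rate': 0.001},
                 'schedule': None}
make_updates, lr_schedule = create_optimiser(optimiser_cfg)

# the returned partial is applied like any lasagne.updates rule
updates = make_updates(loss, params)
train_fn = theano.function([x, y], loss, updates=updates)

# if a schedule were configured, lr_schedule would typically be invoked once per
# epoch to adjust the shared learning-rate variable; with 'schedule': None it is None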
Project: dqn_vizdoom_theano    Author: mihahauke    | project source | file source
def deepmind_rmsprop(loss_or_grads, params, learning_rate=0.00025,
                     rho=0.95, epsilon=0.01):
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)

        acc_grad = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=param.broadcastable)
        acc_grad_new = rho * acc_grad + (1 - rho) * grad

        acc_rms = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                broadcastable=param.broadcastable)
        acc_rms_new = rho * acc_rms + (1 - rho) * grad ** 2

        updates[acc_grad] = acc_grad_new
        updates[acc_rms] = acc_rms_new

        updates[param] = (param - learning_rate *
                          (grad /
                           T.sqrt(acc_rms_new - acc_grad_new ** 2 + epsilon)))

    return updates
Project: policy_search_bb-alpha    Author: siemens    | project source | file source
def __init__(self, params, params_task, X, model, policy):

        self.rng = np.random.RandomState()

        self.model = model
        self.policy = policy

        self.params = params
        self.params_task = params_task

        self.x = T.matrix('x')
        cost = self.control(self.x)

        self.fwpass = theano.function(inputs=[self.x], outputs=cost,
                                      allow_input_downcast=True)
        self.train_func = theano.function(
            inputs=[self.x], outputs=[cost],
            updates=self.adam(cost,
                              lasagne.layers.get_all_params(self.policy, trainable=True),
                              learning_rate=self.params['learning_rate']))

        self.policy_network = theano.function(inputs=[self.x],
                                              outputs=self.predict(self.x))
Project: luna16    Author: gzuidhof    | project source | file source
def define_updates(output_layer, X, Y):
    output_train = lasagne.layers.get_output(output_layer)
    output_test = lasagne.layers.get_output(output_layer, deterministic=True)

    # set up the loss to minimize; with categorical cross-entropy, Y should contain integer class labels, not one-hot vectors
    loss = lasagne.objectives.categorical_crossentropy(T.clip(output_train,0.000001,0.999999), Y)
    loss = loss.mean()

    acc = T.mean(T.eq(T.argmax(output_train, axis=1), Y), dtype=theano.config.floatX)

    # if using ResNet use L2 regularization
    all_layers = lasagne.layers.get_all_layers(output_layer)
    l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * P.L2_LAMBDA
    loss = loss + l2_penalty

    # set up loss functions for validation dataset
    test_loss = lasagne.objectives.categorical_crossentropy(T.clip(output_test,0.000001,0.999999), Y)
    test_loss = test_loss.mean()
    test_loss = test_loss + l2_penalty

    test_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y), dtype=theano.config.floatX)

    # get the trainable parameters and set up SGD with Nesterov momentum; l_r is a shared variable so it can be changed during training
    l_r = theano.shared(np.array(LR_SCHEDULE[0], dtype=theano.config.floatX))
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=l_r, momentum=P.MOMENTUM)
    #updates = adam(loss, params, learning_rate=l_r)

    prediction_binary = T.argmax(output_train, axis=1)
    test_prediction_binary = T.argmax(output_test, axis=1)

    # set up training and prediction functions
    train_fn = theano.function(inputs=[X,Y], outputs=[loss, l2_penalty, acc, prediction_binary, output_train[:,1]], updates=updates)
    valid_fn = theano.function(inputs=[X,Y], outputs=[test_loss, l2_penalty, test_acc, test_prediction_binary, output_test[:,1]])

    return train_fn, valid_fn, l_r
Project: AirTicketPredicting    Author: junlulocky    | project source | file source
def __init__(self, isTrain, isNN):
        super(RegressionNN, self).__init__(isTrain, isNN)
        # data preprocessing
        #self.dataPreprocessing()

        self.net1 = NeuralNet(
                        layers=[  # three layers: one hidden layer
                            ('input', layers.InputLayer),
                            ('hidden', layers.DenseLayer),
                            #('hidden2', layers.DenseLayer),
                            #('hidden3', layers.DenseLayer),
                            ('output', layers.DenseLayer),
                            ],
                        # layer parameters:
                        input_shape=(None, 13),  # input dimension is 13
                        hidden_num_units=6,  # number of units in hidden layer
                        #hidden2_num_units=8,  # number of units in hidden layer
                        #hidden3_num_units=4,  # number of units in hidden layer
                        output_nonlinearity=None,  # output layer uses the identity (linear) activation
                        output_num_units=1,  # output dimension is 1

                        # objective function
                        objective_loss_function=lasagne.objectives.squared_error,

                        # optimization method:
                        update=lasagne.updates.nesterov_momentum,
                        update_learning_rate=0.002,
                        update_momentum=0.4,

                        # use 20% as validation
                        train_split=TrainSplit(eval_size=0.2),

                        regression=True,  # flag to indicate this is a regression problem
                        max_epochs=100,  # we want to train this many epochs
                        verbose=0,
                        )
Project: opt-mmd    Author: dougalsutherland    | project source | file source
def setup(dim, criterion='mmd', biased=True, streaming_est=False, opt_log=True,
          linear_kernel=False, opt_sigma=False, init_log_sigma=0,
          net_version='basic', hotelling_reg=0,
          strat='nesterov_momentum', learning_rate=0.01, **opt_args):
    input_p = T.matrix('input_p')
    input_q = T.matrix('input_q')

    mmd2_pq, obj, rep_p, net_p, net_q, log_sigma = make_network(
        input_p, input_q, dim,
        criterion=criterion, biased=biased, streaming_est=streaming_est,
        opt_log=opt_log, linear_kernel=linear_kernel, log_sigma=init_log_sigma,
        hotelling_reg=hotelling_reg, net_version=net_version)

    params = lasagne.layers.get_all_params([net_p, net_q], trainable=True)
    if opt_sigma:
        params.append(log_sigma)
    fn = getattr(lasagne.updates, strat)
    updates = fn(obj, params, learning_rate=learning_rate, **opt_args)

    print("Compiling...", file=sys.stderr, end='')
    train_fn = theano.function(
        [input_p, input_q], [mmd2_pq, obj], updates=updates)
    val_fn = theano.function([input_p, input_q], [mmd2_pq, obj])
    get_rep = theano.function([input_p], rep_p)
    print("done", file=sys.stderr)

    return params, train_fn, val_fn, get_rep, log_sigma
Project: kaggle_dsb    Author: syagev    | project source | file source
def define_updates(output_layer, X, Y):
    output_train = lasagne.layers.get_output(output_layer)
    output_test = lasagne.layers.get_output(output_layer, deterministic=True)

    # set up the loss to minimize; with categorical cross-entropy, Y should contain integer class labels, not one-hot vectors
    loss = lasagne.objectives.categorical_crossentropy(T.clip(output_train,0.000001,0.999999), Y)
    loss = loss.mean()

    acc = T.mean(T.eq(T.argmax(output_train, axis=1), Y), dtype=theano.config.floatX)

    # if using ResNet use L2 regularization
    all_layers = lasagne.layers.get_all_layers(output_layer)
    l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * P.L2_LAMBDA
    loss = loss + l2_penalty

    # set up loss functions for validation dataset
    test_loss = lasagne.objectives.categorical_crossentropy(T.clip(output_test,0.000001,0.999999), Y)
    test_loss = test_loss.mean()
    test_loss = test_loss + l2_penalty

    test_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y), dtype=theano.config.floatX)

    # get the trainable parameters and set up SGD with Nesterov momentum; l_r is a shared variable so it can be changed during training
    l_r = theano.shared(np.array(LR_SCHEDULE[0], dtype=theano.config.floatX))
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=l_r, momentum=P.MOMENTUM)
    #updates = adam(loss, params, learning_rate=l_r)

    prediction_binary = T.argmax(output_train, axis=1)
    test_prediction_binary = T.argmax(output_test, axis=1)

    # set up training and prediction functions
    train_fn = theano.function(inputs=[X,Y], outputs=[loss, l2_penalty, acc, prediction_binary, output_train[:,1]], updates=updates)
    valid_fn = theano.function(inputs=[X,Y], outputs=[test_loss, l2_penalty, test_acc, test_prediction_binary, output_test[:,1]])

    return train_fn, valid_fn, l_r
Project: baal    Author: braingineer    | project source | file source
def updater(self):
        return getattr(lasagne.updates, self.learning_algorithm)
Project: policy_search_bb-alpha    Author: siemens    | project source | file source
def control(self, st):
        srng = T.shared_randomstreams.RandomStreams(self.rng.randint(999999))
        # do n roll-outs for each starting state
        n = self.params['samples']
        st_s = T.tile(st, [n, 1])

        onoise = srng.normal(size=(st_s.shape[0], 1, self.params['T']))
        inoise = T.sqrt(st.shape[1]) * srng.normal(size=(n, st.shape[0], self.params['T']))

        ([_, _, R], updates) = theano.scan(fn=self._step,
                                           outputs_info=[st_s, T.as_tensor_variable(0), None],
                                           n_steps=self.params['T'],
                                           non_sequences=[onoise, inoise])
        return R.mean()
Project: policy_search_bb-alpha    Author: siemens    | project source | file source
def adam(self,cost, params, learning_rate=0.001, beta1=0.9,
             beta2=0.999, epsilon=1e-8):

        all_grads = T.grad(cost=cost, wrt=params)
        all_grads = total_norm_constraint(all_grads,10)

        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), all_grads)))
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))

        t_prev = theano.shared(utils.floatX(0.))
        updates = OrderedDict()

        t = t_prev + 1
        a_t = learning_rate*T.sqrt(1-beta2**t)/(1-beta1**t)

        for param, g_t in zip(params, all_grads):
            g_t = T.switch(not_finite, 0.1 * param,g_t)
            value = param.get_value(borrow=True)
            m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)
            v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)

            m_t = beta1*m_prev + (1-beta1)*g_t
            v_t = beta2*v_prev + (1-beta2)*g_t**2
            step = a_t*m_t/(T.sqrt(v_t) + epsilon)

            updates[m_prev] = m_t
            updates[v_prev] = v_t
            updates[param] = param - step

        updates[t_prev] = t
        return updates
Project: ntm-one-shot    Author: tristandeleu    | project source | file source
def omniglot():
    input_var = T.tensor3('input') # input_var has dimensions (batch_size, time, input_dim)
    target_var = T.imatrix('target') # target_var has dimensions (batch_size, time) (label indices)

    # Load data
    generator = OmniglotGenerator(data_folder='./data/omniglot', batch_size=16, \
        nb_samples=5, nb_samples_per_class=10, max_rotation=0., max_shift=0, max_iter=None)

    output_var, output_var_flatten, params = memory_augmented_neural_network(input_var, \
        target_var, batch_size=generator.batch_size, nb_class=generator.nb_samples, \
        memory_shape=(128, 40), controller_size=200, input_size=20 * 20, nb_reads=4)

    cost = T.mean(T.nnet.categorical_crossentropy(output_var_flatten, target_var.flatten()))
    updates = lasagne.updates.adam(cost, params, learning_rate=1e-3)

    accuracies = accuracy_instance(T.argmax(output_var, axis=2), target_var, batch_size=generator.batch_size)

    print('Compiling the model...')
    train_fn = theano.function([input_var, target_var], cost, updates=updates)
    accuracy_fn = theano.function([input_var, target_var], accuracies)
    print('Done')

    print('Training...')
    t0 = time.time()
    all_scores, scores, accs = [], [], np.zeros(generator.nb_samples_per_class)
    try:
        for i, (example_input, example_output) in generator:
            score = train_fn(example_input, example_output)
            acc = accuracy_fn(example_input, example_output)
            all_scores.append(score)
            scores.append(score)
            accs += acc
            if i > 0 and not (i % 100):
                print('Episode %05d: %.6f' % (i, np.mean(score)))
                print(accs / 100.)
                scores, accs = [], np.zeros(generator.nb_samples_per_class)
    except KeyboardInterrupt:
        print(time.time() - t0)
        pass
Project: crayimage    Author: yandexdataschool    | project source | file source
def test_sa(self):
    self.precheck()

    train = nn.updates.sa(
      self.inputs, self.loss, self.params, outputs=[self.loss / 2],
      iters=2014, initial_temperature=2.0e-1, learning_rate=5.0e-1
    )

    ret = train(*self.get_inputs())

    assert len(ret) == 1, 'Optimization function should return output!'
    self.check('Simulated Annealing')
Project: crayimage    Author: yandexdataschool    | project source | file source
def test_adastep(self):
    self.precheck()

    train = nn.updates.adastep(
      self.inputs, self.loss, self.params, outputs=[self.loss / 2],
      max_iter=8, rho=0.9, initial_learning_rate=1.0e-1, momentum=0.9,
      max_learning_rate=1.0e-1, max_delta=0.1
    )

    for i in range(128):
      ret = train(*self.get_inputs())

    assert len(ret) == 1
    self.check('AdaStep')
Project: crayimage    Author: yandexdataschool    | project source | file source
def std_opt(self, method, learning_rate=1.0e-3, *args, **kwargs):
    if not callable(method):
      import lasagne.updates as updates
      method = getattr(updates, method)

    self.precheck()

    upd = method(self.loss, self.params, learning_rate=learning_rate, *args, **kwargs)
    train = theano.function(self.inputs, outputs=self.loss, updates=upd)

    #path = []

    for i in range(2048):
      train(*self.get_inputs())
      #path.append(self.params[0].get_value())

    # path = np.array(path)
    #
    # Xs, Ys = np.meshgrid(np.linspace(-1, 2, num=50), np.linspace(-1, 2, num=50))
    # Zs = np.zeros(shape=(50, 50))
    #
    # for i in range(50):
    #   for j in range(50):
    #     Zs[i, j] = self.get_loss(np.array([Xs[i, j], Ys[i, j]]).astype('float32'), *self.get_inputs())
    #
    # import matplotlib.pyplot as plt
    #
    # plt.figure()
    # plt.contourf(Xs, Ys, Zs)
    # plt.colorbar()
    # plt.scatter(path[:, 0], path[:, 1], color=[ plt.cm.Greys(x) for x in np.linspace(0, 1, num=2048) ], s = 5)
    # plt.show()

    self.check(method)
Project: crayimage    Author: yandexdataschool    | project source | file source
def test_pseudograd(self):
    self.std_opt(nn.updates.pseudograd, temperature=1.0e-3, learning_rate=1.0e-2)
Project: crayimage    Author: yandexdataschool    | project source | file source
def test_adastep(self):
    self.precheck()

    train = nn.updates.adastep(
      self.inputs, self.loss, self.params, outputs=[self.loss / 2],
      max_iter=8, rho=0.9, initial_learning_rate=1.0e-3, momentum=0.9,
      max_learning_rate=1.0e+6, max_delta=1.0e-1, eps=1.0e-6
    )

    for i in range(512):
      ret = train()

    assert len(ret) == 1
    self.check('AdaStep')
Project: AirTicketPredicting    Author: junlulocky    | project source | file source
def __init__(self, isTrain):
        super(RegressionUniformBlending, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        self.net1 = NeuralNet(
                        layers=[  # three layers: one hidden layer
                            ('input', layers.InputLayer),
                            ('hidden', layers.DenseLayer),
                            #('hidden2', layers.DenseLayer),
                            #('hidden3', layers.DenseLayer),
                            ('output', layers.DenseLayer),
                            ],
                        # layer parameters:
                        input_shape=(None, 13),  # input dimension is 13
                        hidden_num_units=6,  # number of units in hidden layer
                        #hidden2_num_units=8,  # number of units in hidden layer
                        #hidden3_num_units=4,  # number of units in hidden layer
                        output_nonlinearity=None,  # output layer uses the identity (linear) activation
                        output_num_units=1,  # output dimension is 1

                        # objective function
                        objective_loss_function=lasagne.objectives.squared_error,

                        # optimization method:
                        update=lasagne.updates.nesterov_momentum,
                        update_learning_rate=0.002,
                        update_momentum=0.4,

                        # use 20% as validation
                        train_split=TrainSplit(eval_size=0.2),

                        regression=True,  # flag to indicate this is a regression problem
                        max_epochs=100,  # we want to train this many epochs
                        verbose=0,
                        )

        # Create linear regression object
        self.linRegr = linear_model.LinearRegression()

        # Create KNN regression object
        self.knn = neighbors.KNeighborsRegressor(86, weights='distance')

        # Create Decision Tree regression object
        self.decisionTree = DecisionTreeRegressor(max_depth=7, max_features=None)

        # Create AdaBoost regression object
        decisionReg = DecisionTreeRegressor(max_depth=10)
        rng = np.random.RandomState(1)
        self.adaReg = AdaBoostRegressor(decisionReg,
                          n_estimators=400,
                          random_state=rng)

        # Create linear regression object
        self.model = RandomForestRegressor(max_features='sqrt', n_estimators=32, max_depth=39)
Project: dqn_vizdoom_theano    Author: mihahauke    | project source | file source
def _compile(self, ddqn):

        a = self.inputs["A"]
        r = self.inputs["R"]
        nonterminal = self.inputs["Nonterminal"]

        q = ls.get_output(self.network, deterministic=True)

        if ddqn:
            q2 = ls.get_output(self.network, deterministic=True, inputs=self.alternate_input_mappings)
            q2_action_ref = tensor.argmax(q2, axis=1)

            q2_frozen = ls.get_output(self.frozen_network, deterministic=True)
            q2_max = q2_frozen[tensor.arange(q2_action_ref.shape[0]), q2_action_ref]
        else:
            q2_max = tensor.max(ls.get_output(self.frozen_network, deterministic=True), axis=1)

        target_q = r + self.gamma * nonterminal * q2_max
        predicted_q = q[tensor.arange(q.shape[0]), a]

        loss = self.build_loss_expression(predicted_q, target_q).sum()
        params = ls.get_all_params(self.network, trainable=True)

        # updates = lasagne.updates.rmsprop(loss, params, self._learning_rate, rho=0.95)
        updates = deepmind_rmsprop(loss, params, self.learning_rate)

        # TODO does FAST_RUN speed anything up?
        mode = None  # "FAST_RUN"

        s0_img = self.inputs["S0"]
        s1_img = self.inputs["S1"]

        if self.misc_state_included:
            s0_misc = self.inputs["S0_misc"]
            s1_misc = self.inputs["S1_misc"]
            print "Compiling the training function..."
            self._learn = theano.function([s0_img, s0_misc, s1_img, s1_misc, a, r, nonterminal], loss,
                                          updates=updates, mode=mode, name="learn_fn")
            print "Compiling the evaluation function..."
            self._evaluate = theano.function([s0_img, s0_misc], q, mode=mode,
                                             name="eval_fn")
        else:
            print "Compiling the training function..."
            self._learn = theano.function([s0_img, s1_img, a, r, nonterminal], loss, updates=updates, mode=mode,
                                          name="learn_fn")
            print "Compiling the evaluation function..."
            self._evaluate = theano.function([s0_img], q, mode=mode, name="eval_fn")
        print "Network compiled."