The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.shared().
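Before the extracted examples, here is a minimal self-contained sketch (not taken from any of the projects below) showing the basic theano.shared() workflow: creating a shared variable, reading and writing its value from Python, and updating it inside a compiled function.

import numpy as np
import theano
import theano.tensor as T

# a shared variable holds state that persists across function calls (on the GPU if available)
state = theano.shared(np.asarray(0., dtype=theano.config.floatX), name='state')

inc = T.scalar('inc', dtype=theano.config.floatX)
# each call returns the current value and then adds `inc` to it via the `updates` argument
accumulate = theano.function([inc], state, updates=[(state, state + inc)])

accumulate(1.0)
print(state.get_value())                                     # -> 1.0
state.set_value(np.asarray(0., dtype=theano.config.floatX))  # reset from the Python side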
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
        updates.append((t, t+1))
    return updates
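Both update helpers above return a list of (shared_variable, new_expression) pairs. A hedged usage sketch follows; the toy model, cost, and data names are hypothetical and only illustrate how such a list is passed to theano.function so that each call performs one optimization step.

import numpy as np
import theano as th
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
W = th.shared(np.zeros((10, 1), dtype=th.config.floatX), name='W')  # model parameter
cost = T.mean((T.dot(x, W).flatten() - y) ** 2)                     # toy squared error

updates = adam_updates([W], cost, lr=1e-3)        # list of (shared, new_value) pairs
train_step = th.function([x, y], cost, updates=updates)

# each call updates W, the Adam moment accumulators and the step counter in place:
# loss = train_step(x_batch, y_batch)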
def compile(self, s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0,
            fetches_=None, updates_=None, givens_=None,
            trunc_grad_=None, profile_=False):
    def get_shared_shape(v):
        return v.get_value(borrow=True, return_internal_type=True).shape
    if type(s_inputs_) not in (list, tuple):
        s_inputs_ = [s_inputs_]
    if isinstance(updates_, dict):
        updates_ = list(updates_.items())
    super(AdamSGD, self).compile(
        s_inputs_, s_loss_, v_params_,
        s_reg_=s_reg_, s_grads_=s_grads_, trunc_grad_=trunc_grad_)
    self.v_m = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX),
                          name='adam_m_'+p.name if p.name is not None else None)
                for p in v_params_]
    self.v_v = [th.shared(value=np.zeros(get_shared_shape(p), th.config.floatX),
                          name='adam_v_'+p.name if p.name is not None else None)
                for p in v_params_]
    s_b1 = T.scalar('adam_b1'); s_b2 = T.scalar('adam_b2')
    s_b1s = T.scalar('adam_b1s'); s_b2s = T.scalar('adam_b2s')
    update_m = [(m, (m*s_b1 + (1.-s_b1)*g)) for m, g in zip(self.v_m, self.s_grads)]
    update_v = [(v, (v*s_b2 + (1.-s_b2)*g*g)) for v, g in zip(self.v_v, self.s_grads)]
    apply_grad = [(p, p-(s_b1s*m*self.s_lr)/(T.sqrt(s_b2s*v)+self.eps))
                  for p, m, v in zip(v_params_, self.v_m, self.v_v)]
    self.fn_train = th.function(
        inputs=[self.s_lr]+s_inputs_+[s_b1, s_b2, s_b1s, s_b2s],
        outputs=fetches_,
        updates=update_m+update_v+apply_grad+(updates_ if updates_ else []),
        on_unused_input='warn',
        givens=givens_,
        profile=profile_)
    self.fn_rst = th.function(
        inputs=[],
        updates=[(v, T.zeros_like(v)) for v in self.v_m+self.v_v],
        profile=profile_)
    return self.fn_train
def get_costs(self, probs, y, y_mask, decay_cs=None, opt_rets=None):
    """
    probs : dict, mapping cg_name to probabilities
    y : theano tensor variable
    y_mask : theano tensor variable
    decay_cs : list of l2 regularization weights
    opt_rets : dict, mapping cg_name to optional returned variables
    """
    costs = self.decoder.costs(probs, y, y_mask)

    if decay_cs is not None:
        for name, cost in costs.iteritems():
            if decay_cs[name] > 0.:
                decay_c = theano.shared(numpy.float32(decay_cs[name]),
                                        name='decay_c')
                weight_decay = 0.
                for pp in ComputationGraph(cost).parameters:
                    weight_decay += (pp ** 2).sum()
                weight_decay *= decay_c
                costs[name] += weight_decay
                costs[name].name = name

    return costs
def param(name, *args, **kwargs):
    """
    A wrapper for `theano.shared` which enables parameter sharing in models.

    Creates and returns theano shared variables similarly to `theano.shared`,
    except if you try to create a param with the same name as a
    previously-created one, `param(...)` will just return the old one instead
    of making a new one.

    This constructor also adds a `param` attribute to the shared variables it
    creates, so that you can easily search a graph for all params.
    """
    if name not in _params:
        kwargs['name'] = name
        param = theano.shared(*args, **kwargs)
        param.param = True
        _params[name] = param
    return _params[name]
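A brief usage sketch of the wrapper above. It assumes `_params` is the module-level dictionary the function relies on; the variable name 'dense.W' and the shapes are purely illustrative.

import numpy as np
import theano

_params = {}  # module-level registry assumed by param()

# the first call creates the shared variable ...
W1 = param('dense.W', np.zeros((784, 128), dtype=theano.config.floatX))
# ... a second call with the same name returns the exact same object
W2 = param('dense.W', np.zeros((784, 128), dtype=theano.config.floatX))
assert W1 is W2

# the extra attribute makes it easy to collect every param later
all_params = [v for v in _params.values() if getattr(v, 'param', False)]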
def adadelta(parameters, gradients, rho=0.95, eps=1e-6):
    """
    Reference: ADADELTA: An Adaptive Learning Rate Method, Zeiler 2012.
    https://arxiv.org/abs/1212.5701
    Adapted from the Adadelta implementation from Tensorflow.
    """
    accum = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]
    accum_updates = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]

    new_accum = [rho * g0 + (1.0 - rho) * (g**2)
                 for g0, g in izip(accum, gradients)]
    updates = [tensor.sqrt(d0 + eps) / tensor.sqrt(g0 + eps) * g
               for d0, g0, g in izip(accum_updates, new_accum, gradients)]
    new_accum_updates = [rho * d0 + (1.0 - rho) * (d**2)
                         for d0, d in izip(accum_updates, updates)]

    accum_ = zip(accum, new_accum)
    accum_updates_ = zip(accum_updates, new_accum_updates)
    parameters_ = [(p, (p - d)) for p, d in izip(parameters, updates)]
    return accum_ + accum_updates_ + parameters_
def addData(self, data):
    """
    Set the data of the network, not managed within training iterations,
    e.g. used for validation or other small data
    :param data: training data and labels specified as dictionary
    :return: None
    """
    if not isinstance(data, dict):
        raise ValueError("Error: expected dictionary for data!")

    for key in data:
        # no need to cache validation data
        setattr(self, key+'DB', self.alignData(data[key]))
        # shared variable already exists?
        if hasattr(self, key):
            print("Reusing shared variables!")
            getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
        else:
            # create shared data
            setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
def addStaticData(self, data):
    """
    Set the data of the network, not managed within training iterations,
    e.g. used for validation or other small data
    :param data: training data and labels specified as dictionary
    :return: None
    """
    if not isinstance(data, dict):
        raise ValueError("Error: expected dictionary for data!")

    for key in data:
        # no need to cache validation data
        setattr(self, key+'DB', data[key])
        # shared variable already exists?
        if hasattr(self, key):
            print("Reusing shared variables!")
            getattr(self, key).set_value(getattr(self, key+'DB'), borrow=True)
        else:
            # create shared data
            setattr(self, key, theano.shared(getattr(self, key+'DB'), name=key, borrow=True))
def replaceTrainingData(self, start_idx, end_idx, last=False):
    """
    Replace the shared data of the training data
    :param start_idx: start index of data
    :param end_idx: end index of data
    :param last: specify if it is last macro-batch
    :return: None
    """
    for var in self.managedVar:
        if not hasattr(self, var):
            raise ValueError("Variable " + var + " not defined!")
        if last is True:
            getattr(self, var).set_value(getattr(self, var+'DBlast')[start_idx:end_idx], borrow=True)
        else:
            getattr(self, var).set_value(getattr(self, var+'DB')[start_idx:end_idx], borrow=True)
def loadMacroBatch(self, macro_idx):
    """
    Make sure that macro batch is loaded in the shared variable
    :param macro_idx: macro batch index
    :return: None
    """
    if macro_idx != self.currentMacroBatch:
        # last macro batch is handled separately, as it is padded
        if self.isLastMacroBatch(macro_idx):
            start_idx = 0
            end_idx = self.getNumSamplesPerMacroBatch()
            print("Loading last macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
            self.replaceTrainingData(start_idx, end_idx, last=True)
            # remember current macro batch index
            self.currentMacroBatch = macro_idx
        else:
            start_idx = macro_idx * self.getNumSamplesPerMacroBatch()
            end_idx = min((macro_idx + 1) * self.getNumSamplesPerMacroBatch(), self.train_data_xDB.shape[0])
            print("Loading macro batch {}, start idx {}, end idx {}".format(macro_idx, start_idx, end_idx))
            self.replaceTrainingData(start_idx, end_idx)
            # remember current macro batch index
            self.currentMacroBatch = macro_idx
def query_variable(self, query_):
    '''
    Return an iterable which yields shared variables found by query_, from current group.
    query_: Can take several forms, as shown below.
        All: return all variables under current group.
        string: treat as regex, return variables whose name fully match the regex.
    '''
    if query_ is All:
        return self._current_group_di.values()
    elif isinstance(query_, str):
        regex = re.compile(query_)
        return {k: v for k, v in self._current_group_di.items() if regex.fullmatch(k)}
    else:
        raise TypeError('Unknown query type "%s"' % type(query_))

# TODO add / delete group does not consider non-group object by now
def sgd_optimizer(model, lr=0.001, momentum=0.9):
    lr = theano.shared(np.array(lr).astype(theano.config.floatX))

    # Make sure momentum is a sane value
    assert momentum < 1 and momentum >= 0

    # the updates of SGD with momentum
    updates = []
    grads = T.grad(model.costs[0], model.params)
    for param, grad in zip(model.params, grads):
        param_update = theano.shared(param.get_value()*0.)
        updates.append((param, param - lr * param_update))
        updates.append((param_update, momentum*param_update + (1. - momentum)*grad))

    train_func = theano.function(model.inputs, model.costs, updates=updates)
    valid_func = theano.function(model.inputs, model.costs)

    return train_func, valid_func
def get_cost(aes, l, eye=True):
    """Get the sum of all the reconstruction costs of the AEs.

    Input:
        aes: list. List of all the aes.
        l: shared variable or a list of shared variables for the importance
           weights.
    """
    costs = []
    for ae, i in zip(aes, range(len(aes))):
        if isinstance(ae, ConvolutionalAutoencoder):
            costs.append(l[i] * ae.get_train_cost()[0])
        else:
            costs.append(l[i] * ae.get_train_cost(face=eye)[0])
    cost = None
    if costs not in [[], None]:
        cost = reduce(lambda x, y: x + y, costs)
    return cost
def evaluate_model(list_minibatchs_vl, eval_fn):
    """Evaluate the model over a set."""
    error, output = None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(mn_vl['x'], borrow=True).get_value(borrow=True)
        y = theano.shared(mn_vl['y'], borrow=True).get_value(borrow=True)
        [error_mn, output_mn] = eval_fn(x, y)
        if error is None:
            error = error_mn
            output = output_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
    return error, output
def evaluate_model_3D_unsup(list_minibatchs_vl, eval_fn):
    """Evaluate the model over a set."""
    error, output, code = None, None, None
    for mn_vl in list_minibatchs_vl:
        x = theano.shared(mn_vl['x'], borrow=True).get_value(borrow=True)
        [error_mn, output_mn, code_mn] = eval_fn(x)
        if error is None:
            error = error_mn
            output = output_mn
            code = code_mn
        else:
            error = np.vstack((error, error_mn))
            output = np.vstack((output, output_mn))
            code = np.vstack((code, code_mn))
    return error, output, code
def shared_dataset(self, data_xy, train=False, borrow=True):
    """Load the data into Theano shared variables.

    The data is copied once to the shared memory on the GPU.
    """
    data_x, data_y = data_xy
    if train:
        dim_output = 10  # case of MNIST
        data_y = np.int32(self.labels(data_y, dim_output))
    shared_x = theano.shared(
        np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(
        np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)

    return shared_x, T.cast(shared_y, 'int32')
def load_data(self, dataset_path, share=False):
    """Load the data set."""
    f = gzip.open(dataset_path, 'rb')
    train_set, valid_set, test_set = pickle.load(f)
    f.close()

    # share the data
    train_set_x, train_set_y = self.shared_dataset(train_set, train=True)
    valid_set_x, valid_set_y = self.shared_dataset(valid_set)
    test_set_x, test_set_y = self.shared_dataset(test_set)

    if share:
        reval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
                 (test_set_x, test_set_y)]
    else:
        # NON-shared data (the Crino code does not share the data)
        reval = [train_set, valid_set, test_set]

    return reval
def shared_dataset_xy(self, data_xy, nlabels=10, train=False, task="cls", borrow=True):
    """Load the data into Theano shared variables.

    The data is copied once to the shared memory on the GPU.
    """
    data_x, data_y = data_xy
    if (train) and (task == 'cls'):
        data_y = np.int32(self.labels(data_y, nlabels))
    shared_x = theano.shared(
        np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(
        np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)

    return shared_x, T.cast(shared_y, 'int32')
def adadelta(tparams, grads, x, y, mask, lengths, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function([x, y, mask, lengths], cost,
                                    updates=zgup + rg2up,
                                    name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]

    f_update = theano.function([], [], updates=ru2up + param_up,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update
def adadelta(tparams, grads, weightVector, iVector, jVector, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function([weightVector, iVector, jVector], cost,
                                    updates=zgup + rg2up,
                                    name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)]

    f_update = theano.function([], [], updates=ru2up + param_up,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update
def shared_dropout_layer(shape, use_noise, trng, value, scaled=True):
    # re-scale dropout at training time, so we don't need to at test time
    if scaled:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32')/value,
            theano.shared(numpy.float32(1.)))
    else:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32'),
            theano.shared(numpy.float32(value)))
    return proj

# feedforward layer: affine transformation + point-wise nonlinearity
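In this style of code, `use_noise` is typically itself a shared scalar that gets flipped between training and test time with set_value(). A hedged usage sketch of that pattern follows; the surrounding names and the keep-probability 0.8 are illustrative and not taken from the project above.

import numpy
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

use_noise = theano.shared(numpy.float32(1.))   # 1. = apply dropout, 0. = pass through
trng = RandomStreams(1234)

x = T.matrix('x')
# re-scaled dropout mask with keep probability 0.8
mask = shared_dropout_layer((x.shape[0], x.shape[1]), use_noise, trng, 0.8)
f = theano.function([x], x * mask)

use_noise.set_value(numpy.float32(1.))  # training: random, re-scaled mask
use_noise.set_value(numpy.float32(0.))  # testing: deterministic identity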
def mdclW(num_filters, num_channels, filter_size, winit, name, scales):
    # Coefficient Initializer
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))
    # Total filter size
    size = filter_size + (filter_size-1)*(scales[-1]-1)
    # Multiscale Dilated Filter
    W = T.zeros((num_filters, num_channels, size, size))
    # Undilated Base Filter
    baseW = theano.shared(lasagne.utils.floatX(winit.sample((num_filters, num_channels, filter_size, filter_size))),
                          name=name+'.W')
    for scale in scales[::-1]:  # iterate backwards so that we place the main filter on top
        W = T.set_subtensor(W[:, :, scales[-1]-scale:size-scales[-1]+scale:scale,
                              scales[-1]-scale:size-scales[-1]+scale:scale],
                            baseW*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)),
                                                name+'.coeff_'+str(scale)).dimshuffle(0, 'x', 'x', 'x'))
    return W

# Subpixel Upsample Layer from (https://arxiv.org/abs/1609.05158)
# This layer uses a set of r^2 set_subtensor calls to reorganize the tensor in a
# subpixel-layer upscaling style as done in the ESPCN Magic Pony paper for super-resolution.
# r is the upscale factor.
# c is the number of output channels.
def load_weights(params, path, num_conv):
    print 'Loading gan weights from ' + path
    with h5py.File(path, 'r') as hdf5:
        params['skipthought2image'] = theano.shared(np.copy(hdf5['skipthought2image']))
        params['skipthought2image-bias'] = theano.shared(np.copy(hdf5['skipthought2image-bias']))

        for i in xrange(num_conv):
            params['W_conv{}'.format(i)] = theano.shared(np.copy(hdf5['W_conv{}'.format(i)]))
            params['b_conv{}'.format(i)] = theano.shared(np.copy(hdf5['b_conv{}'.format(i)]))

            # Flip w,h axes
            params['W_conv{}'.format(i)] = params['W_conv{}'.format(i)][:, :, ::-1, ::-1]

            w = np.abs(np.copy(hdf5['W_conv{}'.format(i)]))
            print 'W_conv{}'.format(i), np.min(w), np.mean(w), np.max(w)
            b = np.abs(np.copy(hdf5['b_conv{}'.format(i)]))
            print 'b_conv{}'.format(i), np.min(b), np.mean(b), np.max(b)

    return params
def __init__(self, dimX, dimReadAttent, dimWriteAttent, dimRNNEnc, dimRNNDec, dimZ,
             runSteps, inputData, valData=None, testData=None, pathToWeights=None):
    self.dimX = dimX
    self.dimReadAttent = dimReadAttent
    self.dimWriteAttent = dimWriteAttent
    self.dimRNNEnc = dimRNNEnc
    self.dimRNNDec = dimRNNDec
    self.dimZ = dimZ
    self.runSteps = runSteps
    self.pathToWeights = pathToWeights

    self.n_batches = inputData.shape[0] / batch_size
    self.train_data = theano.shared(inputData)
    del inputData

    if valData != None:
        self.n_val_batches = valData.shape[0] / batch_size
        self.val_data = theano.shared(valData)
        del valData

    if testData != None:
        self.n_test_batches = testData.shape[0] / batch_size
        self.test_data = theano.shared(testData)
        del testData

    self._kl_final, self._logpxz, self._log_likelihood, self._c_ts, self._c_ts_gener, \
        self._x, self._run_steps, self._updates_train, self._updates_gener, \
        self._read_attent_params, self._write_attent_params, \
        self._write_attent_params_gener, self._params = \
        build_lstm_attention_vae(self.dimX, self.dimReadAttent, self.dimWriteAttent,
                                 self.dimRNNEnc, self.dimRNNDec, self.dimZ,
                                 self.runSteps, self.pathToWeights)
def shared_dataset_x(data_x, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch everytime
    is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.
    """
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    # When storing data on the GPU it has to be stored as floats
    # therefore we will store the labels as ``floatX`` as well
    # (``shared_y`` does exactly that). But during our computations
    # we need them as ints (we use labels as index, and if they are
    # floats it doesn't make sense) therefore instead of returning
    # ``shared_y`` we will have to cast it to int. This little hack
    # lets us get around this issue
    return shared_x
def __init__(self, rng, n_in, n_out, minibatch_size):
    super(GRULayer, self).__init__()
    # Notation from: An Empirical Exploration of Recurrent Network Architectures

    self.n_in = n_in
    self.n_out = n_out

    # Initial hidden state
    self.h0 = theano.shared(value=np.zeros((minibatch_size, n_out)).astype(theano.config.floatX),
                            name='h0', borrow=True)

    # Gate parameters:
    self.W_x = weights_Glorot(n_in, n_out*2, 'W_x', rng)
    self.W_h = weights_Glorot(n_out, n_out*2, 'W_h', rng)
    self.b = weights_const(1, n_out*2, 'b', 0)
    # Input parameters
    self.W_x_h = weights_Glorot(n_in, n_out, 'W_x_h', rng)
    self.W_h_h = weights_Glorot(n_out, n_out, 'W_h_h', rng)
    self.b_h = weights_const(1, n_out, 'b_h', 0)

    self.params = [self.W_x, self.W_h, self.b, self.W_x_h, self.W_h_h, self.b_h]
def setParams(self, W_IN, b_IN):
    # check the dimensions of the given matrices
    if (W_IN.shape[0] == self.W.shape.eval()[0] and
            W_IN.shape[1] == self.W.shape.eval()[1] and
            len(b_IN) == self.b.shape.eval()[0]):
        self.W.set_value(W_IN)
        self.b.set_value(b_IN)
        #self.W = theano.shared(value=W_IN, name='W', borrow=True)
        # initialize the biases b as a vector of n_out 0s
        #self.b = theano.shared(value=b_IN, name='b', borrow=True)
    else:
        print "NEW_logistic_sgd: dimension mismatch in the matrices passed in"
        print "W(input) shape", W_IN.shape, "W shape", self.W.shape.eval()
        print "b(input) shape", len(b_IN), "b shape", self.b.shape.eval()
def shared_dataset(data_x, data_y, borrow=True):
    """ Function that loads the dataset into shared variables

    The reason we store our dataset in shared variables is to allow
    Theano to copy it into the GPU memory (when code is run on GPU).
    Since copying data into the GPU is slow, copying a minibatch everytime
    is needed (the default behaviour if the data is not in a shared
    variable) would lead to a large decrease in performance.
    """
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')
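The function above follows the pattern from the Theano deep-learning tutorials: keep the whole dataset in shared variables and slice out minibatches through `givens` instead of feeding arrays on every call. A minimal usage sketch follows; the toy logistic-regression model, the random data, and the learning rate are hypothetical and only there to show the `givens` slicing.

import numpy
import theano
import theano.tensor as T

# hypothetical data: 1000 flattened 28x28 images with labels in 0..9
train_x = numpy.random.rand(1000, 784).astype(theano.config.floatX)
train_y = numpy.random.randint(0, 10, size=1000)
shared_x, shared_y = shared_dataset(train_x, train_y)

batch_size = 100
index = T.lscalar('index')   # minibatch index
x = T.matrix('x')
y = T.ivector('y')
W = theano.shared(numpy.zeros((784, 10), dtype=theano.config.floatX), name='W')
cost = T.mean(T.nnet.categorical_crossentropy(T.nnet.softmax(T.dot(x, W)), y))

# each call trains on one minibatch, sliced directly from the shared dataset
train_model = theano.function(
    [index], cost,
    updates=[(W, W - 0.1 * T.grad(cost, W))],
    givens={x: shared_x[index * batch_size:(index + 1) * batch_size],
            y: shared_y[index * batch_size:(index + 1) * batch_size]})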
def dist_info_sym(self, obs_var, latent_var=None):  # this is meant to be for one path!
    # now this is not doing anything! And for computing the dist_info_vars of npo_snn_rewardMI it doesn't work
    if latent_var is None:
        latent_var1 = theano.shared(np.expand_dims(self.latent_fix, axis=0))  # new fix to avoid putting the latent as an input: just take the fixed one!
        latent_var = TT.tile(latent_var1, [obs_var.shape[0], 1])

    # generate the generalized input (append latents to obs.)
    if self.bilinear_integration:
        extended_obs_var = TT.concatenate([obs_var, latent_var,
                                           TT.flatten(obs_var[:, :, np.newaxis] * latent_var[:, np.newaxis, :],
                                                      outdim=2)],
                                          axis=1)
    else:
        extended_obs_var = TT.concatenate([obs_var, latent_var], axis=1)
    mean_var, log_std_var = L.get_output([self._l_mean, self._l_log_std], extended_obs_var)
    if self.min_std is not None:
        log_std_var = TT.maximum(log_std_var, np.log(self.min_std))
    return dict(mean=mean_var, log_std=log_std_var)
def step_gibbs(self, r_h, r_v, h, *params):
    '''Step Gibbs sample.

    Args:
        r_h (theano.randomstream): random variables for hiddens.
        r_v (theano.randomstream): random variables for visibles.
        h (T.tensor): hidden state.
        *params: theano shared variables.

    Returns:
        T.tensor: hidden samples.
        T.tensor: visible samples.
        T.tensor: conditional hidden probability.
        T.tensor: conditional visible probability.
    '''
    v, pv = self.step_sv_h(r_v, h, *params)
    h, ph = self.step_sh_v(r_h, v, *params)
    return h, v, ph, pv
def step_free_energy(self, x, beta, *params):
    '''Step free energy function.

    Args:
        x (T.tensor): data sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.
    '''
    W, v_params, h_params = self.split_params(*params)

    vis_term = beta * self.v_dist.get_energy_bias(x, *v_params)
    x = self.v_dist.scale_for_energy_model(x, *v_params)
    hid_act = beta * (T.dot(x, W) + self.h_dist.get_center(*h_params))
    fe = -vis_term - T.log(1. + T.exp(hid_act)).sum(axis=1)
    return fe
def step_free_energy_h(self, h, beta, *params):
    '''Step free energy function for hidden states.

    Args:
        h (T.tensor): hidden sample.
        beta (float): beta value for annealing.
        *params: theano shared variables.

    Returns:
        T.tensor: free energy.
    '''
    W, v_params, h_params = self.split_params(*params)

    hid_term = beta * self.h_dist.get_energy_bias(h, *h_params)
    h = self.h_dist.scale_for_energy_model(h, *h_params)
    vis_act = beta * (T.dot(h, W.T) + self.v_dist.get_center(*v_params))
    fe = -hid_term - T.log(1. + T.exp(vis_act)).sum(axis=1)
    return fe
def _step(self, m, y, h_, Ur):
    '''Step function for RNN call.

    Args:
        m (T.tensor): masks.
        y (T.tensor): inputs.
        h_ (T.tensor): recurrent state.
        Ur (theano.shared): recurrent connection.

    Returns:
        T.tensor: next recurrent state.
    '''
    preact = T.dot(h_, Ur) + y
    h = T.tanh(preact)
    h = m * h + (1 - m) * h_
    return h
def sgd(lr, tparams, grads, inp, cost, extra_ups=[], extra_outs=[],
        exclude_params=set([])):
    '''Stochastic gradient descent'''
    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(
        inp, [cost]+extra_outs, updates=gsup+extra_ups, profile=profile)

    pup = [(p, p - lr * g) for p, g in zip(tools.itemlist(tparams), gshared)
           if p.name not in exclude_params]

    if not isinstance(lr, list):
        lr = [lr]
    f_update = theano.function(lr, [], updates=pup, profile=profile)

    return f_grad_shared, f_update
def _generate_train_model_function(self, scores):
    u = T.lvector('u')
    i = T.lvector('i')
    j = T.lvector('j')

    self.W = theano.shared(numpy.zeros((self._dim)).astype('float32'), name='W')
    self.S = theano.shared(scores, name='S')

    x_ui = T.dot(self.W, self.S[u, i, :].T)
    x_uj = T.dot(self.W, self.S[u, j, :].T)
    x_uij = x_ui - x_uj

    obj = T.sum(
        T.log(T.nnet.sigmoid(x_uij)).sum() -
        self._lambda_w * 0.5 * (self.W ** 2).sum()
    )
    cost = -obj

    g_cost_W = T.grad(cost=cost, wrt=self.W)

    updates = [(self.W, self.W - self._learning_rate * g_cost_W)]

    self.train_model = theano.function(inputs=[u, i, j], outputs=cost, updates=updates)
def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(1.))
    b1_t = self.b1*self.l**(t-1)

    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)

        m_t = b1_t*m + (1 - b1_t)*g
        v_t = self.b2*v + (1 - self.b2)*g**2
        m_c = m_t / (1-self.b1**t)
        v_c = v_t / (1-self.b2**t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    return updates
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    Parameters
    ----------
    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    Returns
    -------
    return: boolean
        True if move is accepted, False otherwise
    """
    ediff = energy_prev - energy_next
    return (TT.exp(ediff) - s_rng.uniform(size=energy_prev.shape)) >= 0
def draw(self, **kwargs):
    """
    Returns a new position obtained after `n_steps` of HMC simulation.

    Parameters
    ----------
    kwargs: dictionary
        The `kwargs` dictionary is passed to the shared variable
        (self.positions) `get_value()` function. For example, to avoid
        copying the shared variable value, consider passing `borrow=True`.

    Returns
    -------
    rval: numpy matrix
        Numpy matrix whose dimensions are similar to `initial_position`.
    """
    self.simulate()
    return self.positions.get_value(borrow=False)
def _init_params(self):
    self.W_hhs = []
    self.b_hhs = []
    for dx in xrange(self.n_layers):
        W_hh = self.init_fn[dx](self.n_hids[(dx-1) % self.n_layers],
                                self.n_hids[dx],
                                self.sparsity[dx],
                                self.scale[dx],
                                rng=self.rng)
        self.W_hhs.append(theano.shared(value=W_hh, name="W%d_%s" % (dx, self.name)))
        if dx > 0:
            self.b_hhs.append(theano.shared(
                self.bias_fn[dx-1](self.n_hids[dx],
                                   self.bias_scale[dx-1],
                                   self.rng),
                name='b%d_%s' % (dx, self.name)))
    self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
    self.params_grad_scale = [self.grad_scale for x in self.params]
    if self.weight_noise:
        self.nW_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_hhs]
        self.nb_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_hhs]
        self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nb_hhs]
        self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                                      for x in self.noise_params]
def _init_params(self):
    self.iBlocks = 1  # number of blocks in the input (from lower layer)

    W_em = self.init_fn(self.n_in,
                        self.n_class,
                        self.sparsity,
                        self.scale,
                        self.rng)
    self.W_em = theano.shared(W_em, name='W_%s' % self.name)
    self.b_em = theano.shared(
        self.bias_fn(self.n_class, self.bias_scale, self.rng),
        name='b_%s' % self.name)
    U_em = theano.shared(((self.rng.rand(self.iBlocks, self.n_class,
                                         self.n_in, self.n_words_class)-0.5) /
                          (self.n_words_class*self.n_in)
                          ).astype(theano.config.floatX), name='U_%s' % self.name)
    self.U_em = U_em
    c_em = numpy.zeros((self.n_class, self.n_words_class), dtype='float32')
    n_words_last_class = self.n_out % self.n_words_class
    #c_em[-1, n_words_last_class:] = -numpy.inf
    self.c_em = theano.shared(c_em, name='c_%s' % self.name)

    self.params = [self.W_em, self.b_em, self.U_em, self.c_em]
    self.params_grad_scale = [self.grad_scale for x in self.params]
def __init__(self, rng, std=0.1, ndim=0, avg=0, shape_fn=None):
    """
    """
    assert rng is not None, "random number generator should not be empty!"
    super(GaussianNoise, self).__init__(0, 0, rng)

    self.std = std
    self.avg = avg
    self.ndim = ndim
    self.shape_fn = shape_fn
    if self.shape_fn:
        # Name is not important as it is not a parameter of the model
        self.noise_term = theano.shared(numpy.zeros((2,)*ndim,
                                                    dtype=theano.config.floatX),
                                        name='ndata')
        self.noise_params += [self.noise_term]
        self.noise_params_shape_fn += [shape_fn]
    self.trng = RandomStreams(rng.randint(1e5))
def fit(self, x):
    s = x.shape
    x = x.copy().reshape((s[0], np.prod(s[1:])))
    m = np.mean(x, axis=0)
    x -= m
    sigma = np.dot(x.T, x) / x.shape[0]
    U, S, V = linalg.svd(sigma)
    tmp = np.dot(U, np.diag(1./np.sqrt(S+self.regularization)))
    tmp2 = np.dot(U, np.diag(np.sqrt(S+self.regularization)))
    self.ZCA_mat = th.shared(np.dot(tmp, U.T).astype(th.config.floatX))
    self.inv_ZCA_mat = th.shared(np.dot(tmp2, U.T).astype(th.config.floatX))
    self.mean = th.shared(m.astype(th.config.floatX))
def compile(self, s_inputs_, s_loss_, v_params_, s_grads_=None, s_reg_=0,
            fetches_=None, updates_=None, givens_=None,
            trunc_grad_=None, profile_=False):
    '''
    compile optimizer against specific model

    Args:
        s_inputs_: list of symbolic input tensors, including label
        s_loss_: optimization loss, symbolic scalar
        v_params_: list of shared parameters to optimize
        s_grads_: list of gradients to apply, must be same order as v_params_,
            default is None (use autodiff).
        s_reg_: symbolic regularization term, default 0 (no regularization)
        updates_: update operation for shared values after a step of optimization,
            usually RNN states. Takes form [(v_var, s_new_var), ...]

    Returns:
        None
    '''
    self.s_loss = s_loss_
    self.s_reg = s_reg_
    if s_grads_ is None:
        s_grads_ = T.grad(
            self.s_loss + self.s_reg, list(v_params_),
            disconnected_inputs='warn')
    if type(trunc_grad_) == float:
        self.s_grads = [T.clip(g, -trunc_grad_, trunc_grad_) for g in s_grads_]
    else:
        self.s_grads = s_grads_
def init_tparams(params):
    tparams = OrderedDict()
    for kk, pp in params.iteritems():
        tparams[kk] = theano.shared(params[kk], name=kk, borrow=True)
        add_role(tparams[kk], PARAMETER)
    return tparams

# make prefix-appended name