The following 50 code examples, extracted from open-source Python projects, illustrate how to use theano.tensor().
def switch(condition, then_tensor, else_tensor):
    """
    Keras' implementation of switch for tensorflow uses tf.switch which accepts only scalar conditions.
    It should use tf.select instead.
    """
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        condition_shape = condition.get_shape()
        input_shape = then_tensor.get_shape()
        if condition_shape[-1] != input_shape[-1] and condition_shape[-1] == 1:
            # This means the last dim is an embedding dim. Keras does not mask this dimension. But tf wants
            # the condition and the then and else tensors to be the same shape.
            condition = K.dot(tf.cast(condition, tf.float32), tf.ones((1, input_shape[-1])))
        return tf.select(tf.cast(condition, dtype=tf.bool), then_tensor, else_tensor)
    else:
        import theano.tensor as T
        return T.switch(condition, then_tensor, else_tensor)
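As a minimal, self-contained sketch (not taken from the project above), this is the element-wise selection behaviour that the Theano branch relies on via T.switch:

# T.switch picks a value from `a` or `b` per element of `cond`.
import numpy as np
import theano
import theano.tensor as T

cond = T.fmatrix('cond')
a = T.fmatrix('a')
b = T.fmatrix('b')
f = theano.function([cond, a, b], T.switch(cond, a, b))
print(f(np.array([[1., 0.]], dtype='float32'),
        np.array([[10., 10.]], dtype='float32'),
        np.array([[-1., -1.]], dtype='float32')))  # [[10. -1.]]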
def dropout_layer(state_before, use_noise, trng):
    """
    :todo:
        - Fix according to _param
        - Test!

    From Cho's code here:
    https://github.com/nyu-dl/dl4mt-tutorial/blob/master/session2/nmt.py#L45
    """
    proj = tensor.switch(
        use_noise,
        # for training
        state_before * trng.binomial(state_before.shape, p=0.5, n=1,
                                     dtype=state_before.dtype),
        # for validation/sampling
        state_before * 0.5)
    return proj
def build_encoder_bi(tparams, options):
    """
    build bidirectional encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    embeddingr = embedding[::-1]
    x_mask = tensor.matrix('x_mask', dtype='float32')
    xr_mask = x_mask[::-1]

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder',
                                            mask=x_mask)
    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options,
                                             prefix='encoder_r',
                                             mask=xr_mask)

    ctx = tensor.concatenate([proj[0][-1], projr[0][-1]], axis=1)

    return embedding, x_mask, ctx

# some utilities
def _step(self, x_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + x_

    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_

    h = o * tensor.tanh(c)
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
def connect(self, inputs, mask, is_train):
    """ is_train: A boolean tensor.
    """
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]

    # Dropout mask sharing for variational dropout.
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step,  # Scan function
                          sequences=[inputs, mask],  # Input sequence
                          outputs_info=outputs_info,
                          name=_p(self.prefix, '_layers'),
                          n_steps=max_length)  # scan steps
    return rval[0]
def _step(self, x_, px_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + px_

    # i: input. f: forget. o: output. t: transform.
    # j: input w\ non-linearity. k: input w\o non-linearity.
    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    t = tensor.nnet.sigmoid(_slice(preact, 3, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 4, self.hidden_dim))

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_

    h = t * o * tensor.tanh(c) + (1. - t) * x_
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
def connect(self, inputs, mask, is_train):
    max_length = inputs.shape[0]
    batch_size = inputs.shape[1]
    outputs_info = [tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim),
                    tensor.alloc(numpy_floatX(0.), batch_size, self.hidden_dim)]

    # Dropout layers
    self.is_train = is_train
    if self.recurrent_dropout_layer is not None:
        self.recurrent_dropout_layer.generate_mask([batch_size, self.hidden_dim], is_train)

    proj_inputs = tensor.dot(inputs, self.W) + self.b
    rval, _ = theano.scan(self._step,  # Scan function
                          sequences=[inputs, proj_inputs, mask],  # Input sequence
                          outputs_info=outputs_info,
                          name=_p(self.prefix, '_layers'),
                          n_steps=max_length)  # scan steps
    return rval[0]
def _step(self, x_, px_, m_, h_, c_):
    preact = tensor.dot(h_, self.U) + px_

    i = tensor.nnet.sigmoid(_slice(preact, 0, self.hidden_dim))
    f = tensor.nnet.sigmoid(_slice(preact, 1, self.hidden_dim) + self.forget_bias)
    o = tensor.nnet.sigmoid(_slice(preact, 2, self.hidden_dim))
    j = tensor.tanh(_slice(preact, 3, self.hidden_dim))

    c = f * c_ + i * j
    c = m_[:, None] * c + (1. - m_)[:, None] * c_

    # Residual connection.
    h = o * tensor.tanh(c) + x_
    if self.recurrent_dropout_layer is not None:
        h = self.recurrent_dropout_layer.connect(h, self.is_train)
    h = m_[:, None] * h + (1. - m_)[:, None] * h_
    return h, c
def adadelta(parameters, gradients, rho=0.95, eps=1e-6):
    """
    Reference: ADADELTA: An Adaptive Learning Rate Method, Zeiler 2012.
    https://arxiv.org/abs/1212.5701
    Adapted from the Adadelta implementation from Tensorflow.
    """
    accum = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]
    accum_updates = [theano.shared(numpy.zeros(p.get_value().shape, floatX)) for p in parameters]

    new_accum = [rho * g0 + (1.0 - rho) * (g ** 2) for g0, g in izip(accum, gradients)]
    updates = [tensor.sqrt(d0 + eps) / tensor.sqrt(g0 + eps) * g
               for d0, g0, g in izip(accum_updates, new_accum, gradients)]
    new_accum_updates = [rho * d0 + (1.0 - rho) * (d ** 2)
                         for d0, d in izip(accum_updates, updates)]

    accum_ = zip(accum, new_accum)
    accum_updates_ = zip(accum_updates, new_accum_updates)
    parameters_ = [(p, (p - d)) for p, d in izip(parameters, updates)]
    return accum_ + accum_updates_ + parameters_
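A hedged usage sketch (the toy cost and variable names below are illustrative assumptions, and adadelta's module-level imports — theano, numpy as numpy, tensor, izip, floatX — are assumed to be in scope): the returned (shared variable, update expression) pairs plug directly into theano.function.

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')
x = T.vector('x')
cost = T.sum((w * x) ** 2)                 # toy quadratic cost
grads = T.grad(cost, [w])
train = theano.function([x], cost, updates=adadelta([w], grads))
train(np.ones(3, dtype=theano.config.floatX))   # one Adadelta step on w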
def shared_dropout_layer(shape, use_noise, trng, value, scaled=True):
    # re-scale dropout at training time, so we don't need to at test time
    if scaled:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32') / value,
            theano.shared(numpy.float32(1.)))
    else:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32'),
            theano.shared(numpy.float32(value)))
    return proj

# feedforward layer: affine transformation + point-wise nonlinearity
def mdclW(num_filters, num_channels, filter_size, winit, name, scales):
    # Coefficient Initializer
    sinit = lasagne.init.Constant(1.0 / (1 + len(scales)))
    # Total filter size
    size = filter_size + (filter_size - 1) * (scales[-1] - 1)
    # Multiscale Dilated Filter
    W = T.zeros((num_filters, num_channels, size, size))
    # Undilated Base Filter
    baseW = theano.shared(lasagne.utils.floatX(winit.sample((num_filters, num_channels, filter_size, filter_size))),
                          name=name + '.W')
    # iterate over the scales backwards so that we place the main filter on top
    for scale in scales[::-1]:
        W = T.set_subtensor(W[:, :, scales[-1] - scale:size - scales[-1] + scale:scale,
                              scales[-1] - scale:size - scales[-1] + scale:scale],
                            baseW * theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)),
                                                  name + '.coeff_' + str(scale)).dimshuffle(0, 'x', 'x', 'x'))
    return W

# Subpixel Upsample Layer from (https://arxiv.org/abs/1609.05158)
# This layer uses a set of r^2 set_subtensor calls to reorganize the tensor in a subpixel-layer upscaling style
# as done in the ESPCN Magic Pony paper for super-resolution.
# r is the upscale factor.
# c is the number of output channels.
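A small, hedged example of calling mdclW (the parameter values are illustrative assumptions; lasagne, theano and theano.tensor as T are assumed to be imported as in the snippet above):

import lasagne

W = mdclW(num_filters=16, num_channels=3, filter_size=3,
          winit=lasagne.init.GlorotUniform(), name='mdc1', scales=[1, 2, 4])
print(W.eval().shape)   # (16, 3, 9, 9): size = 3 + (3 - 1) * (4 - 1) = 9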
def errors(self, y):
    """Return a float representing the number of errors in the minibatch
    over the total number of examples of the minibatch; zero one
    loss over the size of the minibatch

    :type y: theano.tensor.TensorType
    :param y: corresponds to a vector that gives for each example the
              correct label
    """
    # check if y has same dimension of y_pred
    if y.ndim != self.y_pred.ndim:
        raise TypeError(
            'y should have the same shape as self.y_pred',
            ('y', y.type, 'y_pred', self.y_pred.type)
        )
    # check if y is of the correct datatype
    if y.dtype.startswith('int'):
        # the T.neq operator returns a vector of 0s and 1s, where 1
        # represents a mistake in prediction
        return T.mean(T.neq(self.y_pred, y))
    else:
        raise NotImplementedError()
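For reference, a standalone sketch (not part of the class above) of the zero-one loss expression T.mean(T.neq(...)) that errors() returns:

import numpy as np
import theano.tensor as T

y_pred = T.ivector('y_pred')
y = T.ivector('y')
err = T.mean(T.neq(y_pred, y))   # fraction of mismatching labels
print(err.eval({y_pred: np.array([0, 1, 2], dtype='int32'),
                y: np.array([0, 2, 2], dtype='int32')}))   # ~0.333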
def errors(self, y):
    """Return a float representing the number of errors in the minibatch
    over the total number of examples of the minibatch; zero one
    loss over the size of the minibatch

    :type y: theano.tensor.TensorType
    :param y: corresponds to a vector that gives for each example the
              correct label
    """
    # check if y has same dimension of y_pred
    if y.ndim != self.y_pred.ndim:
        raise TypeError('y should have the same shape as self.y_pred',
                        ('y', y.type, 'y_pred', self.y_pred.type))
    # check if y is of the correct datatype
    if y.dtype.startswith('int'):
        # the T.neq operator returns a vector of 0s and 1s, where 1
        # represents a mistake in prediction
        return T.mean(T.neq(self.y_pred, y))
    else:
        raise NotImplementedError()
def applyActivationFunction_LeakyReLU(inputData, leakiness):
    """leakiness : float
        Slope for negative input, usually between 0 and 1.
        A leakiness of 0 will lead to the standard rectifier,
        a leakiness of 1 will lead to a linear activation function,
        and any value in between will give a leaky rectifier.

        [1] Maas et al. (2013):
            Rectifier Nonlinearities Improve Neural Network Acoustic Models,
            http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf

    - The input is a tensor of shape (batchSize, FeatMaps, xDim, yDim, zDim)
    """
    pos = 0.5 * (1 + leakiness)
    neg = 0.5 * (1 - leakiness)
    output = pos * inputData + neg * abs(inputData)

    return output

# *** There actually exist several ways to implement PReLU activations ***

# PReLU activations (from Kamnitsas)
def applyActivationFunction_PReLU(inputData, PreluActivations):
    """Parametric Rectified Linear Unit.
    It follows:
    `f(x) = alpha * x for x < 0`,
    `f(x) = x for x >= 0`,
    where `alpha` is a learned array with the same shape as x.

    - The input is a tensor of shape (batchSize, FeatMaps, xDim, yDim, zDim)
    """
    preluActivationsAsRow = PreluActivations.dimshuffle('x', 0, 'x', 'x', 'x')

    pos = T.maximum(0, inputData)
    neg = preluActivationsAsRow * (inputData - abs(inputData)) * 0.5
    output = pos + neg

    return output

# --- version 2 ---
def applyActivationFunction_PReLU_v2(inputData, PreluActivations):
    """ inputData is a tensor5D with shape:
    (batchSize, Number of feature Maps, convolvedImageShape[0], convolvedImageShape[1], convolvedImageShape[2])
    """
    # The input is a tensor of shape (batchSize, FeatMaps, xDim, yDim, zDim)
    preluActivationsAsRow = PreluActivations.dimshuffle('x', 0, 'x', 'x', 'x')

    pos = (inputData + abs(inputData)) / 2.0
    neg = preluActivationsAsRow * ((inputData - abs(inputData)) / 2.0)
    output = pos + neg

    return output

# --- version 3 ---
def applyActivationFunction_PReLU_v3(inputData, PreluActivations):
    """ inputData is a tensor5D with shape:
    (batchSize, Number of feature Maps, convolvedImageShape[0], convolvedImageShape[1], convolvedImageShape[2])
    """
    # The input is a tensor of shape (batchSize, FeatMaps, xDim, yDim, zDim)
    preluActivationsAsRow = PreluActivations.dimshuffle('x', 0, 'x', 'x', 'x')

    pos = 0.5 * (1 + preluActivationsAsRow)
    neg = 0.5 * (1 - preluActivationsAsRow)
    output = pos * inputData + neg * abs(inputData)

    return output

# Benchmark on ReLU/PReLU activations:
# http://gforge.se/2015/06/benchmarking-relu-and-prelu/

# TODO. Implement some other activation functions:
#   Ex: Randomized ReLU
#       S-shape Relu
#       ThresholdedReLU
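The three PReLU variants above are algebraically equivalent. A hedged sanity check (the shapes and alpha values are illustrative assumptions; the three functions and their module-level `import theano.tensor as T` are assumed in scope):

import numpy as np
import theano
import theano.tensor as T

tensor5 = T.TensorType('float32', (False,) * 5)
x = tensor5('x')
alpha = theano.shared(np.float32([0.25, 0.25, 0.1, 0.5]))  # one alpha per feature map

v1 = applyActivationFunction_PReLU(x, alpha)
v2 = applyActivationFunction_PReLU_v2(x, alpha)
v3 = applyActivationFunction_PReLU_v3(x, alpha)

xv = np.random.randn(2, 4, 3, 3, 3).astype('float32')
print(np.allclose(v1.eval({x: xv}), v2.eval({x: xv})))  # True
print(np.allclose(v1.eval({x: xv}), v3.eval({x: xv})))  # True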
def errors(self, y):
    """Return a float representing the number of errors in the minibatch
    over the total number of examples of the minibatch; zero one
    loss over the size of the minibatch

    :type y: theano.tensor.TensorType
    :param y: corresponds to a vector that gives for each example the
              correct label
    """
    # check if y has same dimension of y_pred
    if y.ndim != self.y_pred.ndim:
        raise TypeError(
            'y should have the same shape as self.y_pred',
            ('y', y.type, 'y_pred', self.y_pred.type)
        )
    # check if y is of the correct datatype
    if y.dtype.startswith('int'):
        # the T.neq operator returns a vector of 0s and 1s, where 1
        # represents a mistake in prediction
        return T.mean(T.neq(self.y_pred, y))
    else:
        return T.sum((y - self.y_pred) ** 2)
def wrapped_conv(*args, **kwargs):
    copy = dict(kwargs)
    copy.pop("image_shape", None)
    copy.pop("filter_shape", None)
    assert copy.pop("filter_flip", False)

    input, W, input_shape, get_W_shape = args
    if theano.config.device == 'cpu':
        return theano.tensor.nnet.conv2d(*args, **kwargs)
    try:
        return theano.sandbox.cuda.dnn.dnn_conv(
            input.astype('float32'),
            W.astype('float32'),
            **copy
        )
    except Exception as e:
        print("falling back to default conv2d")
        return theano.tensor.nnet.conv2d(*args, **kwargs)
def dropout(state_before, is_train, trng):
    """
    dropout with p=0.5

    Parameters
    ----------
    state_before : theano 3d tensor, input data, dimensions: (num of time steps, batch size, dim of vector)
    is_train     : theano shared scalar, 0. = test/valid, 1. = train
    trng         : random number generator

    Returns
    -------
    proj         : theano 3d tensor, output data, dimensions: (num of time steps, batch size, dim of vector)
    """
    proj = tensor.switch(is_train,
                         state_before * trng.binomial(state_before.shape, p=0.5, n=1,
                                                      dtype=state_before.dtype),
                         state_before * 0.5)
    return proj
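A hedged usage sketch (the RNG seed and input shapes are assumptions; dropout's module alias `tensor` is assumed imported) showing how the shared is_train flag toggles between the two branches:

import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

trng = MRG_RandomStreams(seed=1234)
is_train = theano.shared(np.float32(1.))      # 1. = train, 0. = test/valid
x = T.ftensor3('x')
f = theano.function([x], dropout(x, is_train, trng))

xv = np.ones((5, 2, 4), dtype='float32')
print(f(xv))                 # training: roughly half the entries are zeroed
is_train.set_value(np.float32(0.))
print(f(xv))                 # inference: every entry scaled by 0.5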
def fullyconnected_layer(tparams, state_below, options, prefix, activ='lambda x: x', **kwargs):
    """
    compute the forward pass for a fully connected layer

    Parameters
    ----------
    tparams     : OrderedDict of theano shared variables, {parameter name: value}
    state_below : theano 3d tensor, input data, dimensions: (num of time steps, batch size, dim of vector)
    options     : dictionary, {hyperparameter: value}
    prefix      : string, layer name
    activ       : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
                : theano 3d tensor, output data, dimensions: (num of time steps, batch size, dim of vector)
    """
    return eval(activ)(tensor.dot(state_below, tparams[p_name(prefix, 'W')]) +
                       tparams[p_name(prefix, 'b')])
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """
    compute the forward pass for a gate layer

    Parameters
    ----------
    tparams       : OrderedDict of theano shared variables, {parameter name: value}
    X_word        : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char        : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options       : dictionary, {hyperparameter: value}
    prefix        : string, layer name
    pretrain_mode : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ         : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X             : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) +
                            tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
    return eval(activ)(X)
def theano_logsumexp(x, axis=None):
    """
    Compute log(sum(exp(x), axis=axis)) in a numerically stable fashion.

    Parameters
    ----------
    x : tensor_like
        A Theano tensor (any dimension will do).
    axis : int or symbolic integer scalar, or None
        Axis over which to perform the summation. `None`, the
        default, performs over all axes.

    Returns
    -------
    result : ndarray or scalar
        The result of the log(sum(exp(...))) operation.
    """
    xmax = x.max(axis=axis, keepdims=True)
    xmax_ = x.max(axis=axis)
    return xmax_ + T.log(T.exp(x - xmax).sum(axis=axis))
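A quick, hedged check (the values are illustrative) that the stabilized form stays finite where the naive expression overflows:

import numpy as np
import theano.tensor as T

x = T.dmatrix('x')
big = np.array([[1000., 1000.]])
print(theano_logsumexp(x, axis=1).eval({x: big}))   # [ 1000.69314718]
print(T.log(T.exp(x).sum(axis=1)).eval({x: big}))   # [ inf]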
def compute_sample(self, state_below, temp=1, use_noise=False):
    """
    Constructs the theano expression that samples from the output layer.

    :type state_below: tensor or layer
    :param state_below: The theano expression (or groundhog layer)
        representing the input of the cost layer

    :type temp: float or tensor scalar
    :param temp: scalar representing the temperature that should be used
        when sampling from the output distribution

    :type use_noise: bool
    :param use_noise: flag. If true, noise is used when computing the
        output of the model
    """
    raise NotImplementedError
def __init__(self, incoming, num_units, max_steps, peepholes=False, mask_input=None, **kwargs):
    """
    initialization
    :param incoming: bidirectional mLSTM for the passage
    :param num_units:
    :param max_steps: max num steps to generate answer words, can be a tensor scalar variable
    :param peepholes:
    :param mask_input: passage's length mask
    :param kwargs:
    """
    super(AnsPointerLayer, self).__init__(incoming, num_units, peepholes=peepholes,
                                          precompute_input=False, mask_input=mask_input,
                                          only_return_final=False, **kwargs)
    self.max_steps = max_steps

    # initializes attention weights
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[2:])

    self.V_pointer = self.add_param(init.Normal(0.1), (num_inputs, num_units), 'V_pointer')
    # doesn't need transpose
    self.v_pointer = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
    self.W_a_pointer = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_a_pointer')
    self.b_a_pointer = self.add_param(init.Constant(0.), (1, num_units), 'b_a_pointer')
    self.c_pointer = self.add_param(init.Constant(0.), (1, 1), 'c_pointer')
def __init__(self, incoming, num_units, max_steps, peepholes=False, mask_input=None, **kwargs):
    """
    initialization
    :param incoming: bidirectional mLSTM for the passage
    :param num_units:
    :param max_steps: max num steps to generate answer words, can be a tensor scalar variable
    :param peepholes:
    :param mask_input: passage's length mask
    :param kwargs:
    """
    super(AnsPointerLayer, self).__init__(incoming, num_units, peepholes=peepholes,
                                          precompute_input=False, mask_input=mask_input,
                                          only_return_final=False, **kwargs)
    self.max_steps = max_steps

    # initializes attention weights
    input_shape = self.input_shapes[0]
    num_inputs = np.prod(input_shape[2:])

    self.V_pointer = self.add_param(init.Normal(0.1), (num_inputs, num_units), 'V_pointer')
    # doesn't need transpose
    self.v_pointer = self.add_param(init.Normal(0.1), (num_units, 1), 'v_pointer')
    self.W_a_pointer = self.add_param(init.Normal(0.1), (num_units, num_units), 'W_a_pointer')
    self.b_a_pointer = self.add_param(init.Constant(0.), (num_units, ), 'b_a_pointer')

    c_pointer = theano.shared(np.array([0.], dtype='float32'), name='c_pointer', broadcastable=(True, ))
    self.c_pointer = self.add_param(c_pointer, (1,), 'c_pointer')
def __init__(self, filter_size=(3, 3), input_feature=None, output_feature=None,
             feature_map_multiplier=1, subsample=(1, 1), border='half', need_bias=False, dc=0.0):
    """
    This 2d convolution deals with 4d tensor:
    (batch_size, feature map/channel, filter_row, filter_col)

    feature_map_multiplier always has a higher priority than input_feature/output_feature
    """
    super(Conv2d, self).__init__()

    self.filterSize = filter_size
    self.inputFeature = input_feature
    self.outputFeature = output_feature
    self.mapMulti = feature_map_multiplier
    self.border = border
    self.subsample = subsample
    self.need_bias = need_bias
    self.dc = dc

    self.w = None
    self.b = None
def set_to_zero(list_of_tensors_and_shapes, on_gpu=True):
    """
    :param: list_of_tensors_and_shapes of the form [(tensor1, shape1), ...]
    """
    if on_gpu:
        updates = []
        for tensor, shape in list_of_tensors_and_shapes:
            if np.sum(shape) == 1:
                updates.append((tensor, 0))
            else:
                updates.append((tensor, T.patternbroadcast(T.zeros(shape), [False] * tensor.ndim)))
        return updates
    else:
        updates = []
        for tensor, shape in list_of_tensors_and_shapes:
            updates.append((tensor, np.zeros(shape, dtype=config_.floatX)))
        return updates
def __init__(self, random_seed=dt.datetime.now().microsecond, compute_grad=True):
    self.rng = np.random.RandomState(random_seed)

    self.batch_size = cfg.CONST.BATCH_SIZE
    self.img_w = cfg.CONST.IMG_W
    self.img_h = cfg.CONST.IMG_H
    self.n_vox = cfg.CONST.N_VOX
    self.compute_grad = compute_grad

    # (self.batch_size, 3, self.img_h, self.img_w),
    # override x and is_x_tensor4 when using multi-view network
    self.x = tensor.tensor4()
    self.is_x_tensor4 = True

    # (self.batch_size, self.n_vox, 2, self.n_vox, self.n_vox),
    self.y = tensor5()

    self.activations = []  # list of all intermediate activations
    self.loss = []         # final loss
    self.output = []       # final output
    self.error = []        # final output error
    self.params = []       # all learnable params
    self.grads = []        # will be filled out automatically
    self.setup()
def set_output(self):
    output_shape = self._output_shape
    padding = self._padding
    unpool_size = self._unpool_size
    unpooled_output = tensor.alloc(0.0,  # Value to fill the tensor
                                   output_shape[0],
                                   output_shape[1] + 2 * padding[0],
                                   output_shape[2],
                                   output_shape[3] + 2 * padding[1],
                                   output_shape[4] + 2 * padding[2])

    unpooled_output = tensor.set_subtensor(
        unpooled_output[:, padding[0]:output_shape[1] + padding[0]:unpool_size[0], :,
                        padding[1]:output_shape[3] + padding[1]:unpool_size[1],
                        padding[2]:output_shape[4] + padding[2]:unpool_size[2]],
        self._prev_layer.output)
    self._output = unpooled_output
def set_output(self):
    padding = self._padding
    input_shape = self._input_shape
    if np.sum(self._padding) > 0:
        padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                    input_shape[0],
                                    input_shape[1] + 2 * padding[1],
                                    input_shape[2],
                                    input_shape[3] + 2 * padding[3],
                                    input_shape[4] + 2 * padding[4])

        padded_input = tensor.set_subtensor(
            padded_input[:, padding[1]:padding[1] + input_shape[1], :,
                         padding[3]:padding[3] + input_shape[3],
                         padding[4]:padding[4] + input_shape[4]],
            self._prev_layer.output)
    else:
        padded_input = self._prev_layer.output

    self._output = conv3d2d.conv3d(padded_input, self.W.val) + \
        self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')
def set_output(self):
    padding = self._padding
    input_shape = self._input_shape
    padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                input_shape[0],
                                input_shape[1] + 2 * padding[1],
                                input_shape[2],
                                input_shape[3] + 2 * padding[3],
                                input_shape[4] + 2 * padding[4])

    padded_input = tensor.set_subtensor(
        padded_input[:, padding[1]:padding[1] + input_shape[1], :,
                     padding[3]:padding[3] + input_shape[3],
                     padding[4]:padding[4] + input_shape[4]],
        self._prev_layer.output)

    fc_output = tensor.reshape(
        tensor.dot(self._fc_layer.output, self.Wx.val), self._output_shape)
    self._output = conv3d2d.conv3d(padded_input, self.Wh.val) + \
        fc_output + self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')
def set_output(self):
    padding = self._padding
    input_shape = self._input_shape
    padded_input = tensor.alloc(0.0,  # Value to fill the tensor
                                input_shape[0],
                                input_shape[1] + 2 * padding[1],
                                input_shape[2],
                                input_shape[3] + 2 * padding[3],
                                input_shape[4] + 2 * padding[4])

    padded_input = tensor.set_subtensor(
        padded_input[:, padding[1]:padding[1] + input_shape[1], :,
                     padding[3]:padding[3] + input_shape[3],
                     padding[4]:padding[4] + input_shape[4]],
        self._prev_layer.output)

    self._output = conv3d2d.conv3d(padded_input, self.W.val) + \
        self.b.val.dimshuffle('x', 'x', 0, 'x', 'x')
def dropout_layer(state_before, use_noise, trng):
    """
    tensor switch is like an if statement that checks the
    value of the theano shared variable (use_noise), before
    either dropping out the state_before tensor or
    computing the appropriate activation. During training/testing
    use_noise is toggled on and off.
    """
    proj = tensor.switch(
        use_noise,
        state_before * trng.binomial(state_before.shape, p=0.5, n=1,
                                     dtype=state_before.dtype),
        state_before * 0.5)
    return proj

# make prefix-appended name
def build_multi_dssm(input_var=None, num_samples=None, num_entries=6, num_ngrams=42**3,
                     num_hid1=300, num_hid2=300, num_out=128):
    """Builds a DSSM structure in a Lasagne/Theano way.

    The built DSSM is the neural network that computes the projection of only one paper.
    The input ``input_var`` should have two dimensions: (``num_samples * num_entries``, ``num_ngrams``).
    The output is then computed in a batch way: one paper at a time, but all papers from the same sample
    in the dataset are grouped (cited papers, citing papers and ``num_entries - 2`` irrelevant papers).

    Args:
        input_var (:class:`theano.tensor.TensorType` or None): symbolic input variable of the DSSM
        num_samples (int): the number of samples in the batch input dataset (number of rows)
        num_entries (int): the number of compared papers in the DSSM structure
        num_ngrams (int): the size of the vocabulary
        num_hid1 (int): the number of units in the first hidden layer
        num_hid2 (int): the number of units in the second hidden layer
        num_out (int): the number of units in the output layer

    Returns:
        :class:`lasagne.layers.Layer`: the output layer of the DSSM
    """
    assert (num_entries > 2)

    # Initialise input layer
    if num_samples is None:
        num_rows = None
    else:
        num_rows = num_samples * num_entries

    l_in = layers.InputLayer(shape=(num_rows, num_ngrams), input_var=input_var)

    # Initialise the hidden and output layers of the DSSM
    l_hid1 = layers.DenseLayer(l_in, num_units=num_hid1, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_hid2 = layers.DenseLayer(l_hid1, num_units=num_hid2, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_out = layers.DenseLayer(l_hid2, num_units=num_out, nonlinearity=nonlinearities.tanh, W=init.GlorotUniform())
    l_out = layers.ExpressionLayer(l_out, lambda X: X / X.norm(2), output_shape='auto')

    return l_out
def compute_loss(output, num_samples, num_entries=6, gamma=500.0):
    """Compute the loss of a dataset, given the output of the DSSM.

    Args:
        output (:class:`lasagne.layers.Layer`): the output of the DSSM
        num_samples (int): the number of samples in the dataset
        num_entries (int): the number of compared papers in the DSSM structure
        gamma (float): the coefficient applied in the softmax of the similarities

    Returns:
        theano.tensor.TensorType: the loss of the dataset
    """
    assert (num_entries > 2)
    assert (num_samples > 0)

    # Post-NN operations to compute the loss
    # First, we extract the first output of each bundle
    mask = np.zeros(num_entries * num_samples)
    mask[::num_entries] = 1
    unmask = np.ones(num_entries * num_samples) - mask
    cited = T.extra_ops.compress(mask, output, axis=0)
    odocs = T.extra_ops.compress(unmask, output, axis=0)

    # We duplicate each row 'x' num_entries-1 times
    cited = T.extra_ops.repeat(cited, num_entries - 1, axis=0)

    # Then we compute element-wise product of x with each y, for each bundle
    sims = T.sum(cited * odocs, axis=1)

    # We reshape the similarities
    sims = T.reshape(sims, (num_samples, num_entries - 1))
    sims = gamma * sims

    # We take the softmax of each row
    probs = T.nnet.softmax(sims)

    # We compute the loss as the sum of element on the first column
    loss_mask = np.zeros(num_entries - 1)
    loss_mask[0] = 1
    loss = T.extra_ops.compress(loss_mask, probs, axis=1)

    return -T.log(T.prod(loss))
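Putting the two functions above together, a hedged end-to-end sketch (the optimizer, learning rate and batch size are assumptions, not choices from the source project; build_multi_dssm and compute_loss are assumed importable with their own module imports):

import theano
import theano.tensor as T
from lasagne import layers, updates

num_samples, num_entries = 32, 6
input_var = T.matrix('input')
l_out = build_multi_dssm(input_var, num_samples=num_samples, num_entries=num_entries)
output = layers.get_output(l_out)                 # the tensor fed to compute_loss
loss = compute_loss(output, num_samples=num_samples, num_entries=num_entries)
params = layers.get_all_params(l_out, trainable=True)
train_fn = theano.function([input_var], loss,
                           updates=updates.adagrad(loss, params, learning_rate=0.1))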
def extend_middle_dim(_2D, num):
    """
    Gets a 2D tensor (A, B), outputs a 3D tensor (A, num, B)

    :usage:
        >>> TODO
    """
    rval = _2D.dimshuffle((0, 'x', 1))
    rval = T.alloc(rval, rval.shape[0], num, rval.shape[2])
    return rval
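A small usage sketch (not from the original docstring, which leaves its example as TODO; the module's `import theano.tensor as T` is assumed):

import numpy as np
import theano.tensor as T

m = T.fmatrix('m')
out = extend_middle_dim(m, 3)
mv = np.arange(4, dtype='float32').reshape(2, 2)
print(out.eval({m: mv}).shape)   # (2, 3, 2)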
def T_one_hot(inp_tensor, n_classes):
    """
    :todo:
        - Implement other methods from here:
        - Compare them speed-wise for different sizes
        - Implement N_one_hot for Numpy version, with speed tests.

    Theano one-hot (1-of-k) from an input tensor of indices.
    If the indices are of the shape (a0, a1, ..., an) the output
    shape would be (a0, a1, ..., an, n_classes).

    :params:
        - inp_tensor: any theano tensor with dtype int* as indices and all of
            them between [0, n_classes-1].
        - n_classes: number of classes which determines the output size.

    :usage:
        >>> idx = T.itensor3()
        >>> idx_val = numpy.array([[[0,1,2,3],[4,5,6,7]]], dtype='int32')
        >>> one_hot = T_one_hot(idx, 8)
        >>> out = one_hot.eval({idx: idx_val})
        >>> print out
        array([[[[ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
                 [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.],
                 [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
                 [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.]],
                [[ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.],
                 [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.],
                 [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.],
                 [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]]]])
        >>> print idx_val.shape, out.shape
        (1, 2, 4) (1, 2, 4, 8)
    """
    flattened = inp_tensor.flatten()
    z = T.zeros((flattened.shape[0], n_classes), dtype=theano.config.floatX)
    one_hot = T.set_subtensor(z[T.arange(flattened.shape[0]), flattened], 1)
    out_shape = [inp_tensor.shape[i] for i in xrange(inp_tensor.ndim)] + [n_classes]
    one_hot = one_hot.reshape(out_shape)
    return one_hot
def build_encoder(tparams, options):
    """
    build an encoder, given pre-computed word embeddings
    """
    # word embedding (source)
    embedding = tensor.tensor3('embedding', dtype='float32')
    x_mask = tensor.matrix('x_mask', dtype='float32')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
                                            prefix='encoder',
                                            mask=x_mask)
    ctx = proj[0][-1]

    return embedding, x_mask, ctx
def connect(self, inputs):
    features = [None] * self.num_feature_types
    for i in range(self.num_feature_types):
        indices = inputs[:, :, i].flatten()
        proj_shape = [inputs.shape[0], inputs.shape[1], self.embedding_shapes[i][1]]
        features[i] = self.embeddings[i][indices].reshape(proj_shape)

    if self.num_feature_types == 1:
        return features[0]
    return tensor.concatenate(features, axis=2)
def connect(self, inputs):
    energy = tensor.dot(inputs, self.W) + self.b
    energy = energy.reshape([energy.shape[0] * energy.shape[1], energy.shape[2]])
    log_scores = tensor.log(tensor.nnet.softmax(energy))
    predictions = tensor.argmax(log_scores, axis=-1)
    return (log_scores, predictions)