The following 35 code examples, extracted from open-source Python projects, illustrate how to use lasagne.nonlinearities().
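Most of the examples below simply pass a member of lasagne.nonlinearities (or an instance of a parameterized one such as LeakyRectify) as the nonlinearity argument of a layer. As a minimal sketch of that pattern (the layer sizes here are arbitrary, chosen only for illustration):

import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer
from lasagne.nonlinearities import rectify, softmax, LeakyRectify

# nonlinearities are plain callables mapping a Theano expression to another
x = T.matrix('x')
l_in = InputLayer(shape=(None, 100), input_var=x)
# pass a function ...
l_hid = DenseLayer(l_in, num_units=50, nonlinearity=rectify)
# ... or an instance of a parameterized nonlinearity
l_hid2 = DenseLayer(l_hid, num_units=50, nonlinearity=LeakyRectify(0.1))
l_out = DenseLayer(l_hid2, num_units=10, nonlinearity=softmax)

# symbolic network output
y = lasagne.layers.get_output(l_out)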
def exe_rnn(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    layer_rnn = RecurrentLayer(layer_input, num_units, nonlinearity=nonlinearities.tanh, only_return_final=True,
                               W_in_to_hid=lasagne.init.GlorotUniform(), W_hid_to_hid=lasagne.init.GlorotUniform(),
                               b=lasagne.init.Constant(0.), name='RNN')
    # W = layer_rnn.W_hid_to_hid.sum()
    # U = layer_rnn.W_in_to_hid.sum()
    # b = layer_rnn.b.sum()

    layer_output = DenseLayer(layer_rnn, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_rnn, input_var, target_var, batch_size, length, position, binominal)
def build_critic(input_var=None):
    from lasagne.layers import (InputLayer, Conv2DLayer, ReshapeLayer,
                                DenseLayer)
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import LeakyRectify
    lrelu = LeakyRectify(0.2)
    # input: (None, 1, 28, 28)
    layer = InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    # two convolutions
    layer = batch_norm(Conv2DLayer(layer, 64, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    layer = batch_norm(Conv2DLayer(layer, 128, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    # fully-connected layer
    layer = batch_norm(DenseLayer(layer, 1024, nonlinearity=lrelu))
    # output layer (linear)
    layer = DenseLayer(layer, 1, nonlinearity=None)
    print("critic output:", layer.output_shape)
    return layer
def build_critic(input_var=None, verbose=False):
    from lasagne.layers import (InputLayer, Conv2DLayer, ReshapeLayer,
                                DenseLayer)
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import LeakyRectify, sigmoid
    lrelu = LeakyRectify(0.2)
    # input: (None, 3, 32, 32)
    layer = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)
    # three convolutions
    layer = batch_norm(Conv2DLayer(layer, 128, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    layer = batch_norm(Conv2DLayer(layer, 256, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    layer = batch_norm(Conv2DLayer(layer, 512, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    # # fully-connected layer
    # layer = batch_norm(DenseLayer(layer, 1024, nonlinearity=lrelu))
    # output layer (linear)
    layer = DenseLayer(layer, 1, nonlinearity=None)
    if verbose:
        print("critic output:", layer.output_shape)
    return layer
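As a rough usage sketch for a critic built this way (the variable names and the compiled scoring function are illustrative assumptions, not part of the snippets above), the returned layer can be turned into a symbolic score with lasagne.layers.get_output and compiled with theano.function:

import theano
import theano.tensor as T
import lasagne

# hypothetical usage of build_critic() above
images = T.tensor4('images')
critic = build_critic(images)

# symbolic critic scores; one scalar per example (linear output layer)
scores = lasagne.layers.get_output(critic)

# compile a function that scores a minibatch of images
score_fn = theano.function([images], scores)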
def build_BiRNN_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                    precompute_input=True, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match rnn incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    return build_BiRNN(incoming, num_units, mask=mask, grad_clipping=grad_clipping, nonlinearity=nonlinearity,
                       precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def build_BiLSTM_CNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                     peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn?
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_cnn_layer, incoming2], axis=2)

    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def conv(network, batch_norm, num_layers, num_filters, filter_size, pad,
         pool_size, dropout):
    for k in range(num_layers):
        network = lnn.layers.Conv2DLayer(
            network, num_filters=num_filters, filter_size=filter_size,
            W=lnn.init.Orthogonal(gain=np.sqrt(2 / (1 + .1 ** 2))),
            pad=pad, nonlinearity=lnn.nonlinearities.rectify,
            name='Conv_{}'.format(k))
        if batch_norm:
            network = lnn.layers.batch_norm(network)

    if pool_size:
        network = lnn.layers.MaxPool2DLayer(network, pool_size=pool_size,
                                            name='Pool')
    if dropout > 0.0:
        network = lnn.layers.DropoutLayer(network, p=dropout)

    return network
def gap(network, out_size, batch_norm, gap_nonlinearity, out_nonlinearity):
    gap_nonlinearity = getattr(lnn.nonlinearities, gap_nonlinearity)
    out_nonlinearity = getattr(lnn.nonlinearities, out_nonlinearity)

    # output classification layer
    network = lnn.layers.Conv2DLayer(
        network, num_filters=out_size, filter_size=1,
        nonlinearity=gap_nonlinearity, name='Output_Conv')
    if batch_norm:
        network = lnn.layers.batch_norm(network)

    network = lnn.layers.Pool2DLayer(
        network, pool_size=network.output_shape[-2:], ignore_border=False,
        mode='average_exc_pad', name='GlobalAveragePool')
    network = lnn.layers.FlattenLayer(network, name='Flatten')
    network = lnn.layers.NonlinearityLayer(
        network, nonlinearity=out_nonlinearity, name='output')

    return network
def dense(network, batch_norm, nonlinearity, num_layers, num_units, dropout):
    nl = getattr(lnn.nonlinearities, nonlinearity)

    for i in range(num_layers):
        network = lnn.layers.DenseLayer(
            network, num_units=num_units, nonlinearity=nl,
            name='fc-{}'.format(i))
        if batch_norm:
            network = lnn.layers.batch_norm(network)
        if dropout > 0.0:
            network = lnn.layers.DropoutLayer(network, p=dropout)

    return network
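The conv/gap/dense blocks above look activations up by name with getattr, so the nonlinearity can be chosen from a configuration string. A tiny sketch of that lookup (the lnn alias and the 'rectify' string are only illustrative):

import lasagne as lnn

# a string coming from a configuration file ...
name = 'rectify'
# ... resolved to the actual callable in lasagne.nonlinearities
nl = getattr(lnn.nonlinearities, name)

assert nl is lnn.nonlinearities.rectify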
def discriminator(input_var):
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(4, 4),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=16, filter_size=(4, 4),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    # 1x1 convolution producing a single linear score map
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=1, filter_size=1, stride=1,
        nonlinearity=lasagne.nonlinearities.linear)
    return network
def discriminator(input_var):
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(4, 4),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=16, filter_size=(4, 4),
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    # 1x1 convolution producing a single sigmoid output map
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=1, filter_size=1, stride=1,
        nonlinearity=lasagne.nonlinearities.sigmoid)
    return network
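The two discriminator variants differ only in their last 1x1 convolution: the first ends with a linear output (raw scores), the second with a sigmoid (probabilities). A hedged sketch of plugging the sigmoid variant into a standard binary cross-entropy GAN loss (variable names assumed for illustration):

import theano.tensor as T
import lasagne

images = T.tensor4('images')
targets = T.matrix('targets')          # 1 for real, 0 for generated

disc = discriminator(images)           # sigmoid-output variant above
probs = lasagne.layers.get_output(disc).flatten(2)

# standard GAN discriminator loss
loss = lasagne.objectives.binary_crossentropy(probs, targets).mean()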
def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
             crop=0, untie_biases=False,
             W=initmethod(), b=lasagne.init.Constant(0.),
             nonlinearity=lasagne.nonlinearities.rectify,
             flip_filters=False, **kwargs):
    super(DeconvLayer, self).__init__(
        incoming, num_filters, filter_size, stride, crop, untie_biases,
        W, b, nonlinearity, flip_filters, n=2, **kwargs)
    # rename self.pad to self.crop
    self.crop = self.pad
    del self.pad
def __init__(self, incoming, num_units, mask_generator, layerIdx,
             W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
             nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
    super(MaskedLayer, self).__init__(incoming, num_units, W, b,
                                      nonlinearity, **kwargs)
    self.mask_generator = mask_generator
    num_inputs = int(np.prod(self.input_shape[1:]))
    self.weights_mask = self.add_param(
        spec=np.ones((num_inputs, num_units), dtype=np.float32),
        shape=(num_inputs, num_units),
        name='weights_mask',
        trainable=False,
        regularizable=False)
    self.layerIdx = layerIdx
    self.shuffle_update = [(self.weights_mask,
                            mask_generator.get_mask_layer_UPDATE(self.layerIdx))]
def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        input = input.flatten(2)

    activation = T.dot(input, self.W * self.weights_mask)

    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)

    return self.nonlinearity(activation)


# Conditioning Masked Layer
# Currently not used.
# class CML(MaskedLayer):
#     def __init__(self, incoming, num_units, mask_generator, use_cond_mask=False,
#                  U=lasagne.init.GlorotUniform(), W=lasagne.init.GlorotUniform(),
#                  b=init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
#         super(CML, self).__init__(incoming, num_units, mask_generator, W,
#                                   b, nonlinearity, **kwargs)
#         self.use_cond_mask = use_cond_mask
#         if use_cond_mask:
#             self.U = self.add_param(spec=U,
#                                     shape=(num_inputs, num_units),
#                                     name='U',
#                                     trainable=True,
#                                     regularizable=False)
#
#     def get_output_for(self, input, **kwargs):
#         lin = self.lin_output = T.dot(input, self.W * self.weights_mask) + self.b
#         if self.use_cond_mask:
#             lin = lin + T.dot(T.ones_like(input), self.U * self.weights_mask)
#         return lin if self._activation is None else self._activation(lin)


# Made layer, adopted from M.Germain
def exe_maxru(length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')

    time_updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    time_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                       b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                     W_cell=lasagne.init.GlorotUniform())
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.GlorotUniform())
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_taru = MAXRULayer(layer_input, num_units, max_length=length,
                            P_time=lasagne.init.GlorotUniform(), nonlinearity=nonlinearities.tanh,
                            resetgate=resetgate, updategate=updategate, hidden_update=hiden_update,
                            time_updategate=time_updategate, time_update=time_update,
                            only_return_final=True, name='MAXRU', p=0.)
    # W = layer_taru.W_hid_to_hidden_update.sum()
    # U = layer_taru.W_in_to_hidden_update.sum()
    # b = layer_taru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_taru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, input_var, target_var, batch_size, length, position, binominal)
def exe_lstm(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    ingate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                  W_cell=lasagne.init.Uniform(range=0.1))
    outgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                   W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                      W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_lstm = LSTMLayer(layer_input, num_units, ingate=ingate, forgetgate=forgetgate, cell=cell, outgate=outgate,
                           peepholes=False, nonlinearity=nonlinearities.tanh, only_return_final=True, name='LSTM')
    # W = layer_lstm.W_hid_to_cell.sum()
    # U = layer_lstm.W_in_to_cell.sum()
    # b = layer_lstm.b_cell.sum()

    layer_output = DenseLayer(layer_lstm, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_lstm, input_var, target_var, batch_size, length, position, binominal)
def exe_gru(use_embedd, length, num_units, position, binominal, reset_input):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(batch_size, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_gru = GRULayer_ANA(layer_input, num_units, resetgate=resetgate, updategate=updategate,
                             hidden_update=hiden_update, reset_input=reset_input,
                             only_return_final=True, name='GRU')
    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_gru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_gru, input_var, target_var, batch_size, length, position, binominal)
def exe_sgru(use_embedd, length, num_units, position, binominal):
    batch_size = BATCH_SIZE

    input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
    target_var = T.ivector(name='targets')

    layer_input = lasagne.layers.InputLayer(shape=(None, length, 1), input_var=input_var, name='input')
    if use_embedd:
        layer_position = construct_position_input(batch_size, length, num_units)
        layer_input = lasagne.layers.concat([layer_input, layer_position], axis=2)

    resetgate_input = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    resetgate_hidden = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    updategate = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None)
    hiden_update = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        b=lasagne.init.Constant(0.), nonlinearity=nonlinearities.tanh)

    layer_sgru = SGRULayer(layer_input, num_units, resetgate_input=resetgate_input,
                           resetgate_hidden=resetgate_hidden, updategate=updategate, hidden_update=hiden_update,
                           only_return_final=True, name='SGRU')
    # W = layer_gru.W_hid_to_hidden_update.sum()
    # U = layer_gru.W_in_to_hidden_update.sum()
    # b = layer_gru.b_hidden_update.sum()

    layer_output = DenseLayer(layer_sgru, num_units=1, nonlinearity=nonlinearities.sigmoid, name='output')

    return train(layer_output, layer_sgru, input_var, target_var, batch_size, length, position, binominal)
def build_generator(input_var=None):
    from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer
    try:
        from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    except ImportError:
        raise ImportError("Your Lasagne is too old. Try the bleeding-edge "
                          "version: http://lasagne.readthedocs.io/en/latest/"
                          "user/installation.html#bleeding-edge-version")
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import sigmoid
    # input: 100dim
    layer = InputLayer(shape=(None, 100), input_var=input_var)
    # fully-connected layer
    layer = batch_norm(DenseLayer(layer, 1024))
    # project and reshape
    layer = batch_norm(DenseLayer(layer, 128*7*7))
    layer = ReshapeLayer(layer, ([0], 128, 7, 7))
    # two fractional-stride convolutions
    layer = batch_norm(Deconv2DLayer(layer, 64, 5, stride=2, crop='same',
                                     output_size=14))
    layer = Deconv2DLayer(layer, 1, 5, stride=2, crop='same', output_size=28,
                          nonlinearity=sigmoid)
    print("Generator output:", layer.output_shape)
    return layer
def build_generator(input_var=None, verbose=False):
    from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer
    try:
        from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    except ImportError:
        raise ImportError("Your Lasagne is too old. Try the bleeding-edge "
                          "version: http://lasagne.readthedocs.io/en/latest/"
                          "user/installation.html#bleeding-edge-version")
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import sigmoid
    # input: 100dim
    layer = InputLayer(shape=(None, 100), input_var=input_var)
    # # fully-connected layer
    # layer = batch_norm(DenseLayer(layer, 1024))
    # project and reshape
    layer = batch_norm(DenseLayer(layer, 1024*4*4))
    layer = ReshapeLayer(layer, ([0], 1024, 4, 4))
    # three fractional-stride convolutions
    layer = batch_norm(Deconv2DLayer(layer, 512, 5, stride=2, crop='same',
                                     output_size=8))
    layer = batch_norm(Deconv2DLayer(layer, 256, 5, stride=2, crop='same',
                                     output_size=16))
    layer = Deconv2DLayer(layer, 3, 5, stride=2, crop='same', output_size=32,
                          nonlinearity=sigmoid)
    if verbose:
        print("Generator output:", layer.output_shape)
    return layer
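A possible way to use such a generator (the noise sampling and the compiled function below are illustrative assumptions): sample 100-dimensional noise, bind it to the generator's input, and compile an image-producing function.

import numpy as np
import theano
import theano.tensor as T
import lasagne

noise = T.matrix('noise')
gen = build_generator(noise)           # 32x32 RGB variant above

images = lasagne.layers.get_output(gen)
generate = theano.function([noise], images)

# sample a batch of 100-dimensional noise vectors and generate images
z = np.random.rand(16, 100).astype(theano.config.floatX)
samples = generate(z)                  # shape: (16, 3, 32, 32)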
def build_BiRNN(incoming, num_units, mask=None, grad_clipping=0, nonlinearity=nonlinearities.tanh,
                precompute_input=True, dropout=True, in_to_out=False):
    # construct the forward and backward rnns. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    rnn_forward = lasagne.layers.RecurrentLayer(incoming, num_units, mask_input=mask,
                                                grad_clipping=grad_clipping, nonlinearity=nonlinearity,
                                                precompute_input=precompute_input,
                                                W_in_to_hid=lasagne.init.GlorotUniform(),
                                                W_hid_to_hid=lasagne.init.GlorotUniform(), name='forward')
    rnn_backward = lasagne.layers.RecurrentLayer(incoming, num_units, mask_input=mask,
                                                 grad_clipping=grad_clipping, nonlinearity=nonlinearity,
                                                 precompute_input=precompute_input,
                                                 W_in_to_hid=lasagne.init.GlorotUniform(),
                                                 W_hid_to_hid=lasagne.init.GlorotUniform(),
                                                 backwards=True, name='backward')

    # concatenate the outputs of forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([rnn_forward, rnn_backward], axis=2, name="bi-rnn")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat
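As an illustrative sketch of feeding build_BiRNN (the input shapes, mask handling, and hyperparameters below are made-up assumptions, not taken from the original project): wrap the (batch, length, dim) input and the (batch, length) mask in InputLayers and pass them in.

import theano.tensor as T
import lasagne
from lasagne import nonlinearities

# hypothetical input: a batch of sentences of word embeddings, plus a mask
word_input = T.tensor3('word_input')   # (batch, sent_length, embed_dim)
mask_var = T.matrix('mask')            # (batch, sent_length)

layer_embed = lasagne.layers.InputLayer(shape=(None, 50, 100), input_var=word_input)
layer_mask = lasagne.layers.InputLayer(shape=(None, 50), input_var=mask_var)

# output shape (batch, sent_length, 2 * num_units), as noted in the comment above
birnn = build_BiRNN(layer_embed, num_units=200, mask=layer_mask,
                    grad_clipping=5, nonlinearity=nonlinearities.tanh)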
def build_BiLSTM_HighCNN(incoming1, incoming2, num_units, mask=None, grad_clipping=0, precompute_input=True,
                         peepholes=False, num_filters=20, dropout=True, in_to_out=False):
    # first get some necessary dimensions or parameters
    conv_window = 3
    _, sent_length, _ = incoming2.output_shape

    # dropout before cnn
    if dropout:
        incoming1 = lasagne.layers.DropoutLayer(incoming1, p=0.5)

    # construct convolution layer
    cnn_layer = lasagne.layers.Conv1DLayer(incoming1, num_filters=num_filters, filter_size=conv_window, pad='full',
                                           nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
    # infer the pool size for pooling (pool size should go through all time step of cnn)
    _, _, pool_size = cnn_layer.output_shape
    # construct max pool layer
    pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
    # reshape the layer to match highway incoming layer [batch * sent_length, num_filters, 1] --> [batch * sent_length, num_filters]
    output_cnn_layer = lasagne.layers.reshape(pool_layer, ([0], -1))

    # dropout after cnn?
    # if dropout:
    #     output_cnn_layer = lasagne.layers.DropoutLayer(output_cnn_layer, p=0.5)

    # construct highway layer
    highway_layer = HighwayDenseLayer(output_cnn_layer, nonlinearity=nonlinearities.rectify)

    # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters] --> [batch, sent_length, number_filters]
    output_highway_layer = lasagne.layers.reshape(highway_layer, (-1, sent_length, [1]))

    # finally, concatenate the two incoming layers together.
    incoming = lasagne.layers.concat([output_highway_layer, incoming2], axis=2)

    return build_BiLSTM(incoming, num_units, mask=mask, grad_clipping=grad_clipping, peepholes=peepholes,
                        precompute_input=precompute_input, dropout=dropout, in_to_out=in_to_out)
def build_net(in_shape, out_size, model):
    # input variables
    input_var = (tt.tensor3('input', dtype='float32')
                 if len(in_shape) > 1 else
                 tt.matrix('input', dtype='float32'))
    target_var = tt.matrix('target_output', dtype='float32')

    # stack more layers
    network = lnn.layers.InputLayer(
        name='input', shape=(None,) + in_shape, input_var=input_var)

    if 'conv' in model and model['conv']:
        # reshape to 1 "color" channel
        network = lnn.layers.reshape(
            network, shape=(-1, 1) + in_shape, name='reshape')
        for c in sorted(model['conv'].keys()):
            network = blocks.conv(network, **model['conv'][c])

    # no more output layer if gap is already there!
    if 'gap' in model and model['gap']:
        network = blocks.gap(network, out_size=out_size,
                             out_nonlinearity=model['out_nonlinearity'],
                             **model['gap'])
    else:
        if 'dense' in model and model['dense']:
            network = blocks.dense(network, **model['dense'])

        # output layer
        out_nl = getattr(lnn.nonlinearities, model['out_nonlinearity'])
        network = lnn.layers.DenseLayer(
            network, name='output', num_units=out_size, nonlinearity=out_nl)

    return network, input_var, target_var
def recurrent(network, mask_in, num_rec_units, num_layers, dropout,
              bidirectional, nonlinearity):
    if nonlinearity != 'LSTM':
        nl = getattr(lnn.nonlinearities, nonlinearity)

        def add_layer(prev_layer, **kwargs):
            return lnn.layers.RecurrentLayer(
                prev_layer, num_units=num_rec_units, mask_input=mask_in,
                nonlinearity=nl,
                W_in_to_hid=lnn.init.GlorotUniform(),
                W_hid_to_hid=lnn.init.Orthogonal(gain=np.sqrt(2) / 2),
                **kwargs)
    else:
        def add_layer(prev_layer, **kwargs):
            return lnn.layers.LSTMLayer(
                prev_layer, num_units=num_rec_units, mask_input=mask_in,
                **kwargs)

    fwd = network
    for i in range(num_layers):
        fwd = add_layer(fwd, name='rec_fwd_{}'.format(i))
        if dropout > 0.:
            fwd = lnn.layers.DropoutLayer(fwd, p=dropout)

    if not bidirectional:
        return network

    bck = network
    for i in range(num_layers):
        bck = add_layer(bck, name='rec_bck_{}'.format(i), backwards=True)
        if dropout > 0:
            bck = lnn.layers.DropoutLayer(bck, p=dropout)

    # combine the forward and backward recurrent layers...
    network = lnn.layers.ConcatLayer([fwd, bck], name='fwd + bck', axis=-1)
    return network
def default_Fout(self):
    # return lambda x: theano.tensor.maximum(x, 0.)
    return lasagne.nonlinearities.LeakyRectify(0.1)
    # return getattr(lasagne.nonlinearities, self.default_nonlinearity)
def nonlinearity(self, type):
    # select the output or hidden nonlinearity by name
    if type == "out":
        return getattr(lasagne.nonlinearities, self.nonlinearity_out)
    elif type == "hid":
        return getattr(lasagne.nonlinearities, self.nonlinearity_hid)
def batch_norm(layer, **kwargs):
    """
    Apply batch normalization to an existing layer. This is a convenience
    function modifying an existing layer to include batch normalization: It
    will steal the layer's nonlinearity if there is one (effectively
    introducing the normalization right before the nonlinearity), remove
    the layer's bias if there is one (because it would be redundant), and add
    a :class:`BatchNormLayer` and :class:`NonlinearityLayer` on top.

    Parameters
    ----------
    layer : A :class:`Layer` instance
        The layer to apply the normalization to; note that it will be
        irreversibly modified as specified above
    **kwargs
        Any additional keyword arguments are passed on to the
        :class:`BatchNormLayer` constructor.

    Returns
    -------
    BatchNormLayer or NonlinearityLayer instance
        A batch normalization layer stacked on the given modified `layer`, or
        a nonlinearity layer stacked on top of both if `layer` was nonlinear.

    Examples
    --------
    Just wrap any layer into a :func:`batch_norm` call on creating it:

    >>> from lasagne.layers import InputLayer, DenseLayer, batch_norm
    >>> from lasagne.nonlinearities import tanh
    >>> l1 = InputLayer((64, 768))
    >>> l2 = batch_norm(DenseLayer(l1, num_units=500, nonlinearity=tanh))

    This introduces batch normalization right before its nonlinearity:

    >>> from lasagne.layers import get_all_layers
    >>> [l.__class__.__name__ for l in get_all_layers(l2)]
    ['InputLayer', 'DenseLayer', 'BatchNormLayer', 'NonlinearityLayer']
    """
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = lasagne.nonlinearities.identity
    if hasattr(layer, 'b') and layer.b is not None:
        del layer.params[layer.b]
        layer.b = None
    layer = BatchNormLayer(layer, **kwargs)
    if nonlinearity is not None:
        layer = L.NonlinearityLayer(layer, nonlinearity)
    return layer
def build_BiLSTM(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, peepholes=False,
                 dropout=True, in_to_out=False):
    # construct the forward and backward rnns. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    ingate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                          W_cell=lasagne.init.Uniform(range=0.1))
    outgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                        nonlinearity=nonlinearities.tanh)
    lstm_forward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                            nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                            precompute_input=precompute_input,
                                            ingate=ingate_forward, outgate=outgate_forward,
                                            forgetgate=forgetgate_forward, cell=cell_forward, name='forward')

    ingate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                           W_cell=lasagne.init.Uniform(range=0.1))
    outgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                            W_cell=lasagne.init.Uniform(range=0.1))
    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    forgetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    # now use tanh for nonlinear function of cell, need to try pure linear cell
    cell_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                         nonlinearity=nonlinearities.tanh)
    lstm_backward = lasagne.layers.LSTMLayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                             nonlinearity=nonlinearities.tanh, peepholes=peepholes,
                                             precompute_input=precompute_input, backwards=True,
                                             ingate=ingate_backward, outgate=outgate_backward,
                                             forgetgate=forgetgate_backward, cell=cell_backward, name='backward')

    # concatenate the outputs of forward and backward RNNs to combine them.
    concat = lasagne.layers.concat([lstm_forward, lstm_backward], axis=2, name="bi-lstm")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat
def build_BiGRU(incoming, num_units, mask=None, grad_clipping=0, precompute_input=True, dropout=True,
                in_to_out=False):
    # construct the forward and backward grus. Now, Ws are initialized by Glorot initializer with default arguments.
    # Need to try other initializers for specific tasks.

    # dropout for incoming
    if dropout:
        incoming = lasagne.layers.DropoutLayer(incoming, p=0.5)

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    resetgate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                             W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_forward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                          nonlinearity=nonlinearities.tanh)
    gru_forward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                          precompute_input=precompute_input,
                                          resetgate=resetgate_forward, updategate=updategate_forward,
                                          hidden_update=hidden_forward, name='forward')

    # according to Jozefowicz et al.(2015), init bias of forget gate to 1.
    resetgate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                              W_cell=lasagne.init.Uniform(range=0.1), b=lasagne.init.Constant(1.))
    updategate_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(),
                               W_cell=lasagne.init.Uniform(range=0.1))
    # now use tanh for nonlinear function of hidden gate
    hidden_backward = Gate(W_in=lasagne.init.GlorotUniform(), W_hid=lasagne.init.GlorotUniform(), W_cell=None,
                           nonlinearity=nonlinearities.tanh)
    gru_backward = lasagne.layers.GRULayer(incoming, num_units, mask_input=mask, grad_clipping=grad_clipping,
                                           precompute_input=precompute_input, backwards=True,
                                           resetgate=resetgate_backward, updategate=updategate_backward,
                                           hidden_update=hidden_backward, name='backward')

    # concatenate the outputs of forward and backward GRUs to combine them.
    concat = lasagne.layers.concat([gru_forward, gru_backward], axis=2, name="bi-gru")

    # dropout for output
    if dropout:
        concat = lasagne.layers.DropoutLayer(concat, p=0.5)

    if in_to_out:
        concat = lasagne.layers.concat([concat, incoming], axis=2)

    # the shape of BiRNN output (concat) is (batch_size, input_length, 2 * num_hidden_units)
    return concat
def build_network():
    conv_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'filter_size': (3, 3),
        'stride': (1, 1),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    nin_defs = {
        'W': lasagne.init.HeNormal('relu'),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1)
    }

    dense_defs = {
        'W': lasagne.init.HeNormal(1.0),
        'b': lasagne.init.Constant(0.0),
        'nonlinearity': lasagne.nonlinearities.softmax
    }

    wn_defs = {
        'momentum': .999
    }

    net = InputLayer(name='input', shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise', sigma=.15)
    net = WN(Conv2DLayer(net, name='conv1a', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1b', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop1', p=.5)
    net = WN(Conv2DLayer(net, name='conv2a', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2b', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = WN(Conv2DLayer(net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs)
    net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2))
    net = DropoutLayer(net, name='drop2', p=.5)
    net = WN(Conv2DLayer(net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs)
    net = WN(NINLayer(net, name='conv3c', num_units=128, **nin_defs), **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs), **wn_defs)

    return net
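A hedged sketch of how a softmax classifier like this one might be trained (it assumes build_network's module-level imports are available; the loss, optimizer, and learning rate below are illustrative choices, not taken from the original project):

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
target_var = T.ivector('targets')

# build the network and feed input_var into its InputLayer
net = build_network()
predictions = lasagne.layers.get_output(net, input_var)

# categorical cross-entropy against integer class labels
loss = lasagne.objectives.categorical_crossentropy(predictions, target_var).mean()

params = lasagne.layers.get_all_params(net, trainable=True)
updates = lasagne.updates.adam(loss, params, learning_rate=3e-4)

train_fn = theano.function([input_var, target_var], loss, updates=updates)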