The following 18 code examples, extracted from open-source Python projects, illustrate how to use lasagne.layers.ElemwiseSumLayer().
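Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; layer sizes and variable names are illustrative) of what ElemwiseSumLayer does: it merges two or more layers with identical output shapes by element-wise summation, and the optional coeffs argument scales each input before summing.

from lasagne.layers import InputLayer, DenseLayer, ElemwiseSumLayer

# Two branches with the same output shape (batch, 50)
l_in = InputLayer(shape=(None, 100))
l_branch_a = DenseLayer(l_in, num_units=50)
l_branch_b = DenseLayer(l_in, num_units=50)

# Element-wise sum: output[i, j] = a[i, j] + b[i, j]
l_sum = ElemwiseSumLayer([l_branch_a, l_branch_b])

# Weighted sum via coeffs, e.g. the difference a - b
l_diff = ElemwiseSumLayer([l_branch_a, l_branch_b], coeffs=[1, -1])

The residual blocks and bidirectional-RNN merges in the examples below are all instances of this pattern: two branches of matching shape combined by summation.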
def MDBLOCK(incoming, num_filters, scales, name, nonlinearity):
    return NL(BN(ESL([incoming,
                      MDCL(NL(BN(MDCL(NL(BN(incoming, name=name+'bnorm0'), nonlinearity),
                                      num_filters,
                                      scales,
                                      name), name=name+'bnorm1'), nonlinearity),
                           num_filters,
                           scales,
                           name+'2')]),
                 name=name+'bnorm2'), nonlinearity)

# Gaussian Sample Layer for VAE from Tencia Lee
def GL(mu, ls):
    return([GSL(z_mu, z_ls) for z_mu, z_ls in zip(mu, ls)])

# Convenience function to return a residual layer. It's not really that much more convenient than ESL'ing,
# but I like being able to see when I'm using Residual connections as opposed to Elemwise-sums
def ResLayer(incoming, IB, nonlinearity):
    return NL(ESL([IB, incoming]), nonlinearity)

# Inverse autoregressive flow layer
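For readers unfamiliar with the shorthand used in the snippets above, ESL and NL appear to be project-local aliases for ElemwiseSumLayer and NonlinearityLayer (BN similarly wraps batch normalization). Below is a hedged usage sketch of ResLayer under that assumption; the filter counts and input shape are illustrative, not from the original project.

from lasagne.layers import (InputLayer, Conv2DLayer,
                            ElemwiseSumLayer as ESL, NonlinearityLayer as NL)
from lasagne.nonlinearities import rectify

# ResLayer as defined above, under the alias assumption
def ResLayer(incoming, IB, nonlinearity):
    return NL(ESL([IB, incoming]), nonlinearity)

# Inner block: two 3x3 convolutions; the second is linear so the residual
# sum happens before the output nonlinearity.
l_in = InputLayer(shape=(None, 64, 32, 32))
ib = Conv2DLayer(l_in, num_filters=64, filter_size=3, pad='same', nonlinearity=rectify)
ib = Conv2DLayer(ib, num_filters=64, filter_size=3, pad='same', nonlinearity=None)

# Residual connection: element-wise sum of the block output and its input,
# followed by the nonlinearity.
l_res = ResLayer(l_in, ib, rectify)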
def get_output_for(self, input, **kwargs):
    if input.ndim > 2:
        input = input.flatten(2)
    activation = T.dot(input, self.W*self.weights_mask)
    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)
    return self.nonlinearity(activation)

# Stripped-Down Direct Input masked layer: Combine this with ESL and a masked layer to get a true DIML.
# Consider making this a simultaneous subclass of MaskedLayer and elemwise sum layer for cleanliness
# adopted from M.Germain
def make_block(self, name, input, units):
    self.make_layer(name+'-A', input, units, alpha=0.1)
    # self.make_layer(name+'-B', self.last_layer(), units, alpha=1.0)
    return ElemwiseSumLayer([input, self.last_layer()]) if args.generator_residual else self.last_layer()
def getTrainedRNN():
    ''' Read from file and set the params (To Do: Refactor so as to do this only once) '''
    input_size = 39
    hidden_size = 50
    num_output_classes = 29
    learning_rate = 0.001
    output_size = num_output_classes+1
    batch_size = None
    input_seq_length = None
    gradient_clipping = 5

    l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
    n_batch, n_time_steps, n_features = l_in.input_var.shape  # Unnecessary in this version. Just collecting the info so that we can reshape the output back to the original shape
    # h_1 = DenseLayer(l_in, num_units=hidden_size, nonlinearity=clipped_relu)
    l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping, nonlinearity=clipped_relu)
    l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size, grad_clipping=gradient_clipping, nonlinearity=clipped_relu, backwards=True)
    l_rec_accumulation = ElemwiseSumLayer([l_rec_forward, l_rec_backward])
    l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1, hidden_size))
    l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size, nonlinearity=clipped_relu)
    l_out = DenseLayer(l_h2, num_units=output_size, nonlinearity=lasagne.nonlinearities.linear)
    l_out_reshaped = ReshapeLayer(l_out, (n_batch, n_time_steps, output_size))  # Reshaping back
    l_out_softmax = NonlinearityLayer(l_out, nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_reshaped = ReshapeLayer(l_out_softmax, (n_batch, n_time_steps, output_size))

    with np.load('CTC_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values, trainable=True)
    output = lasagne.layers.get_output(l_out_softmax_reshaped)
    return l_in, output
def resnet_block(input_, filter_size, num_filters,
                 activation=relu, downsample=False,
                 no_output_act=True,
                 use_shortcut=False,
                 use_wn=False,
                 W_init=Normal(0.02),
                 **kwargs):
    """
    Resnet block layer.
    """
    normalization = weight_norm if use_wn else batch_norm
    block = []
    _stride = 2 if downsample else 1

    # conv -> BN -> Relu
    block.append(normalization(conv_layer(input_, filter_size, num_filters, _stride, 'same',
                                          nonlinearity=activation,
                                          W=W_init)))
    # Conv -> BN
    block.append(normalization(conv_layer(block[-1], filter_size, num_filters, 1, 'same',
                                          nonlinearity=None,
                                          W=W_init)))

    if downsample or use_shortcut:
        shortcut = conv_layer(input_, 1, num_filters, _stride, 'valid', nonlinearity=None)
        block.append(ElemwiseSumLayer([shortcut, block[-1]]))
    else:
        block.append(ElemwiseSumLayer([input_, block[-1]]))

    if not no_output_act:
        block.append(NonlinearityLayer(block[-1], nonlinearity=activation))

    return block[-1]
def residual_block(resnet_in, num_styles=None, num_filters=None, filter_size=3, stride=1):
    if num_filters == None:
        num_filters = resnet_in.output_shape[1]

    conv1 = style_conv_block(resnet_in, num_styles, num_filters, filter_size, stride)
    conv2 = style_conv_block(conv1, num_styles, num_filters, filter_size, stride, linear)
    res_block = ElemwiseSumLayer([conv2, resnet_in])

    return res_block
def create_model(input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, output_classes=26, w_init=las.init.Orthogonal()):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm')
    l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')
    l_forward_slice1 = SliceLayer(l_sum, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_model(input_shape, input_var, mask_shape, mask_var, window,
                 lstm_size=250, output_classes=26, w_init=las.init.GlorotUniform(),
                 use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')
    symbolic_seqlen = l_in.input_var.shape[1]

    l_delta = DeltaLayer(l_in, window, name='delta')

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')

        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def create_model(input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, output_classes=26, w_init=las.init.GlorotUniform(),
                 use_peepholes=False, use_blstm=True):
    gate_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init, W_hid=w_init,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_seqlen = l_in.input_var.shape[1]

    if use_blstm:
        f_lstm, b_lstm = create_blstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_sum = ElemwiseSumLayer([f_lstm, b_lstm], name='sum')

        # reshape to (num_examples * seq_len, lstm_size)
        l_reshape = ReshapeLayer(l_sum, (-1, lstm_size), name='reshape')
    else:
        l_lstm = create_lstm(l_in, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(
        l_reshape, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def MDCL(incoming, num_filters, scales, name, dnn=True):
    if dnn:
        from lasagne.layers.dnn import Conv2DDNNLayer as C2D

    # W initialization method--this should also work as Orthogonal('relu'), but I have yet to validate that as thoroughly.
    winit = initmethod(0.02)

    # Initialization method for the coefficients
    sinit = lasagne.init.Constant(1.0/(1+len(scales)))

    # Number of incoming channels
    ni = lasagne.layers.get_output_shape(incoming)[1]

    # Weight parameter--the primary parameter for this block
    W = theano.shared(lasagne.utils.floatX(winit.sample((num_filters, lasagne.layers.get_output_shape(incoming)[1], 3, 3))), name=name+'W')

    # Primary Convolution Layer--No Dilation
    n = C2D(incoming=incoming,
            num_filters=num_filters,
            filter_size=[3, 3],
            stride=[1, 1],
            pad=(1, 1),
            W=W*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_base').dimshuffle(0, 'x', 'x', 'x'),  # Note the broadcasting dimshuffle for the num_filter scalars.
            b=None,
            nonlinearity=None,
            name=name+'base')

    # List of remaining layers. This should probably just all be concatenated into a single list rather than being a separate deal.
    nd = []
    for i, scale in enumerate(scales):
        # I don't think 0 dilation is technically defined (or if it is it's just the regular filter) but I use it here as a convenient keyword to grab the 1x1 mean conv.
        if scale == 0:
            nd.append(C2D(incoming=incoming,
                          num_filters=num_filters,
                          filter_size=[1, 1],
                          stride=[1, 1],
                          pad=(0, 0),
                          W=T.mean(W, axis=[2, 3]).dimshuffle(0, 1, 'x', 'x')*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_1x1').dimshuffle(0, 'x', 'x', 'x'),
                          b=None,
                          nonlinearity=None,
                          name=name+str(scale)))
        # Note the dimshuffles in this layer--these are critical as the current DilatedConv2D implementation uses a backward pass.
        else:
            nd.append(lasagne.layers.DilatedConv2DLayer(incoming=lasagne.layers.PadLayer(incoming=incoming, width=(scale, scale)),
                                                        num_filters=num_filters,
                                                        filter_size=[3, 3],
                                                        dilation=(scale, scale),
                                                        W=W.dimshuffle(1, 0, 2, 3)*theano.shared(lasagne.utils.floatX(sinit.sample(num_filters)), name+'_coeff_'+str(scale)).dimshuffle('x', 0, 'x', 'x'),
                                                        b=None,
                                                        nonlinearity=None,
                                                        name=name+str(scale)))

    return ESL(nd+[n])

# MDC-based Upsample Layer.
# This is a prototype I don't make use of extensively. It's operational but it doesn't seem to improve results yet.
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, w_init_fn=GlorotUniform,
                 use_peepholes=False, use_blstm=True):
    weights, biases, shapes, nonlinearities = dbn

    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes, nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'blstm1', use_peepholes)

        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen, output_classes), name='output')

    return l_out
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, output_classes=26):
    dbn_layers = dbn.get_all_layers()
    weights = []
    biases = []
    weights.append(dbn_layers[1].W.astype('float32'))
    weights.append(dbn_layers[2].W.astype('float32'))
    weights.append(dbn_layers[3].W.astype('float32'))
    weights.append(dbn_layers[4].W.astype('float32'))
    biases.append(dbn_layers[1].b.astype('float32'))
    biases.append(dbn_layers[2].b.astype('float32'))
    biases.append(dbn_layers[3].b.astype('float32'))
    biases.append(dbn_layers[4].b.astype('float32'))

    gate_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(), W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(weights, biases, l_reshape1)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')

    # l_delta = DeltaLayer(l_reshape2, win, name='delta')
    # l_lstm = create_lstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')
    l_lstm, l_lstm_back = create_blstm(l_reshape2, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm1')

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_model_using_pretrained_encoder(weights, biases, input_shape, input_var, mask_shape, mask_var,
                                          lstm_size=250, win=T.iscalar('theta'),
                                          output_classes=26, w_init_fn=las.init.Orthogonal(),
                                          use_peepholes=False, nonlinearities=rectify):
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')

    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          [2000, 1000, 500, 50],
                                          [nonlinearities, nonlinearities, nonlinearities, linear],
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters, gate_parameters, 'bstm1', use_peepholes)

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out