我们从Python开源项目中,提取了以下44个代码示例,用于说明如何使用lasagne.layers.ReshapeLayer()。
def _invert_GlobalPoolLayer(self, layer, feeder):
    """Invert a mean GlobalPoolLayer: restore spatial dims by reshaping the
    pooled relevance to (N, C, 1, 1), upscaling it back to the layer's input
    resolution, and dividing by the pooled area so mass is preserved."""
    assert isinstance(layer, L.GlobalPoolLayer)
    assert layer.pool_function == T.mean
    # only 4D (N, C, H, W) inputs are supported
    assert len(L.get_output_shape(layer.input_layer)) == 4
    target_shape = L.get_output_shape(feeder)+(1,1)
    if target_shape[0] is None:
        # symbolic batch size: let reshape infer it
        target_shape = (-1,) + target_shape[1:]
    feeder = L.ReshapeLayer(feeder, target_shape)
    # spatial size of the original (pre-pool) feature map
    upscaling = L.get_output_shape(layer.input_layer)[2:]
    feeder = L.Upscale2DLayer(feeder, upscaling)

    def expression(x):
        # undo the mean: each upscaled copy carries 1/(H*W) of the relevance
        return x / np.prod(upscaling).astype(theano.config.floatX)
    feeder = L.ExpressionLayer(feeder, expression)
    return feeder
def build_critic(input_var=None):
    """Build the critic network for 28x28 single-channel images.

    Parameters
    ----------
    input_var : Theano variable, optional
        Symbolic input batch of shape (None, 1, 28, 28).

    Returns
    -------
    lasagne.layers.Layer
        The linear, single-unit output layer (no nonlinearity, as
        required for a Wasserstein critic).
    """
    # fix: ReshapeLayer was imported but never used
    from lasagne.layers import InputLayer, Conv2DLayer, DenseLayer
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import LeakyRectify
    lrelu = LeakyRectify(0.2)
    # input: (None, 1, 28, 28)
    layer = InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    # two strided convolutions
    layer = batch_norm(Conv2DLayer(layer, 64, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    layer = batch_norm(Conv2DLayer(layer, 128, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    # fully-connected layer
    layer = batch_norm(DenseLayer(layer, 1024, nonlinearity=lrelu))
    # output layer (linear)
    layer = DenseLayer(layer, 1, nonlinearity=None)
    print("critic output:", layer.output_shape)
    return layer
def build_critic(input_var=None, verbose=False):
    """Build the critic network for 32x32 RGB images.

    Parameters
    ----------
    input_var : Theano variable, optional
        Symbolic input batch of shape (None, 3, 32, 32).
    verbose : bool
        If True, print the critic's output shape.

    Returns
    -------
    lasagne.layers.Layer
        The linear, single-unit output layer.
    """
    # fix: ReshapeLayer and sigmoid were imported but never used
    from lasagne.layers import InputLayer, Conv2DLayer, DenseLayer
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import LeakyRectify
    lrelu = LeakyRectify(0.2)
    # input: (None, 3, 32, 32)  (the original comment wrongly said 1x28x28)
    layer = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)
    # three strided convolutions
    layer = batch_norm(Conv2DLayer(layer, 128, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    layer = batch_norm(Conv2DLayer(layer, 256, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    layer = batch_norm(Conv2DLayer(layer, 512, 5, stride=2, pad='same',
                                   nonlinearity=lrelu))
    # output layer (linear)
    layer = DenseLayer(layer, 1, nonlinearity=None)
    if verbose:
        print("critic output:", layer.output_shape)
    return layer
def generator(input_var):
    """Resnet-style generator: map a (None, NLAT, 1, 1) latent code to a
    single-channel image with sigmoid output in [0, 1]."""
    network = lasagne.layers.InputLayer(shape=(None, NLAT,1,1), input_var=input_var)
    # 1x1 conv producing 4*4*128 channels, then reshape to a 4x4 feature map
    network = conv_layer(network, 1, 4 * 4 * 128, 1, 'valid')
    #print(input_var.shape[0])
    network = ll.ReshapeLayer(network, (-1, 128, 4, 4))
    network = resnet_block(network, 3, 128)
    network = resnet_block(network, 3, 128)
    # three bilinear upsampling stages, shrinking channels 128 -> 64 -> 32
    network = BilinearUpsampling(network, ratio=2)
    network = batch_norm(conv_layer(network, 3, 64, 1, 'same'))
    network = resnet_block(network, 3, 64)
    network = BilinearUpsampling(network, ratio=2)
    network = batch_norm(conv_layer(network, 3, 32, 1, 'valid'))
    network = BilinearUpsampling(network, ratio=2)
    network = resnet_block(network, 3, 32)
    # final 1x1 conv down to one channel with sigmoid activation
    network = conv_layer(network, 1, 1, 1, 'valid', nonlinearity=sigmoid)
    #network =lasagne.layers.Conv2DLayer(network, num_filters=1, filter_size=1, stride=1, nonlinearity=sigmoid)
    return network

# In[23]:
def generator(input_var):
    """Deconvolutional generator: map a (None, NLAT, 1, 1) latent code to a
    (batch_size, 1, 28, 28) sigmoid image.

    NOTE(review): the Deconv2DLayer output shapes are hard-coded to the
    module-level ``batch_size``, so this network only works for that fixed
    batch size — confirm against callers.
    """
    network = lasagne.layers.InputLayer(shape=(None, NLAT,1,1), input_var=input_var)
    network = ll.DenseLayer(network, num_units=4*4*64, W=Normal(0.05), nonlinearity=nn.relu)
    #print(input_var.shape[0])
    network = ll.ReshapeLayer(network, (batch_size,64,4,4))
    # chain of transposed convolutions growing 4x4 -> 7x7 -> 11x11 -> 25x25 -> 28x28
    network = nn.Deconv2DLayer(network, (batch_size,32,7,7), (4,4), stride=(1,1), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size,32,11,11), (5,5), stride=(1,1), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size,32,25,25), (5,5), stride=(2,2), pad='valid', W=Normal(0.05), nonlinearity=nn.relu)
    network = nn.Deconv2DLayer(network, (batch_size,1,28,28), (4,4), stride=(1,1), pad='valid', W=Normal(0.05), nonlinearity=sigmoid)
    #network =lasagne.layers.Conv2DLayer(network, num_filters=1, filter_size=1, stride=1, nonlinearity=sigmoid)
    return network

# In[23]:
def build_rnn(conv_input_var, seq_input_var, conv_shape, word_dims, n_hid, lstm_layers):
    """Build an LSTM word-sequence model conditioned on a conv feature vector.

    Returns a dict of named layers; ret['output'] is the final log-softmax
    prediction layer over ``word_dims`` outputs per timestep.
    """
    ret = {}
    ret['seq_input'] = seq_layer = InputLayer((None, None, word_dims), input_var=seq_input_var)
    batchsize, seqlen, _ = seq_layer.input_var.shape
    # project word vectors to n_hid (flatten time into the batch axis first)
    ret['seq_resh'] = seq_layer = ReshapeLayer(seq_layer, shape=(-1, word_dims))
    ret['seq_proj'] = seq_layer = DenseLayer(seq_layer, num_units=n_hid)
    ret['seq_resh2'] = seq_layer = ReshapeLayer(seq_layer, shape=(batchsize, seqlen, n_hid))
    # project the conv features and prepend them as an extra first "timestep"
    ret['conv_input'] = conv_layer = InputLayer(conv_shape, input_var = conv_input_var)
    ret['conv_proj'] = conv_layer = DenseLayer(conv_layer, num_units=n_hid)
    ret['conv_resh'] = conv_layer = ReshapeLayer(conv_layer, shape=([0], 1, -1))
    ret['input_concat'] = layer = ConcatLayer([conv_layer, seq_layer], axis=1)
    for lstm_layer_idx in xrange(lstm_layers):
        ret['lstm_{}'.format(lstm_layer_idx)] = layer = LSTMLayer(layer, n_hid)
    # per-timestep projection to the output vocabulary (log-softmax)
    ret['out_resh'] = layer = ReshapeLayer(layer, shape=(-1, n_hid))
    ret['output_proj'] = layer = DenseLayer(layer, num_units=word_dims, nonlinearity=log_softmax)
    ret['output'] = layer = ReshapeLayer(layer, shape=(batchsize, seqlen+1, word_dims))
    # NOTE(review): 'output' is immediately rebound — only the sliced version
    # (dropping the last timestep) survives in the returned dict.
    ret['output'] = layer = SliceLayer(layer, indices=slice(None, -1), axis=1)
    return ret

# originally from
# https://github.com/Lasagne/Recipes/blob/master/examples/styletransfer/Art%20Style%20Transfer.ipynb
def _invert_DenseLayer(self,layer,feeder):
    """Invert a DenseLayer: project the normalised relevance back to the
    dense layer's input shape, then multiply elementwise with the forward
    input activations (presumably LRP-style backpropagation — confirm
    against the enclosing class)."""
    # Warning they are swapped here
    feeder = self._put_rectifiers(feeder, layer)
    feeder = self._get_normalised_relevance_layer(layer, feeder)
    # one backward unit per element of the dense layer's *input*
    output_units = np.prod(L.get_output_shape(layer.input_layer)[1:])
    output_layer = L.DenseLayer(feeder, num_units=output_units)
    W = output_layer.W
    # reshape the forward input to match the backward projection's shape
    tmp_shape = np.asarray((-1,)+L.get_output_shape(output_layer)[1:])
    x_layer = L.ReshapeLayer(layer.input_layer, tmp_shape.tolist())
    output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer], merge_function=T.mul)
    # keep the dense weights reachable as a parameter of the merged layer
    output_layer.W = W
    return output_layer
def _invert_layer(self, layer, feeder):
    """Dispatch inversion of *layer* to the matching `_invert_*` handler.

    If the feeder's output shape disagrees with the layer's, it is first
    reshaped (keeping a symbolic batch axis) to match.
    """
    if L.get_output_shape(feeder) != L.get_output_shape(layer):
        aligned_shape = (-1,) + L.get_output_shape(layer)[1:]
        feeder = L.ReshapeLayer(feeder, aligned_shape)

    # exact-type dispatch table (isinstance would also catch subclasses,
    # which the original code deliberately avoided)
    handlers = {
        L.InputLayer: self._invert_InputLayer,
        L.FlattenLayer: self._invert_FlattenLayer,
        L.DenseLayer: self._invert_DenseLayer,
        L.Conv2DLayer: self._invert_Conv2DLayer,
        L.DropoutLayer: self._invert_DropoutLayer,
        L.MaxPool2DLayer: self._invert_MaxPoolingLayer,
        L.MaxPool1DLayer: self._invert_MaxPoolingLayer,
        L.PadLayer: self._invert_PadLayer,
        L.SliceLayer: self._invert_SliceLayer,
        L.LocalResponseNormalization2DLayer:
            self._invert_LocalResponseNormalisation2DLayer,
        L.GlobalPoolLayer: self._invert_GlobalPoolLayer,
    }
    handler = handlers.get(type(layer), self._invert_UnknownLayer)
    return handler(layer, feeder)
def getTrainedRNN():
    """Load the trained CTC RNN from 'CTC_model.npz'.

    Builds a bidirectional RNN (forward + backward RecurrentLayer summed),
    a hidden dense layer, a linear output layer with softmax applied per
    timestep, then loads the saved parameters.

    Returns
    -------
    (l_in, output)
        The input layer and the symbolic softmax output of shape
        (batch, time, output_size).

    TODO: refactor so the parameters are only loaded once.
    """
    input_size = 39
    hidden_size = 50
    num_output_classes = 29
    output_size = num_output_classes + 1  # +1 for the CTC blank label
    batch_size = None        # symbolic
    input_seq_length = None  # symbolic
    gradient_clipping = 5
    # fix: removed unused `learning_rate` constant and the dead
    # `l_out_reshaped` layer (it was built but never used)

    l_in = InputLayer(shape=(batch_size, input_seq_length, input_size))
    # collect symbolic sizes so the output can be reshaped back
    n_batch, n_time_steps, n_features = l_in.input_var.shape
    l_rec_forward = RecurrentLayer(l_in, num_units=hidden_size,
                                   grad_clipping=gradient_clipping,
                                   nonlinearity=clipped_relu)
    l_rec_backward = RecurrentLayer(l_in, num_units=hidden_size,
                                    grad_clipping=gradient_clipping,
                                    nonlinearity=clipped_relu, backwards=True)
    l_rec_accumulation = ElemwiseSumLayer([l_rec_forward, l_rec_backward])
    # flatten time into batch for the dense layers
    l_rec_reshaped = ReshapeLayer(l_rec_accumulation, (-1, hidden_size))
    l_h2 = DenseLayer(l_rec_reshaped, num_units=hidden_size,
                      nonlinearity=clipped_relu)
    l_out = DenseLayer(l_h2, num_units=output_size,
                       nonlinearity=lasagne.nonlinearities.linear)
    l_out_softmax = NonlinearityLayer(l_out,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    l_out_softmax_reshaped = ReshapeLayer(l_out_softmax,
                                          (n_batch, n_time_steps, output_size))

    with np.load('CTC_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_out_softmax_reshaped, param_values,
                                        trainable=True)
    output = lasagne.layers.get_output(l_out_softmax_reshaped)
    return l_in, output
def getTrainedCLM():
    """Load the trained character language model from 'CLM_model.npz'.

    Returns
    -------
    (l_in, l_mask, output)
        The input layer (one-hot character sequences), the mask layer and
        the symbolic softmax output over the 29 character classes.
    """
    # CLM hyper-parameters (must match the trained model)
    INPUT_SIZE = 29   # one-hot representation of character indices
    N_HIDDEN = 100
    GRAD_CLIP = 100
    # fix: removed unused HIDDEN_NONLINEARITY constant and the dead
    # Recurrent_output / Reshape_output / shape-unpack locals, which
    # built extra Theano graphs that were never used

    l_in = lasagne.layers.InputLayer(shape=(None, None, INPUT_SIZE))
    l_mask = lasagne.layers.InputLayer(shape=(None, None))
    l_recurrent = lasagne.layers.RecurrentLayer(incoming=l_in,
                                                num_units=N_HIDDEN,
                                                mask_input=l_mask,
                                                learn_init=True,
                                                grad_clipping=GRAD_CLIP)
    # flatten time into the batch axis for the dense layers
    l_reshape = lasagne.layers.ReshapeLayer(l_recurrent, (-1, N_HIDDEN))
    l_h1 = lasagne.layers.DenseLayer(l_reshape, num_units=N_HIDDEN)
    l_h2 = lasagne.layers.DenseLayer(l_h1, num_units=N_HIDDEN)
    l_dense = lasagne.layers.DenseLayer(l_h2, num_units=INPUT_SIZE,
                                        nonlinearity=lasagne.nonlinearities.softmax)

    with np.load('CLM_model.npz') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(l_dense, param_values, trainable=True)
    output = lasagne.layers.get_output(l_dense)
    return l_in, l_mask, output

#def getCLMOneHot( sequence ):
def build_generator(input_var=None):
    """DCGAN-style generator: 100-dim noise -> (None, 1, 28, 28) sigmoid image."""
    from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer
    try:
        from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    except ImportError:
        raise ImportError("Your Lasagne is too old. Try the bleeding-edge "
                          "version: http://lasagne.readthedocs.io/en/latest/"
                          "user/installation.html#bleeding-edge-version")
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import sigmoid

    # 100-dim noise input
    net = InputLayer(shape=(None, 100), input_var=input_var)
    # fully-connected layer
    net = batch_norm(DenseLayer(net, 1024))
    # project and reshape to a 7x7 map with 128 channels
    net = batch_norm(DenseLayer(net, 128 * 7 * 7))
    net = ReshapeLayer(net, ([0], 128, 7, 7))
    # two fractional-stride (transposed) convolutions: 7x7 -> 14x14 -> 28x28
    net = batch_norm(Deconv2DLayer(net, 64, 5, stride=2, crop='same',
                                   output_size=14))
    net = Deconv2DLayer(net, 1, 5, stride=2, crop='same', output_size=28,
                        nonlinearity=sigmoid)
    print("Generator output:", net.output_shape)
    return net
def build_generator(input_var=None, verbose=False):
    """DCGAN-style generator for 32x32 RGB: 100-dim noise -> (None, 3, 32, 32)."""
    from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer
    try:
        from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    except ImportError:
        raise ImportError("Your Lasagne is too old. Try the bleeding-edge "
                          "version: http://lasagne.readthedocs.io/en/latest/"
                          "user/installation.html#bleeding-edge-version")
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import sigmoid

    # 100-dim noise input
    net = InputLayer(shape=(None, 100), input_var=input_var)
    # project and reshape to a 4x4 map with 1024 channels
    net = batch_norm(DenseLayer(net, 1024 * 4 * 4))
    net = ReshapeLayer(net, ([0], 1024, 4, 4))
    # three fractional-stride (transposed) convolutions:
    # 4x4 -> 8x8 -> 16x16 -> 32x32
    net = batch_norm(Deconv2DLayer(net, 512, 5, stride=2, crop='same',
                                   output_size=8))
    net = batch_norm(Deconv2DLayer(net, 256, 5, stride=2, crop='same',
                                   output_size=16))
    net = Deconv2DLayer(net, 3, 5, stride=2, crop='same', output_size=32,
                        nonlinearity=sigmoid)
    if verbose:
        print("Generator output:", net.output_shape)
    return net
def build_nets(input_var, channels=1, do_batchnorm=True, z_dim=100):
    """Build a convolutional autoencoder ('ae_*' keys) and a discriminator
    ('disc_*' keys) over (channels, 28, 28) inputs.

    Returns a dict mapping layer names to lasagne layers.
    """
    def ns(shape):
        # swap the concrete batch dim for [0] so ReshapeLayer keeps it symbolic
        ret=list(shape)
        ret[0]=[0]
        return tuple(ret)
    ret = {}
    # identity when batch norm is disabled
    bn = batch_norm if do_batchnorm else lambda x:x
    # --- autoencoder: conv/pool encoder down to a z_dim tanh code ---
    ret['ae_in'] = layer = InputLayer(shape=(None,channels,28,28), input_var=input_var)
    ret['ae_conv1'] = layer = bn(Conv2DLayer(layer, num_filters=64, filter_size=5))
    ret['ae_pool1'] = layer = MaxPool2DLayer(layer, pool_size=2)
    ret['ae_conv2'] = layer = bn(Conv2DLayer(layer, num_filters=128, filter_size=3))
    ret['ae_pool2'] = layer = MaxPool2DLayer(layer, pool_size=2)
    ret['ae_enc'] = layer = DenseLayer(layer, num_units=z_dim, nonlinearity=nn.nonlinearities.tanh)
    # decoder: dense back to the pooled feature volume, then upscale/deconv
    ret['ae_unenc'] = layer = bn(nn.layers.DenseLayer(layer, num_units = np.product(nn.layers.get_output_shape(ret['ae_pool2'])[1:])))
    ret['ae_resh'] = layer = ReshapeLayer(layer, shape=ns(nn.layers.get_output_shape(ret['ae_pool2'])))
    ret['ae_depool2'] = layer = Upscale2DLayer(layer, scale_factor=2)
    ret['ae_deconv2'] = layer = bn(Conv2DLayer(layer, num_filters=64, filter_size=3, pad='full'))
    ret['ae_depool1'] = layer = Upscale2DLayer(layer, scale_factor=2)
    ret['ae_out'] = Conv2DLayer(layer, num_filters=1, filter_size=5, pad='full', nonlinearity=nn.nonlinearities.sigmoid)
    # --- discriminator: same conv trunk shape, separate weights ---
    ret['disc_in'] = layer = InputLayer(shape=(None,channels,28,28), input_var=input_var)
    ret['disc_conv1'] = layer = bn(Conv2DLayer(layer, num_filters=64, filter_size=5))
    ret['disc_pool1'] = layer = MaxPool2DLayer(layer, pool_size=2)
    ret['disc_conv2'] = layer = bn(Conv2DLayer(layer, num_filters=128, filter_size=3))
    ret['disc_pool2'] = layer = MaxPool2DLayer(layer, pool_size=2)
    ret['disc_hid'] = layer = bn(DenseLayer(layer, num_units=100))
    ret['disc_out'] = DenseLayer(layer, num_units=1, nonlinearity=nn.nonlinearities.sigmoid)
    return ret
def generator(input_var,Y): yb = Y#.dimshuffle(0, 1, 'x', 'x') G_1 = lasagne.layers.InputLayer(shape=(None, NLAT,1,1),input_var=input_var) G_2 = lasagne.layers.InputLayer(shape=(None, 10),input_var=Y) network=G_1 network_yb=G_2 network = conv_layer(network, 1, 4 * 4 * 128, 1, 'valid') #print(input_var.shape[0]) network = ll.ReshapeLayer(network, (-1, 128, 4, 4)) network = CondConvConcatLayer([network,network_yb]) network = resnet_block(network, 3,138) network = CondConvConcatLayer([network,network_yb]) #network = resnet_block(network, 3, 128) network = BilinearUpsampling(network, ratio=2) network = batch_norm(conv_layer(network, 3,138, 1, 'same')) network = CondConvConcatLayer([network,network_yb]) network = resnet_block(network, 3, 148) network = CondConvConcatLayer([network,network_yb]) network = BilinearUpsampling(network, ratio=2) network = batch_norm(conv_layer(network, 3, 32, 1, 'valid')) network = BilinearUpsampling(network, ratio=2) network = batch_norm(conv_layer(network, 3, 32, 1, 'same')) network = CondConvConcatLayer([network,network_yb]) #network = resnet_block(network, 3, 32) network = conv_layer(network, 1, 1, 1, 'valid', nonlinearity=sigmoid) #network =lasagne.layers.Conv2DLayer(network, num_filters=1, filter_size=1, stride=1, nonlinearity=sigmoid) return network, G_1,G_2 # In[23]:
def build_model(self, img_batch, pose_code):
    """Encoder/decoder network that re-renders ``img_batch`` conditioned on
    ``pose_code``.

    Returns (l_deconv_conv2_2, l_pose_reshape): the linear reconstruction
    layer (img_size[2] channels) and the reshaped pose-feature layer.
    """
    img_size = self.options['img_size']
    pose_code_size = self.options['pose_code_size']
    filter_size = self.options['filter_size']
    batch_size = img_batch.shape[0]  # symbolic batch size

    # image encoding: HWC input dimshuffled to CHW, two convs, max-pool
    l_in = InputLayer(shape = [None, img_size[0], img_size[1], img_size[2]], input_var=img_batch)
    l_in_dimshuffle = DimshuffleLayer(l_in, (0,3,1,2))
    l_conv1_1 = Conv2DLayer(l_in_dimshuffle, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_conv1_2 = Conv2DLayer(l_conv1_1, num_filters=64, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool1 = MaxPool2DLayer(l_conv1_2, pool_size=(2,2))

    # pose encoding: dense layers, then reshape to the pooled spatial grid
    l_in_2 = InputLayer(shape=(None, pose_code_size), input_var=pose_code)
    l_pose_1 = DenseLayer(l_in_2, num_units=512, W=HeNormal(),nonlinearity=rectify)
    l_pose_2 = DenseLayer(l_pose_1, num_units=pose_code_size*l_pool1.output_shape[2]*l_pool1.output_shape[3], W=HeNormal(),nonlinearity=rectify)
    l_pose_reshape = ReshapeLayer(l_pose_2, shape=(batch_size, pose_code_size, l_pool1.output_shape[2], l_pool1.output_shape[3]))

    # deeper fusion of image and pose features along the channel axis
    l_concat = ConcatLayer([l_pool1, l_pose_reshape], axis=1)
    l_pose_conv_1 = Conv2DLayer(l_concat, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pose_conv_2 = Conv2DLayer(l_pose_conv_1, num_filters=128, filter_size=filter_size, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_pool2 = MaxPool2DLayer(l_pose_conv_2, pool_size=(2,2))
    l_conv_3 = Conv2DLayer(l_pool2, num_filters=128, filter_size=(1,1), W=HeNormal())
    l_unpool1 = Unpool2DLayer(l_conv_3, ds = (2,2))

    # image decoding: unpool/conv stages back to img_size[2] channels
    # (final layers are linear — no nonlinearity)
    l_deconv_conv1_1 = Conv2DLayer(l_unpool1, num_filters=128, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv1_2 = Conv2DLayer(l_deconv_conv1_1, num_filters=64, filter_size=filter_size, nonlinearity=rectify,W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_unpool2 = Unpool2DLayer(l_deconv_conv1_2, ds = (2,2))
    l_deconv_conv2_1 = Conv2DLayer(l_unpool2, num_filters=64, filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))
    l_deconv_conv2_2 = Conv2DLayer(l_deconv_conv2_1, num_filters=img_size[2], filter_size=filter_size, nonlinearity=None, W=HeNormal(), pad=(filter_size[0]//2, filter_size[1]//2))

    return l_deconv_conv2_2, l_pose_reshape
def build_combination(input_var, output_nodes, input_size, stocks, period, feature_types):
    """Per-stock, per-feature-type ensemble network.

    The flat input is reshaped to (batch, stocks, period, feature_types),
    sliced into one sub-network per (stock, feature type), merged per
    stock, and finally combined into a sigmoid output layer.

    Returns (network, stock_networks): the combined output layer and the
    list of per-stock output layers.
    """
    net = InputLayer(shape=(None, 1, input_size), input_var=input_var)
    assert input_size == stocks * period * feature_types
    net = ReshapeLayer(net, ([0], stocks, period, feature_types))

    # slice out each stock, then each feature type within it
    sliced = []
    for s_idx in range(stocks):
        stock_slice = SliceLayer(net, indices=s_idx, axis=1)
        feature_slices = [SliceLayer(stock_slice, indices=f_idx, axis=1)
                          for f_idx in range(feature_types)]
        sliced.append(feature_slices)

    stock_networks = []
    for feature_slices in sliced:
        # a small dense net per feature type, concatenated per stock
        subnets = []
        for feat_layer in feature_slices:
            hidden = DenseLayer(dropout(feat_layer, p=.2),
                                num_units=10, nonlinearity=tanh)
            subnets.append(DenseLayer(dropout(hidden, p=.5),
                                      num_units=1, nonlinearity=tanh))
        merged = ConcatLayer(subnets)
        stock_networks.append(DenseLayer(dropout(merged, p=.5),
                                         num_units=1, nonlinearity=tanh))

    combined = ConcatLayer(stock_networks)
    combined = DenseLayer(dropout(combined, p=.5),
                          num_units=output_nodes, nonlinearity=sigmoid)
    return combined, stock_networks
def create_model(input_var, input_shape, options):
    """Convolutional autoencoder with tied encoder/decoder weights.

    Encoder: three convs with max-pooling, a dense layer, then a linear
    bottleneck of options['BOTTLENECK'] units. The decoder mirrors the
    encoder, reusing the transposed dense weights and the conv filters.
    Returns the flattened reconstruction layer.
    """
    conv_num_filters1 = 100
    conv_num_filters2 = 150
    conv_num_filters3 = 200
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = options['BOTTLENECK']
    dense_mid_size = options['DENSE']
    pad_in = 'valid'
    pad_out = 'full'
    scaled_tanh = create_scaled_tanh()
    # NOTE(review): `input` shadows the builtin of the same name.
    input = InputLayer(shape=input_shape, input_var=input_var, name='input')
    conv2d1 = Conv2DLayer(input, num_filters=conv_num_filters1, filter_size=filter_size1, pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    maxpool2d2 = MaxPool2DLayer(conv2d1, pool_size=pool_size, name='maxpool2d2')
    conv2d3 = Conv2DLayer(maxpool2d2, num_filters=conv_num_filters2, filter_size=filter_size2, pad=pad_in, name='conv2d3', nonlinearity=scaled_tanh)
    maxpool2d4 = MaxPool2DLayer(conv2d3, pool_size=pool_size, name='maxpool2d4', pad=(1,0))
    conv2d5 = Conv2DLayer(maxpool2d4, num_filters=conv_num_filters3, filter_size=filter_size3, pad=pad_in, name='conv2d5', nonlinearity=scaled_tanh)
    reshape6 = ReshapeLayer(conv2d5, shape=([0], -1), name='reshape6')  # 3000
    reshape6_output = reshape6.output_shape[1]
    dense7 = DenseLayer(reshape6, num_units=dense_mid_size, name='dense7', nonlinearity=scaled_tanh)
    bottleneck = DenseLayer(dense7, num_units=encode_size, name='bottleneck', nonlinearity=linear)
    # print_network(bottleneck)
    # decoder: tied weights — transposed dense Ws, shared conv filters
    dense8 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T, name='dense8', nonlinearity=linear)
    dense9 = DenseLayer(dense8, num_units=reshape6_output, W=dense7.W.T, nonlinearity=scaled_tanh, name='dense9')
    reshape10 = ReshapeLayer(dense9, shape=([0], conv_num_filters3, 3, 5), name='reshape10')  # 32 x 4 x 7
    deconv2d11 = Deconv2DLayer(reshape10, conv2d5.input_shape[1], conv2d5.filter_size, stride=conv2d5.stride, W=conv2d5.W, flip_filters=not conv2d5.flip_filters, name='deconv2d11', nonlinearity=scaled_tanh)
    upscale2d12 = Upscale2DLayer(deconv2d11, scale_factor=pool_size, name='upscale2d12')
    deconv2d13 = Deconv2DLayer(upscale2d12, conv2d3.input_shape[1], conv2d3.filter_size, stride=conv2d3.stride, W=conv2d3.W, flip_filters=not conv2d3.flip_filters, name='deconv2d13', nonlinearity=scaled_tanh)
    upscale2d14 = Upscale2DLayer(deconv2d13, scale_factor=pool_size, name='upscale2d14')
    deconv2d15 = Deconv2DLayer(upscale2d14, conv2d1.input_shape[1], conv2d1.filter_size, stride=conv2d1.stride, crop=(1, 0), W=conv2d1.W, flip_filters=not conv2d1.flip_filters, name='deconv2d14', nonlinearity=scaled_tanh)
    reshape16 = ReshapeLayer(deconv2d15, ([0], -1), name='reshape16')
    print_network(reshape16)
    return reshape16
def compile_delta_features():
    """Compile a Theano function mapping raw 1200-dim frame sequences to the
    delta coefficients of the pretrained 50-dim bottleneck encoding.

    Returns a callable f(input, window) -> delta features.
    """
    # create input
    input_var = T.tensor3('input', dtype='float32')
    win_var = T.iscalar('theta')
    weights, biases = autoencoder.load_dbn()
    '''
    activations = [sigmoid, sigmoid, sigmoid, linear, sigmoid, sigmoid, sigmoid, linear]
    layersizes = [2000, 1000, 500, 50, 500, 1000, 2000, 1200]
    ae = autoencoder.create_model(l_input, weights, biases, activations, layersizes)
    print_network(ae)
    reconstruct = las.layers.get_output(ae)
    reconstruction_fn = theano.function([input_var], reconstruct, allow_input_downcast=True)
    recon_img = reconstruction_fn(test_data_resized)
    visualize_reconstruction(test_data_resized[225:250], recon_img[225:250])
    '''
    l_input = InputLayer((None, None, 1200), input_var, name='input')
    symbolic_batchsize = l_input.input_var.shape[0]
    symbolic_seqlen = l_input.input_var.shape[1]
    # encoder half of the pretrained DBN autoencoder (first 4 layers)
    en_activations = [sigmoid, sigmoid, sigmoid, linear]
    en_layersizes = [2000, 1000, 500, 50]
    # flatten time into batch, encode, then restore (batch, time, code)
    l_reshape1 = ReshapeLayer(l_input, (-1, l_input.shape[-1]), name='reshape1')
    l_encoder = autoencoder.create_model(l_reshape1, weights[:4], biases[:4], en_activations, en_layersizes)
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder, (symbolic_batchsize, symbolic_seqlen, encoder_len), name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win_var, name='delta')
    # drop the first 50 features to extract the delta coefficients
    l_slice = SliceLayer(l_delta, indices=slice(50, None), axis=-1, name='slice')
    l_reshape3 = ReshapeLayer(l_slice, (-1, l_slice.output_shape[-1]), name='reshape3')
    print_network(l_reshape3)
    delta_features = las.layers.get_output(l_reshape3)
    delta_fn = theano.function([input_var, win_var], delta_features, allow_input_downcast=True)
    return delta_fn
def create_model(incoming, options):
    """Batch-normalised convolutional autoencoder with tied weights.

    Same layout as the plain variant, with BatchNormLayers inserted in the
    encoder: three conv stages with max-pooling, a dense layer, a linear
    bottleneck of options['BOTTLENECK'] units, and a mirrored decoder
    reusing transposed dense weights and the conv filters.
    Returns (reconstruction_layer, bottleneck_layer).
    """
    conv_num_filters1 = 100
    conv_num_filters2 = 150
    conv_num_filters3 = 200
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = options['BOTTLENECK']
    dense_mid_size = options['DENSE']
    pad_in = 'valid'
    pad_out = 'full'
    scaled_tanh = create_scaled_tanh()
    conv2d1 = Conv2DLayer(incoming, num_filters=conv_num_filters1, filter_size=filter_size1, pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    maxpool2d3 = MaxPool2DLayer(conv2d1, pool_size=pool_size, name='maxpool2d3')
    bn2 = BatchNormLayer(maxpool2d3, name='batchnorm2')
    conv2d4 = Conv2DLayer(bn2, num_filters=conv_num_filters2, filter_size=filter_size2, pad=pad_in, name='conv2d4', nonlinearity=scaled_tanh)
    maxpool2d6 = MaxPool2DLayer(conv2d4, pool_size=pool_size, name='maxpool2d6', pad=(1,0))
    bn3 = BatchNormLayer(maxpool2d6, name='batchnorm3')
    conv2d7 = Conv2DLayer(bn3, num_filters=conv_num_filters3, filter_size=filter_size3, pad=pad_in, name='conv2d7', nonlinearity=scaled_tanh)
    reshape9 = ReshapeLayer(conv2d7, shape=([0], -1), name='reshape9')  # 3000
    reshape9_output = reshape9.output_shape[1]
    bn8 = BatchNormLayer(reshape9, name='batchnorm8')
    dense10 = DenseLayer(bn8, num_units=dense_mid_size, name='dense10', nonlinearity=scaled_tanh)
    bn11 = BatchNormLayer(dense10, name='batchnorm11')
    bottleneck = DenseLayer(bn11, num_units=encode_size, name='bottleneck', nonlinearity=linear)
    # print_network(bottleneck)
    # decoder: tied weights — transposed dense Ws, shared conv filters
    dense12 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T, name='dense12', nonlinearity=linear)
    dense13 = DenseLayer(dense12, num_units=reshape9_output, W=dense10.W.T, nonlinearity=scaled_tanh, name='dense13')
    reshape14 = ReshapeLayer(dense13, shape=([0], conv_num_filters3, 3, 5), name='reshape14')  # 32 x 4 x 7
    # NOTE(review): 'deconv2d19' is re-bound for the last deconv below; the
    # first binding is already consumed by upscale2d16, so behavior is fine,
    # but the duplicate name is confusing.
    deconv2d19 = Deconv2DLayer(reshape14, conv2d7.input_shape[1], conv2d7.filter_size, stride=conv2d7.stride, W=conv2d7.W, flip_filters=not conv2d7.flip_filters, name='deconv2d19', nonlinearity=scaled_tanh)
    upscale2d16 = Upscale2DLayer(deconv2d19, scale_factor=pool_size, name='upscale2d16')
    deconv2d17 = Deconv2DLayer(upscale2d16, conv2d4.input_shape[1], conv2d4.filter_size, stride=conv2d4.stride, W=conv2d4.W, flip_filters=not conv2d4.flip_filters, name='deconv2d17', nonlinearity=scaled_tanh)
    upscale2d18 = Upscale2DLayer(deconv2d17, scale_factor=pool_size, name='upscale2d18')
    deconv2d19 = Deconv2DLayer(upscale2d18, conv2d1.input_shape[1], conv2d1.filter_size, stride=conv2d1.stride, crop=(1, 0), W=conv2d1.W, flip_filters=not conv2d1.flip_filters, name='deconv2d14', nonlinearity=scaled_tanh)
    reshape20 = ReshapeLayer(deconv2d19, ([0], -1), name='reshape20')
    return reshape20, bottleneck
def create_model(incoming, options):
    """Convolutional autoencoder with tied encoder/decoder weights.

    Encoder: three convs with max-pooling, a dense layer, then a linear
    bottleneck of options['BOTTLENECK'] units. The decoder mirrors the
    encoder, reusing the transposed dense weights and the conv filters.
    Returns (reconstruction_layer, bottleneck_layer).
    """
    conv_num_filters1 = 100
    conv_num_filters2 = 150
    conv_num_filters3 = 200
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = options['BOTTLENECK']
    dense_mid_size = options['DENSE']
    pad_in = 'valid'
    pad_out = 'full'
    scaled_tanh = create_scaled_tanh()
    conv2d1 = Conv2DLayer(incoming, num_filters=conv_num_filters1, filter_size=filter_size1, pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    maxpool2d2 = MaxPool2DLayer(conv2d1, pool_size=pool_size, name='maxpool2d2')
    conv2d3 = Conv2DLayer(maxpool2d2, num_filters=conv_num_filters2, filter_size=filter_size2, pad=pad_in, name='conv2d3', nonlinearity=scaled_tanh)
    maxpool2d4 = MaxPool2DLayer(conv2d3, pool_size=pool_size, name='maxpool2d4', pad=(1,0))
    conv2d5 = Conv2DLayer(maxpool2d4, num_filters=conv_num_filters3, filter_size=filter_size3, pad=pad_in, name='conv2d5', nonlinearity=scaled_tanh)
    reshape6 = ReshapeLayer(conv2d5, shape=([0], -1), name='reshape6')  # 3000
    reshape6_output = reshape6.output_shape[1]
    dense7 = DenseLayer(reshape6, num_units=dense_mid_size, name='dense7', nonlinearity=scaled_tanh)
    bottleneck = DenseLayer(dense7, num_units=encode_size, name='bottleneck', nonlinearity=linear)
    # print_network(bottleneck)
    # decoder: tied weights — transposed dense Ws, shared conv filters
    dense8 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T, name='dense8', nonlinearity=linear)
    dense9 = DenseLayer(dense8, num_units=reshape6_output, W=dense7.W.T, nonlinearity=scaled_tanh, name='dense9')
    reshape10 = ReshapeLayer(dense9, shape=([0], conv_num_filters3, 3, 5), name='reshape10')  # 32 x 4 x 7
    deconv2d11 = Deconv2DLayer(reshape10, conv2d5.input_shape[1], conv2d5.filter_size, stride=conv2d5.stride, W=conv2d5.W, flip_filters=not conv2d5.flip_filters, name='deconv2d11', nonlinearity=scaled_tanh)
    upscale2d12 = Upscale2DLayer(deconv2d11, scale_factor=pool_size, name='upscale2d12')
    deconv2d13 = Deconv2DLayer(upscale2d12, conv2d3.input_shape[1], conv2d3.filter_size, stride=conv2d3.stride, W=conv2d3.W, flip_filters=not conv2d3.flip_filters, name='deconv2d13', nonlinearity=scaled_tanh)
    upscale2d14 = Upscale2DLayer(deconv2d13, scale_factor=pool_size, name='upscale2d14')
    deconv2d15 = Deconv2DLayer(upscale2d14, conv2d1.input_shape[1], conv2d1.filter_size, stride=conv2d1.stride, crop=(1, 0), W=conv2d1.W, flip_filters=not conv2d1.flip_filters, name='deconv2d14', nonlinearity=scaled_tanh)
    reshape16 = ReshapeLayer(deconv2d15, ([0], -1), name='reshape16')
    return reshape16, bottleneck
def create_model(input_shape, input_var, mask_shape, mask_var, window,
                 lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False,
                 use_blstm=True):
    """Sequence classifier over delta-augmented input: DeltaLayer, then a
    (B)LSTM, then a per-timestep softmax over ``output_classes``.

    Returns the output layer of shape (batch, seq_len, output_classes).
    """
    gates = Gate(W_in=w_init, W_hid=w_init, b=las.init.Constant(0.))
    # W_cell=None disables the cell connection; tanh is the conventional
    # LSTM cell nonlinearity.
    cell = Gate(W_in=w_init, W_hid=w_init, W_cell=None,
                b=las.init.Constant(0.), nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, name='mask')
    seqlen_sym = l_in.input_var.shape[1]

    # augment each frame with delta coefficients over the given window
    l_delta = DeltaLayer(l_in, window, name='delta')
    if use_blstm:
        fwd, bwd = create_blstm(l_delta, l_mask, lstm_size, cell, gates,
                                'lstm', use_peepholes)
        l_recurrent = ElemwiseSumLayer([fwd, bwd], name='sum')
    else:
        l_recurrent = create_lstm(l_delta, l_mask, lstm_size, cell, gates,
                                  'lstm', use_peepholes)

    # flatten to (batch * seq_len, lstm_size) so the dense softmax scores
    # every timestep independently
    l_flat = ReshapeLayer(l_recurrent, (-1, lstm_size), name='reshape')
    l_softmax = DenseLayer(l_flat, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')
    return ReshapeLayer(l_softmax, (-1, seqlen_sym, output_classes),
                        name='output')
def create_model(input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, output_classes=26,
                 w_init=las.init.GlorotUniform(), use_peepholes=False,
                 use_blstm=True):
    """Sequence classifier: (B)LSTM over the masked input, then a
    per-timestep softmax over ``output_classes``.

    Returns the output layer of shape (batch, seq_len, output_classes).
    """
    gates = Gate(W_in=w_init, W_hid=w_init, b=las.init.Constant(0.))
    # W_cell=None disables the cell connection; tanh is the conventional
    # LSTM cell nonlinearity.
    cell = Gate(W_in=w_init, W_hid=w_init, W_cell=None,
                b=las.init.Constant(0.), nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    seqlen_sym = l_in.input_var.shape[1]

    if use_blstm:
        fwd, bwd = create_blstm(l_in, l_mask, lstm_size, cell, gates,
                                'lstm', use_peepholes)
        l_recurrent = ElemwiseSumLayer([fwd, bwd], name='sum')
    else:
        l_recurrent = create_lstm(l_in, l_mask, lstm_size, cell, gates,
                                  'lstm', use_peepholes)

    # flatten to (batch * seq_len, lstm_size) so the dense softmax scores
    # every timestep independently
    l_flat = ReshapeLayer(l_recurrent, (-1, lstm_size), name='reshape')
    l_softmax = DenseLayer(l_flat, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax,
                           name='softmax')
    return ReshapeLayer(l_softmax, (-1, seqlen_sym, output_classes),
                        name='output')
def buildFCN_up(incoming_net, incoming_layer, unpool, skip=False, unpool_type='standard', n_classes=21, out_nonlin=linear, additional_pool=0, ae_h=False, dropout=0., bn=0): ''' Build fcn decontracting path Parameters ---------- incoming_net: contracting path network incoming_layer: string, name of last layer of the contracting path unpool: int, number of unpooling/upsampling layers we need skip: bool, whether to skip connections unpool_type: string, unpooling type n_classes: int, number of classes out_nonlin: output nonlinearity ''' # Upsampling path net = {} # net['score'] = ConvLayer(incoming_net[incoming_layer], n_classes, 1, # pad='valid', flip_filters=True) # for loop to add upsampling layers for p in range(unpool, 0, -1): # Unpool and Crop if unpool_type='standard' or Depool and Conv if p == unpool-additional_pool+1 and ae_h: layer_name = 'h_hat' else: layer_name = None UnpoolNet(incoming_net, net, p, unpool, n_classes, incoming_layer, skip, unpool_type=unpool_type, layer_name=layer_name, dropout=dropout, bn=bn) # final dimshuffle, reshape and softmax net['final_dimshuffle'] = DimshuffleLayer(net['fused_up'+str(p) if unpool > 0 else 'score'], (0, 2, 3, 1)) laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape net['final_reshape'] = ReshapeLayer(net['final_dimshuffle'], (T.prod(laySize[0:3]), laySize[3])) net['probs'] = NonlinearityLayer(net['final_reshape'], nonlinearity=out_nonlin) # go back to 4D net['probs_reshape'] = ReshapeLayer(net['probs'], (laySize[0], laySize[1], laySize[2], n_classes)) net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'], (0, 3, 1, 2)) # print('Input to last layer: ', net['probs_dimshuffle'].input_shape) print(net.keys()) return net
def _invert_Conv2DLayer(self, layer, feeder):
    """Build the "inverted" (deconvolution) graph for a Conv2DLayer.

    `feeder` carries the relevance coming from above; the result is the
    relevance propagated through `layer` back to its input, multiplied
    elementwise with the layer input.
    """
    # Warning they are swapped here
    feeder = self._put_rectifiers(feeder, layer)
    feeder = self._get_normalised_relevance_layer(layer, feeder)

    f_s = layer.filter_size
    # Map the forward padding onto the equivalent backward padding.
    if layer.pad == 'same':
        pad = 'same'
    elif layer.pad == 'valid' or layer.pad == (0, 0):
        pad = 'full'
    else:
        raise RuntimeError("Define your padding as full or same.")

    # Flip filters must be on to be a proper deconvolution.
    num_filters = L.get_output_shape(layer.input_layer)[1]
    if layer.stride == (4, 4):
        # Todo: similar code gradient based explainers. Merge.
        # Undo the stride by dilated upscaling, then crop the overhang.
        feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')
        output_layer = L.Conv2DLayer(feeder, num_filters=num_filters,
                                     filter_size=f_s, stride=1, pad=pad,
                                     nonlinearity=None, b=None,
                                     flip_filters=True)
        conv_layer = output_layer
        tmp = L.SliceLayer(output_layer, slice(0, -3), axis=3)
        output_layer = L.SliceLayer(tmp, slice(0, -3), axis=2)
        output_layer.W = conv_layer.W
        # BUGFIX: `W` was never bound on this path, so the
        # `output_layer.W = W` after the merge raised NameError.
        W = conv_layer.W
    else:
        output_layer = L.Conv2DLayer(feeder, num_filters=num_filters,
                                     filter_size=f_s, stride=1, pad=pad,
                                     nonlinearity=None, b=None,
                                     flip_filters=True)
        W = output_layer.W

    # Do the multiplication.
    x_layer = L.ReshapeLayer(layer.input_layer,
                             (-1,) + L.get_output_shape(output_layer)[1:])
    output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer],
                                        merge_function=T.mul)
    output_layer.W = W
    return output_layer
def __init__(self, input_shape, output_dim, hidden_dim,
             hidden_nonlinearity=LN.rectify, output_nonlinearity=None,
             name=None, input_var=None, input_layer=None):
    """Build a GRU network with both a full-sequence path and a
    single-step path (for incremental rollout) sharing the same weights.

    input_shape: per-timestep feature shape (batch and time are symbolic)
    output_dim:  units of the output DenseLayer
    hidden_dim:  GRU hidden size
    input_layer: optional pre-built input layer; otherwise an InputLayer
                 of shape (None, None) + input_shape is created
    NOTE(review): `name` is accepted but unused in this constructor.
    """
    if input_layer is None:
        l_in = L.InputLayer(shape=(None, None) + input_shape,
                            input_var=input_var, name="input")
    else:
        l_in = input_layer
    # Step-mode inputs: one timestep plus the previous hidden state.
    l_step_input = L.InputLayer(shape=(None,) + input_shape)
    l_step_prev_hidden = L.InputLayer(shape=(None, hidden_dim))
    l_gru = GRULayer(l_in, num_units=hidden_dim,
                     hidden_nonlinearity=hidden_nonlinearity,
                     hidden_init_trainable=False)
    # Flatten (batch, time, hidden) -> (batch*time, hidden) for the dense layer.
    l_gru_flat = L.ReshapeLayer(
        l_gru,
        shape=(-1, hidden_dim)
    )
    l_output_flat = L.DenseLayer(
        l_gru_flat,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
    )
    # Restore (batch, time, output_dim) using the symbolic input shape.
    l_output = OpLayer(
        l_output_flat,
        op=lambda flat_output, l_input:
        flat_output.reshape((l_input.shape[0], l_input.shape[1], -1)),
        shape_op=lambda flat_output_shape, l_input_shape:
        (l_input_shape[0], l_input_shape[1], flat_output_shape[-1]),
        extras=[l_in]
    )
    l_step_hidden = l_gru.get_step_layer(l_step_input, l_step_prev_hidden)
    # Step output reuses the sequence path's dense weights (shared W, b).
    l_step_output = L.DenseLayer(
        l_step_hidden,
        num_units=output_dim,
        nonlinearity=output_nonlinearity,
        W=l_output_flat.W,
        b=l_output_flat.b,
    )
    self._l_in = l_in
    self._hid_init_param = l_gru.h0
    self._l_gru = l_gru
    self._l_out = l_output
    self._l_step_input = l_step_input
    self._l_step_prev_hidden = l_step_prev_hidden
    self._l_step_hidden = l_step_hidden
    self._l_step_output = l_step_output
def network_discriminator(self, input, network_weights=None):
    """Build the autoencoder-style discriminator (BEGAN-like): conv encoder
    down to 8x8, a bottleneck dense pair, and a conv decoder back to an
    image in [0, 1] (sigmoid output).

    `input` may be an existing lasagne Layer (stacking on another network)
    or a Theano variable (a fresh InputLayer is created).
    Returns a dict mapping layer name -> layer.
    """
    layers = []

    if isinstance(input, lasagne.layers.Layer):
        layers.append(input)

        # First convolution
        layers.append(conv_layer(input, n_filters=self.n_filters, stride=1, name='discriminator/encoder/conv%d' % len(layers), network_weights=network_weights))

    else:
        # Input layer
        layers.append(InputLayer(shape=(None, 3, self.input_size, self.input_size), input_var=input, name='discriminator/encoder/input'))

        # First convolution
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='discriminator/encoder/conv%d' % len(layers), network_weights=network_weights))

    # Convolutional blocks (encoder)self.n_filters*i_block
    # Widths grow linearly with depth; pooling halves the spatial size
    # between blocks so the encoder ends at 8x8.
    n_blocks = int(np.log2(self.input_size/8)) + 1  # end up with 8x8 output
    for i_block in range(1, n_blocks+1):
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters*i_block, stride=1, name='discriminator/encoder/conv%d' % len(layers), network_weights=network_weights))
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters*i_block, stride=1, name='discriminator/encoder/conv%d' % len(layers), network_weights=network_weights))
        if i_block != n_blocks:
            # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block+1), stride=2, name='discriminator/encoder/conv%d' % len(layers), network_weights=network_weights))
            layers.append(MaxPool2DLayer(layers[-1], pool_size=2, stride=2, name='discriminator/encoder/pooling%d' % len(layers)))
        # else:
        #     layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='discriminator/encoder/conv%d' % len(layers), network_weights=network_weights))

    # Dense layers (linear outputs) -- the bottleneck code of size hidden_size
    layers.append(dense_layer(layers[-1], n_units=self.hidden_size, name='discriminator/encoder/dense%d' % len(layers), network_weights=network_weights))

    # Dense layer up (from h to n*8*8)
    layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters), name='discriminator/decoder/dense%d' % len(layers), network_weights=network_weights))
    layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters, 8, 8), name='discriminator/decoder/reshape%d' % len(layers)))

    # Convolutional blocks (decoder): constant width, upsample between blocks
    for i_block in range(1, n_blocks+1):
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='discriminator/decoder/conv%d' % len(layers), network_weights=network_weights))
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='discriminator/decoder/conv%d' % len(layers), network_weights=network_weights))
        if i_block != n_blocks:
            layers.append(Upscale2DLayer(layers[-1], scale_factor=2, name='discriminator/decoder/upsample%d' % len(layers)))

    # Final layer (make sure input images are in the range [-1, 1]
    # NOTE(review): sigmoid output is [0, 1] despite the comment above.
    layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='discriminator/decoder/output', network_weights=network_weights, nonlinearity=sigmoid))

    # Network in dictionary form
    network = {layer.name: layer for layer in layers}

    return network
def network_generator(self, input_var, network_weights=None):
    """Build the generator: dense projection of the latent code to an
    8x8 feature map, then conv blocks with 2x upsampling until the target
    resolution; sigmoid conv produces a 3-channel image.

    Returns a dict mapping layer name -> layer.
    """
    # Input layer
    layers = []
    n_blocks = int(np.log2(self.input_size / 8)) + 1  # end up with 8x8 output
    layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))

    # Dense layer up (from h to n*8*8)
    layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters), name='generator/dense%d' % len(layers), network_weights=network_weights))
    layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters, 8, 8), name='generator/reshape%d' % len(layers)))

    # Convolutional blocks (decoder): two convs per scale, upsample between
    for i_block in range(1, n_blocks+1):
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights))
        layers.append(conv_layer(layers[-1], n_filters=self.n_filters, stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights))
        if i_block != n_blocks:
            layers.append(Upscale2DLayer(layers[-1], scale_factor=2, name='generator/upsample%d' % len(layers)))

    # Final layer (make sure input images are in the range [-1, 1] if tanh used)
    layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output', network_weights=network_weights, nonlinearity=sigmoid))

    # Network in dictionary form
    network = {layer.name: layer for layer in layers}

    return network

# def network_generator_alt(self, input_var, network_weights=None):
#
#     # Input layer
#     layers = []
#     n_blocks = int(np.log2(self.input_size / 8)) + 1  # end up with 8x8 output
#     layers.append(InputLayer(shape=(None, self.hidden_size), input_var=input_var, name='generator/input'))
#
#     # Dense layer up (from h to n*8*8)
#     layers.append(dense_layer(layers[-1], n_units=(8 * 8 * self.n_filters*n_blocks), name='generator/dense%d' % len(layers), network_weights=network_weights, nonlinearity=elu, bn=True))
#     layers.append(ReshapeLayer(layers[-1], (-1, self.n_filters*n_blocks, 8, 8), name='generator/reshape%d' % len(layers)))
#
#     # Convolutional blocks (decoder)
#     for i_block in range(1, n_blocks+1)[::-1]:
#         # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
#         # layers.append(conv_layer(layers[-1], n_filters=self.n_filters*(i_block), stride=1, name='generator/conv%d' % len(layers), network_weights=network_weights, bn=True))
#         if i_block != 1:
#             layers.append(transposed_conv_layer(layers[-1], n_filters=self.n_filters*(i_block-1), stride=2, name='generator/upsample%d' % len(layers),
#                                                 output_size=8*2**(n_blocks-i_block+1), network_weights=network_weights, nonlinearity=elu, bn=True))
#
#     # Final layer (make sure input images are in the range [-1, 1]
#     layers.append(conv_layer(layers[-1], n_filters=3, stride=1, name='generator/output', network_weights=network_weights, nonlinearity=tanh, bn=False))
#
#     # Network in dictionary form
#     network = {layer.name: layer for layer in layers}
#
#     return network
def __init__(self, rng, rstream, x, y, setting):  # add cost
    """Construct the MLP model (elementary weights + L2 hyper-parameters).

    Arguments:
        rng, rstream -- random streams
        x, y -- symbolic input batch and integer class labels
        setting -- configuration object (cost name, regL2 switch, ...)
    """
    self.paramsEle = []    # elementary (weight/bias) parameters
    self.paramsHyper = []  # hyper-parameters: per-layer L2 coefficients

    self.layers = [ll.InputLayer((None, 3, 28, 28))]
    # Flatten the images. Lasagne's ReshapeLayer rejects None in the
    # target shape; [0] keeps the (symbolic) batch dimension instead.
    self.layers.append(ll.ReshapeLayer(self.layers[-1], ([0], 3 * 28 * 28)))
    penalty = 0.
    for num in [1000, 1000, 1000, 10]:  # TODO: refactor it later
        self.layers.append(DenseLayerWithReg(setting, self.layers[-1], num_units=num))
        # W, b and L2 are single shared variables; append them -- the old
        # `paramsEle += layer.W` tried to iterate a Theano variable.
        self.paramsEle.append(self.layers[-1].W)
        self.paramsEle.append(self.layers[-1].b)
        if setting.regL2 is not None:
            tempL2 = self.layers[-1].L2 * T.sqr(self.layers[-1].W)
            penalty += T.sum(tempL2)
            self.paramsHyper.append(self.layers[-1].L2)

    self.y = self.layers[-1].output
    self.prediction = T.argmax(self.y, axis=1)
    self.guessLabel = self.prediction  # alias; classError below uses it
    # `penalty` is symbolic when regL2 is active; truth-testing a Theano
    # expression raises, so branch on the Python-level switch instead.
    self.penalty = penalty if setting.regL2 is not None else T.constant(0.)

    def stable(x, stabilize=True):
        # Clamp NaN/Inf to a large finite value to keep the cost defined.
        if stabilize:
            x = T.where(T.isnan(x), 1000., x)
            x = T.where(T.isinf(x), 1000., x)
        return x

    if setting.cost == 'categorical_crossentropy':
        def costFun1(y, label):
            return stable(-T.log(y[T.arange(label.shape[0]), label]),
                          stabilize=True)
    else:
        raise NotImplementedError

    def costFunT1(*args, **kwargs):
        return T.mean(costFun1(*args, **kwargs))

    # cost function
    self.trainCost = costFunT1(self.y, y)
    self.classError = T.mean(T.cast(T.neq(self.guessLabel, y), 'float32'))
def _sample_trained_minibatch_gan(params_file, n, batch_size, rs):
    """Rebuild the DCGAN generator, load trained weights from
    `params_file` and draw `n` samples (32x32x3, tanh-scaled)."""
    import lasagne
    from lasagne.init import Normal
    import lasagne.layers as ll
    import theano as th
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import theano.tensor as T

    import nn

    theano_rng = MRG_RandomStreams(rs.randint(2 ** 15))
    lasagne.random.set_rng(np.random.RandomState(rs.randint(2 ** 15)))

    noise_dim = (batch_size, 100)
    noise = theano_rng.uniform(size=noise_dim)

    # Generator: 100-d uniform noise -> 4x4x512 -> three deconvs -> 32x32x3.
    gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
    gen_layers.append(nn.batch_norm(
        ll.DenseLayer(gen_layers[-1], num_units=4*4*512, W=Normal(0.05),
                      nonlinearity=nn.relu),
        g=None))
    gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (batch_size, 512, 4, 4)))
    gen_layers.append(nn.batch_norm(
        nn.Deconv2DLayer(gen_layers[-1], (batch_size, 256, 8, 8), (5, 5),
                         W=Normal(0.05), nonlinearity=nn.relu),
        g=None))  # 4 -> 8
    gen_layers.append(nn.batch_norm(
        nn.Deconv2DLayer(gen_layers[-1], (batch_size, 128, 16, 16), (5, 5),
                         W=Normal(0.05), nonlinearity=nn.relu),
        g=None))  # 8 -> 16
    gen_layers.append(nn.weight_norm(
        nn.Deconv2DLayer(gen_layers[-1], (batch_size, 3, 32, 32), (5, 5),
                         W=Normal(0.05), nonlinearity=T.tanh),
        train_g=True, init_stdv=0.1))  # 16 -> 32

    sample_expr = ll.get_output(gen_layers[-1])

    # Restore the 9 trained parameter arrays saved by the training script.
    with np.load(params_file) as archive:
        trained_params = [archive['arr_{}'.format(i)] for i in range(9)]
    ll.set_all_param_values(gen_layers[-1], trained_params, trainable=True)

    sample_batch = th.function(inputs=[], outputs=sample_expr)
    samples = []
    while len(samples) < n:
        samples.extend(sample_batch())
    return np.array(samples[:n])
def get_generator(self, meanx, z0, y_1hot):
    ''' specify generator G0, gen_x = G0(z0, h1)

    meanx: per-pixel training mean subtracted from the sigmoid output
    z0: noise input (batch, z0dim)
    y_1hot: one-hot class labels (batch, 10) the generator is conditioned on
    Returns (list of layers, symbolic generated batch).
    '''
    # Dead earlier variant kept for reference (z0 embedded through two
    # dense layers before concatenation with the label input):
    """
    #z0 = theano_rng.uniform(size=(self.args.batch_size, 16)) # uniform noise
    gen0_layers = [LL.InputLayer(shape=(self.args.batch_size, 50), input_var=z0)]  # Input layer for z0
    gen0_layers.append(nn.batch_norm(LL.DenseLayer(nn.batch_norm(LL.DenseLayer(gen0_layers[0], num_units=128, W=Normal(0.02), nonlinearity=nn.relu)),
                      num_units=128, W=Normal(0.02), nonlinearity=nn.relu)))  # embedding, 50 -> 128
    gen0_layer_z_embed = gen0_layers[-1]

    #gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 256), input_var=real_fc3))  # Input layer for real_fc3 in independent training, gen_fc3 in joint training
    gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 10), input_var=y_1hot))  # Input layer for real_fc3 in independent training, gen_fc3 in joint training
    gen0_layer_fc3 = gen0_layers[-1]

    gen0_layers.append(LL.ConcatLayer([gen0_layer_fc3,gen0_layer_z_embed], axis=1))  # concatenate noise and fc3 features
    gen0_layers.append(LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen0_layers[-1], num_units=256*5*5, W=Normal(0.02), nonlinearity=T.nnet.relu)),
                       (self.args.batch_size,256,5,5)))  # fc
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size,256,10,10), (5,5), stride=(2, 2), padding = 'half',
                       W=Normal(0.02), nonlinearity=nn.relu)))  # deconv
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size,128,14,14), (5,5), stride=(1, 1), padding = 'valid',
                       W=Normal(0.02), nonlinearity=nn.relu)))  # deconv

    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size,128,28,28), (5,5), stride=(2, 2), padding = 'half',
                       W=Normal(0.02), nonlinearity=nn.relu)))  # deconv
    gen0_layers.append(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size,3,32,32), (5,5), stride=(1, 1), padding = 'valid',
                       W=Normal(0.02), nonlinearity=T.nnet.sigmoid))  # deconv

    gen_x_pre = LL.get_output(gen0_layers[-1], deterministic=False)
    gen_x = gen_x_pre - meanx
    # gen_x_joint = LL.get_output(gen0_layers[-1], {gen0_layer_fc3: gen_fc3}, deterministic=False) - meanx

    return gen0_layers, gen_x
    """
    # Active variant: concatenate raw z and the one-hot label directly.
    gen_x_layer_z = LL.InputLayer(shape=(self.args.batch_size, self.args.z0dim), input_var=z0)  # z, 20
    # gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None)  # 20 -> 64

    gen_x_layer_y = LL.InputLayer(shape=(self.args.batch_size, 10), input_var=y_1hot)  # conditioned on real fc3 activations
    gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y,gen_x_layer_z],axis=1)  # 512+256 = 768
    # Project to a 256x5x5 feature map, then three deconvs up to 32x32.
    gen_x_layer_pool2 = LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256*5*5)), (self.args.batch_size,256,5,5))
    gen_x_layer_dconv2_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_pool2, (self.args.batch_size,256,10,10), (5,5), stride=(2, 2), padding = 'half',
                         W=Normal(0.02),  nonlinearity=nn.relu))
    gen_x_layer_dconv2_2 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_1, (self.args.batch_size,128,14,14), (5,5), stride=(1, 1), padding = 'valid',
                         W=Normal(0.02),  nonlinearity=nn.relu))

    gen_x_layer_dconv1_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_2, (self.args.batch_size,128,28,28), (5,5), stride=(2, 2), padding = 'half',
                         W=Normal(0.02),  nonlinearity=nn.relu))
    # Sigmoid output in [0, 1]; mean subtraction happens below.
    gen_x_layer_x = nn.Deconv2DLayer(gen_x_layer_dconv1_1, (self.args.batch_size,3,32,32), (5,5), stride=(1, 1), padding = 'valid',
                         W=Normal(0.02), nonlinearity=T.nnet.sigmoid)
    # gen_x_layer_x = dnn.Conv2DDNNLayer(gen_x_layer_dconv1_2, 3, (1,1), pad=0, stride=1,
    #                      W=Normal(0.02), nonlinearity=T.nnet.sigmoid)

    gen_x_layers = [gen_x_layer_z, gen_x_layer_y, gen_x_layer_y_z, gen_x_layer_pool2, gen_x_layer_dconv2_1, gen_x_layer_dconv2_2, gen_x_layer_dconv1_1, gen_x_layer_x]

    # deterministic=False keeps batch-norm in training mode for sampling.
    gen_x_pre = LL.get_output(gen_x_layer_x, deterministic=False)
    gen_x = gen_x_pre - meanx

    return gen_x_layers, gen_x
def load_data():
    """Load CIFAR-10 and return (pixel_mean, data dict).

    Images are scaled to [0, 1], laid out NCHW and centred by subtracting
    the per-pixel mean of the 50000 training images. The dict contains
    float32 X_train/X_test and int32 Y_train/Y_test.
    """
    xs = []
    ys = []
    for j in range(5):
        # str(j + 1): the original used Python-2-only backtick repr syntax,
        # which is a SyntaxError on Python 3.
        d = unpickle('data/cifar-10-python/cifar-10-batches-py/data_batch_' + str(j + 1))
        x = d['data']
        y = d['labels']
        xs.append(x)
        ys.append(y)

    d = unpickle('data/cifar-10-python/cifar-10-batches-py/test_batch')
    xs.append(d['data'])
    ys.append(d['labels'])

    x = np.concatenate(xs) / np.float32(255)
    y = np.concatenate(ys)
    # Split the flat rows into R/G/B planes, then to (N, 3, 32, 32).
    x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
    x = x.reshape((x.shape[0], 32, 32, 3)).transpose(0, 3, 1, 2)

    # subtract per-pixel mean
    pixel_mean = np.mean(x[0:50000], axis=0)
    # pickle.dump(pixel_mean, open("cifar10-pixel_mean.pkl","wb"))
    x -= pixel_mean

    # create mirrored images
    X_train = x[0:50000, :, :, :]
    Y_train = y[0:50000]
    # X_train_flip = X_train[:,:,:,::-1]
    # Y_train_flip = Y_train
    # X_train = np.concatenate((X_train,X_train_flip),axis=0)
    # Y_train = np.concatenate((Y_train,Y_train_flip),axis=0)

    X_test = x[50000:, :, :, :]
    Y_test = y[50000:]

    return pixel_mean, dict(
        X_train=lasagne.utils.floatX(X_train),
        Y_train=Y_train.astype('int32'),
        X_test=lasagne.utils.floatX(X_test),
        Y_test=Y_test.astype('int32'),)

## specify generator, gen_pool5 = G(z, y_1hot)
#z = theano_rng.uniform(size=(args.batch_size, 100)) # uniform noise
#y_1hot = T.matrix()
#gen_pool5_layer_z = LL.InputLayer(shape=(args.batch_size, 100), input_var=z) # z, 100
#gen_pool5_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_z, num_units=256, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # 100 -> 256
#gen_pool5_layer_y = LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot) # y, 10
#gen_pool5_layer_y_embed = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_y, num_units=512, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # 10 -> 512
#gen_pool5_layer_fc4 = LL.ConcatLayer([gen_pool5_layer_z_embed,gen_pool5_layer_y_embed],axis=1) #512+256 = 768
##gen_pool5_layer_fc4 = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc5, num_units=512, nonlinearity=T.nnet.relu))#, g=None)
#gen_pool5_layer_fc3 = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc4, num_units=512, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None)
#gen_pool5_layer_pool5_flat = LL.DenseLayer(gen_pool5_layer_fc3, num_units=4*4*32, nonlinearity=T.nnet.relu) # NO batch normalization at output layer
##gen_pool5_layer_pool5_flat = nn.batch_norm(LL.DenseLayer(gen_pool5_layer_fc3, num_units=4*4*32, W=Normal(0.02), nonlinearity=T.nnet.relu), g=None) # no batch-norm at output layer
#gen_pool5_layer_pool5 = LL.ReshapeLayer(gen_pool5_layer_pool5_flat, (args.batch_size,32,4,4))
#gen_pool5_layers = [gen_pool5_layer_z, gen_pool5_layer_z_embed, gen_pool5_layer_y, gen_pool5_layer_y_embed, #gen_pool5_layer_fc5,
# gen_pool5_layer_fc4, gen_pool5_layer_fc3, gen_pool5_layer_pool5_flat, gen_pool5_layer_pool5]
#gen_pool5 = LL.get_output(gen_pool5_layer_pool5, deterministic=False)
def create_model(incoming, options):
    """Build a convolutional autoencoder with dropout and tied weights.

    Filter counts are scaled up by 1/(1-p) to compensate for dropout.
    The decoder reuses (transposed) encoder weights: dense8/dense9 tie to
    bottleneck/dense7, and the deconv layers tie to conv2d1/3/5.
    Returns (reconstruction layer, bottleneck layer).
    """
    input_p = 0.2
    hidden_p = 0.5
    # Inflate widths so the expected number of active units matches the
    # nominal 100/150/200 after dropout.
    conv_num_filters1 = int(100 / (1.0 - input_p))
    conv_num_filters2 = int(150 / (1.0 - hidden_p))
    conv_num_filters3 = int(200 / (1.0 - hidden_p))
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = int(options['BOTTLENECK'] / 0.5)
    dense_mid_size = int(options['DENSE'] / 0.5)
    pad_in = 'valid'
    pad_out = 'full'  # NOTE(review): unused in this variant
    scaled_tanh = create_scaled_tanh()
    # --- encoder ---
    dropout0 = DropoutLayer(incoming, p=0.2, name='dropout0')
    conv2d1 = Conv2DLayer(dropout0, num_filters=conv_num_filters1, filter_size=filter_size1, pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    maxpool2d2 = MaxPool2DLayer(conv2d1, pool_size=pool_size, name='maxpool2d2')
    dropout1 = DropoutLayer(maxpool2d2, name='dropout1')
    conv2d3 = Conv2DLayer(dropout1, num_filters=conv_num_filters2, filter_size=filter_size2, pad=pad_in, name='conv2d3', nonlinearity=scaled_tanh)
    maxpool2d4 = MaxPool2DLayer(conv2d3, pool_size=pool_size, name='maxpool2d4', pad=(1,0))
    dropout2 = DropoutLayer(maxpool2d4, name='dropout2')
    conv2d5 = Conv2DLayer(dropout2, num_filters=conv_num_filters3, filter_size=filter_size3, pad=pad_in, name='conv2d5', nonlinearity=scaled_tanh)
    reshape6 = ReshapeLayer(conv2d5, shape=([0], -1), name='reshape6')  # 3000
    reshape6_output = reshape6.output_shape[1]
    dropout3 = DropoutLayer(reshape6, name='dropout3')
    dense7 = DenseLayer(dropout3, num_units=dense_mid_size, name='dense7', nonlinearity=scaled_tanh)
    dropout4 = DropoutLayer(dense7, name='dropout4')
    bottleneck = DenseLayer(dropout4, num_units=encode_size, name='bottleneck', nonlinearity=linear)
    # print_network(bottleneck)
    # --- decoder: dense layers share transposed encoder weights ---
    dense8 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T, name='dense8', nonlinearity=linear)
    dense9 = DenseLayer(dense8, num_units=reshape6_output, W=dense7.W.T, nonlinearity=scaled_tanh, name='dense9')
    reshape10 = ReshapeLayer(dense9, shape=([0], conv_num_filters3, 3, 5), name='reshape10')  # 32 x 4 x 7
    # Deconvolutions reuse the encoder conv weights (flip_filters inverted).
    deconv2d11 = Deconv2DLayer(reshape10, conv2d5.input_shape[1], conv2d5.filter_size, stride=conv2d5.stride,
                               W=conv2d5.W, flip_filters=not conv2d5.flip_filters, name='deconv2d11', nonlinearity=scaled_tanh)
    upscale2d12 = Upscale2DLayer(deconv2d11, scale_factor=pool_size, name='upscale2d12')
    deconv2d13 = Deconv2DLayer(upscale2d12, conv2d3.input_shape[1], conv2d3.filter_size, stride=conv2d3.stride,
                               W=conv2d3.W, flip_filters=not conv2d3.flip_filters, name='deconv2d13', nonlinearity=scaled_tanh)
    upscale2d14 = Upscale2DLayer(deconv2d13, scale_factor=pool_size, name='upscale2d14')
    # NOTE(review): layer is named 'deconv2d14' although the variable is
    # deconv2d15 — kept as-is since the name is load-bearing for lookups.
    deconv2d15 = Deconv2DLayer(upscale2d14, conv2d1.input_shape[1], conv2d1.filter_size, stride=conv2d1.stride, crop=(1, 0),
                               W=conv2d1.W, flip_filters=not conv2d1.flip_filters, name='deconv2d14', nonlinearity=scaled_tanh)
    reshape16 = ReshapeLayer(deconv2d15, ([0], -1), name='reshape16')
    return reshape16, bottleneck
def create_model(dbn, input_shape, input_var, mask_shape, mask_var,
                 lstm_size=250, win=T.iscalar('theta'),
                 output_classes=26, w_init_fn=GlorotUniform(),
                 use_peepholes=False, use_blstm=True):
    """Build a classifier: pretrained DBN encoder -> delta features ->
    (B)LSTM -> per-timestep softmax.

    dbn: (weights, biases, shapes, nonlinearities) of the pretrained encoder
    win: symbolic delta-window scalar fed to DeltaLayer
    Returns the output layer with shape (batch, seq_len, output_classes).

    Fixes vs. the original: the symbolic scalar was named 'theta)' (stray
    parenthesis), and w_init_fn defaulted to the GlorotUniform *class*
    rather than an instance — Gate expects an initializer instance, as the
    sibling builders in this file use (las.init.Orthogonal()).
    """
    weights, biases, shapes, nonlinearities = dbn
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    # Flatten time into batch so the dense encoder sees 2D input.
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases, shapes, nonlinearities,
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    # Restore (batch, seq, features) before the recurrent layers.
    l_reshape2 = ReshapeLayer(l_encoder,
                              (symbolic_batchsize, symbolic_seqlen, encoder_len),
                              name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    if use_blstm:
        l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters,
                                           gate_parameters, 'blstm1', use_peepholes)
        # We'll combine the forward and backward layer output by summing.
        # Merge layers take in lists of layers to merge as input.
        l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')
        # reshape, flatten to 2 dimensions to run softmax on all timesteps
        l_reshape3 = ReshapeLayer(l_sum1, (-1, lstm_size), name='reshape3')
    else:
        l_lstm = create_lstm(l_delta, l_mask, lstm_size, cell_parameters,
                             gate_parameters, 'lstm', use_peepholes)
        l_reshape3 = ReshapeLayer(l_lstm, (-1, lstm_size), name='reshape3')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_softmax = DenseLayer(
        l_reshape3, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax,
                         (-1, symbolic_seqlen, output_classes),
                         name='output')
    return l_out
def create_model_using_pretrained_encoder(weights, biases, input_shape, input_var, mask_shape, mask_var,
                                          lstm_size=250, win=T.iscalar('theta'),
                                          output_classes=26, w_init_fn=las.init.Orthogonal(),
                                          use_peepholes=False, nonlinearities=rectify):
    """Build a sequence classifier: pretrained dense encoder (2000-1000-500-50)
    -> delta features -> BLSTM -> last-timestep softmax over output_classes.

    weights, biases: pretrained encoder parameters
    win: symbolic delta-window scalar for DeltaLayer
    Returns the final DenseLayer (one prediction per sequence).
    """
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_in = InputLayer(input_shape, input_var, 'input')
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_batchsize = l_in.input_var.shape[0]
    symbolic_seqlen = l_in.input_var.shape[1]

    # Flatten time into batch for the dense encoder, then restore.
    l_reshape1 = ReshapeLayer(l_in, (-1, input_shape[-1]), name='reshape1')
    l_encoder = create_pretrained_encoder(l_reshape1, weights, biases,
                                          [2000, 1000, 500, 50],
                                          [nonlinearities, nonlinearities, nonlinearities, linear],
                                          ['fc1', 'fc2', 'fc3', 'bottleneck'])
    encoder_len = las.layers.get_output_shape(l_encoder)[-1]
    l_reshape2 = ReshapeLayer(l_encoder,
                              (symbolic_batchsize, symbolic_seqlen, encoder_len),
                              name='reshape2')
    l_delta = DeltaLayer(l_reshape2, win, name='delta')

    # NOTE(review): layer name 'bstm1' looks like a typo for 'blstm1';
    # kept because layer names may be used for parameter lookups.
    l_lstm, l_lstm_back = create_blstm(l_delta, l_mask, lstm_size, cell_parameters,
                                       gate_parameters, 'bstm1', use_peepholes)

    # We'll combine the forward and backward layer output by summing.
    # Merge layers take in lists of layers to merge as input.
    l_sum1 = ElemwiseSumLayer([l_lstm, l_lstm_back], name='sum1')

    # Keep only the last timestep for sequence-level classification.
    l_forward_slice1 = SliceLayer(l_sum1, -1, 1, name='slice1')

    # Now, we can apply feed-forward layers as usual.
    # We want the network to predict a classification for the sequence,
    # so we'll use a the number of classes.
    l_out = DenseLayer(
        l_forward_slice1, num_units=output_classes,
        nonlinearity=las.nonlinearities.softmax, name='output')

    return l_out
def create_model(incoming, options):
    """Build a convolutional autoencoder with batch normalization and
    tied weights (batch-norm variant of the dropout autoencoder above).

    Decoder dense layers reuse transposed encoder weights; deconv layers
    reuse the encoder conv weights. Returns (reconstruction layer,
    bottleneck layer).
    """
    conv_num_filters1 = 100
    conv_num_filters2 = 150
    conv_num_filters3 = 200
    filter_size1 = 5
    filter_size2 = 5
    filter_size3 = 3
    pool_size = 2
    encode_size = options['BOTTLENECK']
    dense_mid_size = options['DENSE']
    pad_in = 'valid'
    pad_out = 'full'  # NOTE(review): unused in this variant
    scaled_tanh = create_scaled_tanh()
    # --- encoder: conv -> BN -> pool, three times ---
    dropout0 = DropoutLayer(incoming, p=0.2, name='dropout0')
    conv2d1 = Conv2DLayer(dropout0, num_filters=conv_num_filters1, filter_size=filter_size1, pad=pad_in, name='conv2d1', nonlinearity=scaled_tanh)
    bn1 = BatchNormLayer(conv2d1, name='batchnorm1')
    maxpool2d2 = MaxPool2DLayer(bn1, pool_size=pool_size, name='maxpool2d2')
    dropout1 = DropoutLayer(maxpool2d2, name='dropout1')
    conv2d3 = Conv2DLayer(dropout1, num_filters=conv_num_filters2, filter_size=filter_size2, pad=pad_in, name='conv2d3', nonlinearity=scaled_tanh)
    bn2 = BatchNormLayer(conv2d3, name='batchnorm2')
    maxpool2d4 = MaxPool2DLayer(bn2, pool_size=pool_size, name='maxpool2d4', pad=(1,0))
    dropout2 = DropoutLayer(maxpool2d4, name='dropout2')
    conv2d5 = Conv2DLayer(dropout2, num_filters=conv_num_filters3, filter_size=filter_size3, pad=pad_in, name='conv2d5', nonlinearity=scaled_tanh)
    bn3 = BatchNormLayer(conv2d5, name='batchnorm3')
    reshape6 = ReshapeLayer(bn3, shape=([0], -1), name='reshape6')  # 3000
    reshape6_output = reshape6.output_shape[1]
    dropout3 = DropoutLayer(reshape6, name='dropout3')
    dense7 = DenseLayer(dropout3, num_units=dense_mid_size, name='dense7', nonlinearity=scaled_tanh)
    bn4 = BatchNormLayer(dense7, name='batchnorm4')
    dropout4 = DropoutLayer(bn4, name='dropout4')
    bottleneck = DenseLayer(dropout4, num_units=encode_size, name='bottleneck', nonlinearity=linear)
    # print_network(bottleneck)
    # --- decoder: dense layers share transposed encoder weights ---
    dense8 = DenseLayer(bottleneck, num_units=dense_mid_size, W=bottleneck.W.T, name='dense8', nonlinearity=linear)
    dense9 = DenseLayer(dense8, num_units=reshape6_output, W=dense7.W.T, nonlinearity=scaled_tanh, name='dense9')
    reshape10 = ReshapeLayer(dense9, shape=([0], conv_num_filters3, 3, 5), name='reshape10')  # 32 x 4 x 7
    # Deconvolutions reuse the encoder conv weights (flip_filters inverted).
    deconv2d11 = Deconv2DLayer(reshape10, conv2d5.input_shape[1], conv2d5.filter_size, stride=conv2d5.stride,
                               W=conv2d5.W, flip_filters=not conv2d5.flip_filters, name='deconv2d11', nonlinearity=scaled_tanh)
    upscale2d12 = Upscale2DLayer(deconv2d11, scale_factor=pool_size, name='upscale2d12')
    deconv2d13 = Deconv2DLayer(upscale2d12, conv2d3.input_shape[1], conv2d3.filter_size, stride=conv2d3.stride,
                               W=conv2d3.W, flip_filters=not conv2d3.flip_filters, name='deconv2d13', nonlinearity=scaled_tanh)
    upscale2d14 = Upscale2DLayer(deconv2d13, scale_factor=pool_size, name='upscale2d14')
    # NOTE(review): layer is named 'deconv2d14' although the variable is
    # deconv2d15 — kept as-is since the name is load-bearing for lookups.
    deconv2d15 = Deconv2DLayer(upscale2d14, conv2d1.input_shape[1], conv2d1.filter_size, stride=conv2d1.stride, crop=(1, 0),
                               W=conv2d1.W, flip_filters=not conv2d1.flip_filters, name='deconv2d14', nonlinearity=scaled_tanh)
    reshape16 = ReshapeLayer(deconv2d15, ([0], -1), name='reshape16')
    return reshape16, bottleneck
def create_pretrained_substream(weights, biases, input_shape, input_var, mask_shape, mask_var, name,
                                lstm_size=250, win=T.iscalar('theta'), nonlinearity=rectify,
                                w_init_fn=las.init.Orthogonal(), use_peepholes=True):
    """Build one modality substream: pretrained dense encoder
    (2000-1000-500-50) -> delta features -> unidirectional LSTM.

    name: suffix appended to every layer name so several substreams can
          coexist in one network
    win:  symbolic delta-window scalar for DeltaLayer
    Returns the LSTMLayer of this substream.
    """
    gate_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=w_init_fn, W_hid=w_init_fn,
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None, b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    l_input = InputLayer(input_shape, input_var, 'input_'+name)
    l_mask = InputLayer(mask_shape, mask_var, 'mask')
    symbolic_batchsize_raw = l_input.input_var.shape[0]
    symbolic_seqlen_raw = l_input.input_var.shape[1]

    # Flatten time into batch for the dense encoder, then restore.
    l_reshape1_raw = ReshapeLayer(l_input, (-1, input_shape[-1]), name='reshape1_'+name)
    l_encoder_raw = create_pretrained_encoder(l_reshape1_raw, weights, biases,
                                              [2000, 1000, 500, 50],
                                              [nonlinearity, nonlinearity, nonlinearity, linear],
                                              ['fc1_'+name, 'fc2_'+name, 'fc3_'+name, 'bottleneck_'+name])
    input_len = las.layers.get_output_shape(l_encoder_raw)[-1]

    l_reshape2 = ReshapeLayer(l_encoder_raw,
                              (symbolic_batchsize_raw, symbolic_seqlen_raw, input_len),
                              name='reshape2_'+name)
    l_delta = DeltaLayer(l_reshape2, win, name='delta_'+name)

    l_lstm = LSTMLayer(
        l_delta, int(lstm_size), peepholes=use_peepholes,
        # We need to specify a separate input for masks
        mask_input=l_mask,
        # Here, we supply the gate parameters for each gate
        ingate=gate_parameters, forgetgate=gate_parameters,
        cell=cell_parameters, outgate=gate_parameters,
        # We'll learn the initialization and use gradient clipping
        learn_init=True, grad_clipping=5., name='lstm_'+name)

    return l_lstm
def _init_model(self, in_size, out_size, n_hid=10, learning_rate_sl=0.005, \
                learning_rate_rl=0.005, batch_size=32, ment=0.1):
    """Build the GRU policy network and compile RL (REINFORCE with entropy
    bonus) and SL (likelihood) training/objective/action functions.

    in_size/out_size: feature and action dimensions
    ment: entropy regularization weight for the RL loss
    """
    # 2-layer MLP
    self.in_size = in_size  # x and y coordinate
    self.out_size = out_size  # up, down, right, left
    self.batch_size = batch_size
    self.learning_rate = learning_rate_rl
    self.n_hid = n_hid

    # Symbolic inputs: batch of dialogs (B x H x in_size), turn mask,
    # one-hot action mask and per-episode reward.
    input_var, turn_mask, act_mask, reward_var = T.ftensor3('in'), T.imatrix('tm'), \
        T.itensor3('am'), T.fvector('r')
    # NOTE(review): in_var is computed but never used below.
    in_var = T.reshape(input_var, (input_var.shape[0]*input_var.shape[1], self.in_size))

    l_mask_in = L.InputLayer(shape=(None, None), input_var=turn_mask)

    pol_in = T.fmatrix('pol-h')  # initial GRU hidden state
    l_in = L.InputLayer(shape=(None, None, self.in_size), input_var=input_var)
    l_pol_rnn = L.GRULayer(l_in, n_hid, hid_init=pol_in, mask_input=l_mask_in)  # B x H x D
    pol_out = L.get_output(l_pol_rnn)[:, -1, :]  # final hidden state per batch row
    # Flatten (B, H) so one softmax DenseLayer scores every turn.
    l_den_in = L.ReshapeLayer(l_pol_rnn, (turn_mask.shape[0]*turn_mask.shape[1], n_hid))  # BH x D
    l_out = L.DenseLayer(l_den_in, self.out_size, nonlinearity=lasagne.nonlinearities.softmax)

    self.network = l_out
    self.params = L.get_all_params(self.network)

    # rl
    probs = L.get_output(self.network)  # BH x A
    out_probs = T.reshape(probs, (input_var.shape[0], input_var.shape[1], self.out_size))  # B x H x A
    log_probs = T.log(out_probs)
    # Log-prob of the taken action per turn, masked and summed per episode.
    act_probs = (log_probs*act_mask).sum(axis=2)  # B x H
    ep_probs = (act_probs*turn_mask).sum(axis=1)  # B
    H_probs = -T.sum(T.sum(out_probs*log_probs, axis=2), axis=1)  # B (policy entropy)
    self.loss = 0.-T.mean(ep_probs*reward_var + ment*H_probs)

    updates = lasagne.updates.rmsprop(self.loss, self.params, learning_rate=learning_rate_rl, \
                                      epsilon=1e-4)

    self.inps = [input_var, turn_mask, act_mask, reward_var, pol_in]
    self.train_fn = theano.function(self.inps, self.loss, updates=updates)
    self.obj_fn = theano.function(self.inps, self.loss)
    self.act_fn = theano.function([input_var, turn_mask, pol_in], [out_probs, pol_out])

    # sl: maximize likelihood of demonstrated actions (no reward weighting)
    sl_loss = 0.-T.mean(ep_probs)
    sl_updates = lasagne.updates.rmsprop(sl_loss, self.params, learning_rate=learning_rate_sl, \
                                         epsilon=1e-4)

    self.sl_train_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss, \
                                       updates=sl_updates)
    self.sl_obj_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss)