我们从Python开源项目中,提取了以下29个代码示例,用于说明如何使用lasagne.init()。
def mdclW(num_filters, num_channels, filter_size, winit, name, scales):
    """Build one symbolic weight tensor holding a multiscale dilated filter.

    A single undilated base filter is created as a shared variable. For
    every entry of ``scales`` a copy of it, multiplied by its own
    per-filter coefficient vector, is written into a zero tensor with a
    strided ``set_subtensor`` (the stride is the dilation factor).

    Parameters
    ----------
    num_filters : int
        Number of output filters (first axis of the result).
    num_channels : int
        Number of input channels (second axis of the result).
    filter_size : int
        Side length of the undilated base filter.
    winit : object
        Initializer exposing ``sample(shape)``; used for the base filter.
    name : str
        Prefix for the names of the shared variables that are created.
    scales : sequence of int
        Dilation factors. Each one is used as a slice step, so it must be
        non-zero. ``scales[-1]`` determines the total filter size, so it
        is presumably expected to be the largest -- TODO confirm.

    Returns
    -------
    Symbolic tensor of shape ``(num_filters, num_channels, size, size)``
    with ``size = filter_size + (filter_size - 1) * (scales[-1] - 1)``.
    """
    # Coefficient Initializer
    sinit = lasagne.init.Constant(1.0 / (1 + len(scales)))
    # Total filter size
    largest = scales[-1]
    size = filter_size + (filter_size - 1) * (largest - 1)
    # Multiscale Dilated Filter
    W = T.zeros((num_filters, num_channels, size, size))
    # Undilated Base Filter
    baseW = theano.shared(
        lasagne.utils.floatX(
            winit.sample((num_filters, num_channels, filter_size, filter_size))),
        name=name + '.W')
    # Walk the scales backwards so that the main filter is placed on top.
    # (Iterate the values themselves: wrapping this in enumerate() would
    # yield (index, scale) tuples and break the slice arithmetic below.)
    for scale in scales[::-1]:
        window = slice(largest - scale, size - largest + scale, scale)
        coeff = theano.shared(
            lasagne.utils.floatX(sinit.sample(num_filters)),
            name + '.coeff_' + str(scale))
        W = T.set_subtensor(
            W[:, :, window, window],
            baseW * coeff.dimshuffle(0, 'x', 'x', 'x'))
    return W


# Subpixel Upsample Layer from (https://arxiv.org/abs/1609.05158)
# This layer uses a set of r^2 set_subtensor calls to reorganize the tensor in a subpixel-layer upscaling style
# as done in the ESPCN Magic Pony paper for super-resolution.
# r is the upscale factor.
# c is the number of output channels.
def get_output_for(self, input, init=False, **kwargs):
    """Append minibatch-similarity features to each (flattened) sample.

    The input is projected with ``self.W``, L1 distances between every
    pair of samples in the batch are computed per kernel, and the summed
    ``exp(-distance)`` per kernel is concatenated to the input features.

    With ``init=True`` the distances and the features are additionally
    normalised from the current batch, and the matching parameter updates
    are stored in ``self.init_updates``; otherwise ``self.b`` is added.
    """
    # Inputs with more than two dimensions are flattened into a batch of
    # feature vectors.
    features = input.flatten(2) if input.ndim > 2 else input

    projected = T.tensordot(features, self.W, [[1], [0]])
    pairwise_gap = (projected.dimshuffle(0, 1, 2, 'x')
                    - projected.dimshuffle('x', 1, 2, 0))
    # A large constant on the diagonal keeps each sample's zero distance
    # to itself out of the min / exp terms below.
    diagonal_penalty = 1e6 * T.eye(features.shape[0]).dimshuffle(0, 'x', 1)
    l1_dist = T.sum(abs(pairwise_gap), axis=2) + diagonal_penalty

    if not init:
        closeness = T.sum(T.exp(-l1_dist), axis=2)
        closeness += self.b.dimshuffle('x', 0)
        return T.concatenate([features, closeness], axis=1)

    # Data-dependent initialisation pass.
    half_mean_nearest = 0.5 * T.mean(T.min(l1_dist, axis=2), axis=0)
    l1_dist /= half_mean_nearest.dimshuffle('x', 0, 'x')
    self.init_updates = [
        (self.log_weight_scale,
         self.log_weight_scale - T.log(half_mean_nearest).dimshuffle(0, 'x')),
    ]
    closeness = T.sum(T.exp(-l1_dist), axis=2)
    mean_closeness = T.mean(closeness, axis=0)
    closeness -= mean_closeness.dimshuffle('x', 0)
    self.init_updates.append((self.b, -mean_closeness))
    return T.concatenate([features, closeness], axis=1)
def l2normalize(layer, train_scale=True):
    """Reparameterise ``layer.W`` as a scaled, L2-normalised weight.

    A new ``W_scale`` parameter (one value per output unit/filter,
    initialised to 1) is added to the layer, and ``layer.W`` is replaced
    by the expression ``W * W_scale / sqrt(1e-6 + sum(W ** 2))``.

    Parameters
    ----------
    layer : layer object with a shared-variable ``W`` and ``add_param``.
    train_scale : bool
        Passed as ``trainable`` for the new ``W_scale`` parameter.

    Returns
    -------
    The same ``layer`` object, modified in place.
    """
    raw_W = layer.W
    w_shape = raw_W.get_value().shape
    if len(w_shape) == 4:
        # 4-d weights: one norm per entry of the first axis.
        reduce_axes, pattern, n_out = (1, 2, 3), [0, 'x', 'x', 'x'], w_shape[0]
    else:
        # Otherwise: one norm per column.
        reduce_axes, pattern, n_out = 0, ['x', 0], w_shape[1]

    layer.W_scale = layer.add_param(
        lasagne.init.Constant(1.), (n_out,), name="W_scale",
        trainable=train_scale, regularizable=False)
    scale_over_norm = layer.W_scale / T.sqrt(
        1e-6 + T.sum(T.square(raw_W), axis=reduce_axes))
    layer.W = raw_W * scale_over_norm.dimshuffle(*pattern)
    return layer


# fully connected layer with weight normalization
def get_output_for(self, input, init=False, deterministic=False, **kwargs):
    """Apply the dense transform followed by the layer's nonlinearity.

    In the normal pass the bias ``self.b`` is added to ``input . W``.
    With ``init=True`` the pre-activation is instead standardised with
    the current batch's mean and standard deviation, and the updates for
    ``self.weight_scale`` and ``self.b`` that reproduce this are stored
    in ``self.init_updates``. ``deterministic`` is accepted but unused.
    """
    # Inputs with more than two dimensions are flattened into a batch of
    # feature vectors.
    flat = input.flatten(2) if input.ndim > 2 else input
    pre_act = T.dot(flat, self.W)

    if not init:
        pre_act += self.b.dimshuffle('x', 0)
        return self.nonlinearity(pre_act)

    batch_mean = T.mean(pre_act, axis=0)
    pre_act -= batch_mean.dimshuffle('x', 0)
    batch_std = T.sqrt(T.mean(T.square(pre_act), axis=0))
    pre_act /= batch_std.dimshuffle('x', 0)
    self.init_updates = [
        (self.weight_scale, self.weight_scale / batch_std),
        (self.b, -batch_mean / batch_std),
    ]
    return self.nonlinearity(pre_act)


# comes from Ishamel code base
def __init__(self, incoming, RMAX, DMAX, axes='auto', epsilon=1e-4,
             alpha=0.1, beta=lasagne.init.Constant(0),
             gamma=lasagne.init.Constant(1),
             mean=lasagne.init.Constant(0),
             inv_std=lasagne.init.Constant(1), **kwargs):
    """Set up the layer and store the ``RMAX`` / ``DMAX`` limits.

    All arguments other than ``RMAX`` and ``DMAX`` are forwarded to the
    parent constructor unchanged.

    Raises
    ------
    ValueError
        If the resolved ``self.axes`` is neither ``(0,)`` nor every axis
        except the second one.
    """
    super(BatchReNormDNNLayer, self).__init__(
        incoming, axes, epsilon, alpha, beta, gamma, mean, inv_std,
        **kwargs)
    self.RMAX = RMAX
    self.DMAX = DMAX

    n_dims = len(self.input_shape)
    all_but_second_axis = (0,) + tuple(range(2, n_dims))
    supported = ((0,), all_but_second_axis)
    if self.axes in supported:
        return
    raise ValueError("BatchNormDNNLayer only supports normalization "
                     "across the first axis, or across all but the "
                     "second axis, got axes=%r" % (axes,))
def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
             crop=0, untie_biases=False, W=initmethod(),
             b=lasagne.init.Constant(0.),
             nonlinearity=lasagne.nonlinearities.rectify,
             flip_filters=False, **kwargs):
    """Initialise the layer through the parent constructor (with ``n=2``).

    ``crop`` is passed to the parent in the positional slot that follows
    ``stride``; afterwards the value the parent left in ``self.pad`` is
    exposed as ``self.crop`` and the ``pad`` attribute is removed.
    """
    parent_args = (incoming, num_filters, filter_size, stride, crop,
                   untie_biases, W, b, nonlinearity, flip_filters)
    super(DeconvLayer, self).__init__(*parent_args, n=2, **kwargs)
    # Move the value stored under ``pad`` to ``crop``.
    self.crop = self.pad
    delattr(self, 'pad')
def __init__(self, incoming, num_kernels, dim_per_kernel=5,
             theta=lasagne.init.Normal(0.05),
             log_weight_scale=lasagne.init.Constant(0.),
             b=lasagne.init.Constant(-1.), **kwargs):
    """Create the parameters of the minibatch layer.

    Registers ``theta`` with shape
    ``(num_inputs, num_kernels, dim_per_kernel)``, ``log_weight_scale``
    with shape ``(num_kernels, dim_per_kernel)`` and ``b`` with shape
    ``(num_kernels,)``. ``self.W`` is the expression
    ``theta * exp(log_weight_scale) / norm(theta over axis 0)``.
    """
    super(MinibatchLayer, self).__init__(incoming, **kwargs)
    self.num_kernels = num_kernels

    num_inputs = int(np.prod(self.input_shape[1:]))
    kernel_shape = (num_kernels, dim_per_kernel)
    self.theta = self.add_param(
        theta, (num_inputs,) + kernel_shape, name="theta")
    self.log_weight_scale = self.add_param(
        log_weight_scale, kernel_shape, name="log_weight_scale")

    theta_norm = T.sqrt(T.sum(T.square(self.theta), axis=0))
    per_kernel_scale = T.exp(self.log_weight_scale) / theta_norm
    self.W = self.theta * per_kernel_scale.dimshuffle('x', 0, 1)

    self.b = self.add_param(b, (num_kernels,), name="b")
def __init__(self, incoming, num_units, mask_generator, layerIdx,
             W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
             nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
    """Create a dense layer whose weights are gated by a stored mask.

    Besides the parent's parameters, a non-trainable, non-regularisable
    ``weights_mask`` of ones with shape ``(num_inputs, num_units)`` is
    registered. ``self.shuffle_update`` pairs that mask with the update
    expression obtained from
    ``mask_generator.get_mask_layer_UPDATE(layerIdx)``.
    """
    super(MaskedLayer, self).__init__(
        incoming, num_units, W, b, nonlinearity, **kwargs)
    self.mask_generator = mask_generator
    self.layerIdx = layerIdx

    num_inputs = int(np.prod(self.input_shape[1:]))
    mask_shape = (num_inputs, num_units)
    self.weights_mask = self.add_param(
        spec=np.ones(mask_shape, dtype=np.float32),
        shape=mask_shape,
        name='weights_mask',
        trainable=False,
        regularizable=False)

    mask_update = mask_generator.get_mask_layer_UPDATE(self.layerIdx)
    self.shuffle_update = [(self.weights_mask, mask_update)]
def __init__(self, incoming, num_units, mask_generator, layerIdx,
             W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
             nonlinearity=None, **kwargs):
    """Create a masked dense layer that uses the direct-input mask.

    A non-trainable, non-regularisable ``weights_mask`` of ones with
    shape ``(num_inputs, num_units)`` is registered.
    ``self.shuffle_update`` pairs it with
    ``mask_generator.get_direct_input_mask_layer_UPDATE(layerIdx + 1)``.
    """
    super(DIML, self).__init__(
        incoming, num_units, W, b, nonlinearity, **kwargs)
    self.mask_generator = mask_generator
    self.layerIdx = layerIdx

    num_inputs = int(np.prod(self.input_shape[1:]))
    mask_shape = (num_inputs, num_units)
    self.weights_mask = self.add_param(
        spec=np.ones(mask_shape, dtype=np.float32),
        shape=mask_shape,
        name='weights_mask',
        trainable=False,
        regularizable=False)

    mask_update = self.mask_generator.get_direct_input_mask_layer_UPDATE(
        self.layerIdx + 1)
    self.shuffle_update = [(self.weights_mask, mask_update)]
def get_output_for(self, input, **kwargs):
    """Dense forward pass using the element-wise masked weights.

    Computes ``nonlinearity(input . (W * weights_mask) + b)``; the bias
    term is skipped when ``self.b`` is ``None``. Inputs with more than
    two dimensions are flattened to two first.
    """
    flat = input.flatten(2) if input.ndim > 2 else input
    masked_W = self.W * self.weights_mask
    pre_act = T.dot(flat, masked_W)
    if self.b is not None:
        pre_act = pre_act + self.b.dimshuffle('x', 0)
    return self.nonlinearity(pre_act)


# Conditioning Masked Layer
# Currently not used.
# class CML(MaskedLayer):
#     def __init__(self, incoming, num_units, mask_generator,use_cond_mask=False,U=lasagne.init.GlorotUniform(),W=lasagne.init.GlorotUniform(),
#                  b=init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify, **kwargs):
#         super(CML, self).__init__(incoming, num_units, mask_generator,W,
#                                   b, nonlinearity,**kwargs)
#         self.use_cond_mask=use_cond_mask
#         if use_cond_mask:
#             self.U = self.add_param(spec = U,
#                                     shape = (num_inputs, num_units),
#                                     name='U',
#                                     trainable=True,
#                                     regularizable=False)theano.shared(value=self.weights_initialization((self.n_in, self.n_out)), name=self.name+'U', borrow=True)
#             self.add_param(self.U,name =
#     def get_output_for(self,input,**kwargs):
#         lin = self.lin_output = T.dot(input, self.W * self.weights_mask) + self.b
#         if self.use_cond_mask:
#             lin = lin+T.dot(T.ones_like(input), self.U * self.weights_mask)
#         return lin if self._activation is None else self._activation(lin)

# Made layer, adopted from M.Germain
def __init__(self, incoming, b=lasagne.init.Constant(0.),
             g=lasagne.init.Constant(1.), W=lasagne.init.Normal(0.05),
             train_g=False, init_stdv=1., nonlinearity=relu, **kwargs):
    """Wrap ``incoming`` so that its weights are norm-reparameterised.

    Optional per-channel parameters ``b`` and ``g`` (length
    ``input_shape[1]``) are registered on this layer. The original
    weights of ``incoming`` are kept as ``incoming.W_param`` and
    ``incoming.W`` is replaced by ``W_param`` divided by its norm
    (times ``g`` when ``g`` is given). The ``W`` argument is currently
    unused; the line that consumed it is commented out below.
    """
    super(WeightNormLayer, self).__init__(incoming, **kwargs)
    self.init_stdv = init_stdv
    self.nonlinearity = nonlinearity

    n_channels = self.input_shape[1]
    if b is not None:
        self.b = self.add_param(b, (n_channels,), name="b",
                                regularizable=False)
    if g is not None:
        self.g = self.add_param(g, (n_channels,), name="g",
                                regularizable=False, trainable=train_g)

    # Reduction axes / broadcast pattern used on this layer's input.
    four_d_input = len(self.input_shape) == 4
    self.axes_to_sum = (0, 2, 3) if four_d_input else 0
    self.dimshuffle_args = ['x', 0, 'x', 'x'] if four_d_input else ['x', 0]

    # scale weights in layer below
    incoming.W_param = incoming.W
    #incoming.W_param.set_value(W.sample(incoming.W_param.get_value().shape))
    raw_W = incoming.W_param
    if raw_W.ndim != 4:
        W_axes_to_sum, W_dimshuffle_args = 0, ['x', 0]
    elif isinstance(incoming, Deconv2DLayer):
        W_axes_to_sum, W_dimshuffle_args = (0, 2, 3), ['x', 0, 'x', 'x']
    else:
        W_axes_to_sum, W_dimshuffle_args = (1, 2, 3), [0, 'x', 'x', 'x']

    if g is None:
        incoming.W = raw_W / T.sqrt(
            1e-6 + T.sum(T.square(raw_W), axis=W_axes_to_sum, keepdims=True))
    else:
        g_over_norm = self.g / T.sqrt(
            1e-6 + T.sum(T.square(raw_W), axis=W_axes_to_sum))
        incoming.W = raw_W * g_over_norm.dimshuffle(*W_dimshuffle_args)
def get_output_for(self, input, init=False, **kwargs):
    """Shift (and, when initialising, rescale) the input, then activate.

    Normal pass: add ``self.b`` (broadcast with ``self.dimshuffle_args``)
    when the layer has a ``b`` attribute, then apply the nonlinearity.

    ``init=True``: standardise the input over ``self.axes_to_sum`` to a
    standard deviation of ``self.init_stdv`` and store the matching
    updates for ``self.b`` and ``self.g`` in ``self.init_updates``.
    """
    if not init:
        if hasattr(self, 'b'):
            input += self.b.dimshuffle(*self.dimshuffle_args)
        return self.nonlinearity(input)

    pattern = self.dimshuffle_args
    batch_mean = T.mean(input, self.axes_to_sum)
    input -= batch_mean.dimshuffle(*pattern)
    rescale = self.init_stdv / T.sqrt(
        T.mean(T.square(input), self.axes_to_sum))
    input *= rescale.dimshuffle(*pattern)
    self.init_updates = [
        (self.b, -batch_mean * rescale),
        (self.g, self.g * rescale),
    ]
    return self.nonlinearity(input)
def __init__(self, incoming, target_shape, filter_size, stride=(2, 2),
             pad='half', W=lasagne.init.Normal(0.05),
             b=lasagne.init.Constant(0.), nonlinearity=relu, **kwargs):
    """Create the weight and optional bias parameters of the layer.

    Parameters
    ----------
    incoming : layer feeding this one; ``incoming.output_shape[1]`` is
        used as the first axis of the weight shape.
    target_shape : sequence
        Stored as ``self.target_shape``; ``target_shape[1]`` is used as
        the second axis of the weight shape and as the bias length.
    filter_size : int or pair of ints
        Normalised to a 2-tuple with ``as_tuple``.
    stride : int or pair of ints
        Normalised to a 2-tuple with ``as_tuple``.
    pad : stored unchanged as ``self.pad``.
    W, b : parameter specifications; ``b=None`` disables the bias.
    nonlinearity : callable or None
        ``None`` selects the identity.
    """
    super(Deconv2DLayer, self).__init__(incoming, **kwargs)
    self.target_shape = target_shape
    self.nonlinearity = (lasagne.nonlinearities.identity
                         if nonlinearity is None else nonlinearity)
    self.filter_size = lasagne.layers.dnn.as_tuple(filter_size, 2)
    self.stride = lasagne.layers.dnn.as_tuple(stride, 2)
    self.pad = pad
    # Use the normalised tuple here: indexing the raw ``filter_size``
    # argument fails when the caller passes a plain int.
    self.W_shape = (incoming.output_shape[1], target_shape[1],
                    self.filter_size[0], self.filter_size[1])
    self.W = self.add_param(W, self.W_shape, name="W")
    if b is not None:
        self.b = self.add_param(b, (target_shape[1],), name="b")
    else:
        self.b = None
def batch_norm(layer, b=lasagne.init.Constant(0.),
               g=lasagne.init.Constant(1.), **kwargs):
    """Wrap ``layer`` in a ``BatchNormLayer``.

    adapted from https://gist.github.com/f0k/f1a6bd3c8585c400c190

    The wrapped layer's nonlinearity (if any) is replaced by the identity
    and handed to the ``BatchNormLayer`` instead, and the wrapped layer's
    bias parameter is removed.

    Parameters
    ----------
    layer : the layer to normalise; modified in place.
    b, g : passed positionally to ``BatchNormLayer``.
    **kwargs : forwarded to ``BatchNormLayer``.

    Returns
    -------
    The new ``BatchNormLayer`` stacked on top of ``layer``.
    """
    nonlinearity = getattr(layer, 'nonlinearity', None)
    if nonlinearity is not None:
        layer.nonlinearity = lasagne.nonlinearities.identity
    else:
        nonlinearity = lasagne.nonlinearities.identity
    # Only drop the bias when one was actually registered: a layer built
    # with ``b=None`` still has a ``b`` attribute, and looking ``None`` up
    # in ``layer.params`` would raise KeyError.
    if getattr(layer, 'b', None) is not None:
        del layer.params[layer.b]
        layer.b = None
    return BatchNormLayer(layer, b, g, nonlinearity=nonlinearity, **kwargs)
def __init__(self, incoming, num_units, theta=lasagne.init.Normal(0.1),
             b=lasagne.init.Constant(0.),
             weight_scale=lasagne.init.Constant(1.), train_scale=False,
             nonlinearity=relu, **kwargs):
    """Create a dense layer with a normalised weight matrix.

    Registers ``theta`` with shape ``(num_inputs, num_units)``,
    ``weight_scale`` with shape ``(num_units,)`` (trainable only when
    ``train_scale`` is true) and ``b`` with shape ``(num_units,)``.
    ``self.W`` is the expression
    ``theta * weight_scale / norm(theta over axis 0)``.
    A ``nonlinearity`` of ``None`` selects the identity.
    """
    super(DenseLayer, self).__init__(incoming, **kwargs)
    if nonlinearity is None:
        self.nonlinearity = lasagne.nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity
    self.num_units = num_units

    num_inputs = int(np.prod(self.input_shape[1:]))
    self.theta = self.add_param(theta, (num_inputs, num_units),
                                name="theta")
    self.weight_scale = self.add_param(weight_scale, (num_units,),
                                       name="weight_scale",
                                       trainable=train_scale)

    column_norm = T.sqrt(T.sum(T.square(self.theta), axis=0))
    column_scale = self.weight_scale / column_norm
    self.W = self.theta * column_scale.dimshuffle('x', 0)

    self.b = self.add_param(b, (num_units,), name="b")
def MDCL(incoming, num_filters, scales, name, dnn=True):
    """Build a block of parallel convolutions that share one 3x3 weight.

    One undilated 3x3 convolution is always created. For every entry of
    ``scales`` one more branch is added: a 1x1 convolution using the
    spatial mean of the shared weight when the scale is 0, otherwise a
    padded dilated 3x3 convolution. Every branch multiplies the shared
    weight by its own per-filter coefficient vector. The branches are
    combined with ``ESL``, the undilated one last.
    """
    # NOTE(review): when ``dnn`` is false, ``C2D`` must already be bound at
    # module scope -- confirm against the rest of the file.
    if dnn:
        from lasagne.layers.dnn import Conv2DDNNLayer as C2D

    # W initialization method--this should also work as Orthogonal('relu'), but I have yet to validate that as thoroughly.
    winit = initmethod(0.02)

    # Initialization method for the coefficients
    sinit = lasagne.init.Constant(1.0 / (1 + len(scales)))

    def make_coeff(suffix):
        # One shared vector of per-filter coefficients, named after the branch.
        return theano.shared(
            lasagne.utils.floatX(sinit.sample(num_filters)), name + suffix)

    # Number of incoming channels
    ni = lasagne.layers.get_output_shape(incoming)[1]

    # Weight parameter--the primary parameter for this block
    W = theano.shared(
        lasagne.utils.floatX(winit.sample((num_filters, ni, 3, 3))),
        name=name + 'W')

    # Primary Convolution Layer--No Dilation
    # Note the broadcasting dimshuffle for the num_filter scalars.
    n = C2D(incoming=incoming,
            num_filters=num_filters,
            filter_size=[3, 3],
            stride=[1, 1],
            pad=(1, 1),
            W=W * make_coeff('_coeff_base').dimshuffle(0, 'x', 'x', 'x'),
            b=None,
            nonlinearity=None,
            name=name + 'base')

    # List of remaining layers. This should probably just all be concatenated into a single list rather than being a separate deal.
    nd = []
    for scale in scales:
        branch_name = name + str(scale)
        if scale == 0:
            # I don't think 0 dilation is technically defined (or if it is it's just the regular filter)
            # but I use it here as a convenient keyword to grab the 1x1 mean conv.
            mean_W = T.mean(W, axis=[2, 3]).dimshuffle(0, 1, 'x', 'x')
            branch = C2D(
                incoming=incoming,
                num_filters=num_filters,
                filter_size=[1, 1],
                stride=[1, 1],
                pad=(0, 0),
                W=mean_W * make_coeff('_coeff_1x1').dimshuffle(0, 'x', 'x', 'x'),
                b=None,
                nonlinearity=None,
                name=branch_name)
        else:
            # Note the dimshuffles in this layer--these are critical as the current DilatedConv2D implementation uses a backward pass.
            padded = lasagne.layers.PadLayer(incoming=incoming,
                                             width=(scale, scale))
            coeff = make_coeff('_coeff_' + str(scale))
            branch = lasagne.layers.DilatedConv2DLayer(
                incoming=padded,
                num_filters=num_filters,
                filter_size=[3, 3],
                dilation=(scale, scale),
                W=W.dimshuffle(1, 0, 2, 3) * coeff.dimshuffle('x', 0, 'x', 'x'),
                b=None,
                nonlinearity=None,
                name=branch_name)
        nd.append(branch)

    return ESL(nd + [n])


# MDC-based Upsample Layer.
# This is a prototype I don't make use of extensively. It's operational but it doesn't seem to improve results yet.
def _sample_trained_minibatch_gan(params_file, n, batch_size, rs):
    """Draw ``n`` samples from a generator restored from ``params_file``.

    The generator (dense -> reshape -> three deconvolutions) is rebuilt,
    its trainable parameters are loaded from the arrays ``arr_0`` ..
    ``arr_8`` of the file, and batches are generated until at least ``n``
    samples are available.

    Parameters
    ----------
    params_file : path or file accepted by ``np.load``.
    n : int
        Number of samples to return.
    batch_size : int
        Number of samples produced per generator call.
    rs : random state exposing ``randint``; seeds both the Theano stream
        and Lasagne's RNG (in that order).

    Returns
    -------
    ``np.ndarray`` holding the first ``n`` generated samples.
    """
    import lasagne
    from lasagne.init import Normal
    import lasagne.layers as ll
    import theano as th
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import theano.tensor as T
    import nn

    # Seed order matters: first the Theano stream, then Lasagne's RNG.
    theano_rng = MRG_RandomStreams(rs.randint(2 ** 15))
    lasagne.random.set_rng(np.random.RandomState(rs.randint(2 ** 15)))

    noise_dim = (batch_size, 100)
    noise = theano_rng.uniform(size=noise_dim)

    net = ll.InputLayer(shape=noise_dim, input_var=noise)
    net = nn.batch_norm(
        ll.DenseLayer(net, num_units=4*4*512, W=Normal(0.05),
                      nonlinearity=nn.relu),
        g=None)
    net = ll.ReshapeLayer(net, (batch_size, 512, 4, 4))
    # 4 -> 8, then 8 -> 16
    for channels, side in ((256, 8), (128, 16)):
        net = nn.batch_norm(
            nn.Deconv2DLayer(net, (batch_size, channels, side, side), (5, 5),
                             W=Normal(0.05), nonlinearity=nn.relu),
            g=None)
    # 16 -> 32
    net = nn.weight_norm(
        nn.Deconv2DLayer(net, (batch_size, 3, 32, 32), (5, 5),
                         W=Normal(0.05), nonlinearity=T.tanh),
        train_g=True, init_stdv=0.1)
    gen_dat = ll.get_output(net)

    with np.load(params_file) as stored:
        params = [stored['arr_{}'.format(idx)] for idx in range(9)]
    ll.set_all_param_values(net, params, trainable=True)

    sample_batch = th.function(inputs=[], outputs=gen_dat)
    collected = []
    while len(collected) < n:
        collected.extend(sample_batch())
    return np.array(collected[:n])
def get_discriminator(self):
    """Specify discriminator D0.

    Builds a convolutional stack over inputs of shape ``(None, 3, 32, 32)``
    that ends in two heads attached to a shared NIN layer: a dense layer
    with ``self.args.z0dim`` units and, after global pooling, a dense
    layer with 10 units.

    Returns
    -------
    tuple
        ``(all_layers, adversarial_head, z_reconstruction_head)``.
    """
    # Earlier configuration, kept for reference (not executed):
    # disc0_layers = [LL.InputLayer(shape=(self.args.batch_size, 3, 32, 32))]
    # disc0_layers.append(LL.GaussianNoiseLayer(disc0_layers[-1], sigma=0.05))
    # disc0_layers.append(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu))
    # disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, stride=2, W=Normal(0.02), nonlinearity=nn.lrelu))) # 16x16
    # disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    # disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu)))
    # disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=1, stride=2, W=Normal(0.02), nonlinearity=nn.lrelu))) # 8x8
    # disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    # disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3,3), pad=0, W=Normal(0.02), nonlinearity=nn.lrelu))) # 6x6
    # disc0_layer_shared = LL.NINLayer(disc0_layers[-1], num_units=192, W=Normal(0.02), nonlinearity=nn.lrelu) # 6x6
    # disc0_layers.append(disc0_layer_shared)
    # disc0_layer_z_recon = LL.DenseLayer(disc0_layer_shared, num_units=50, W=Normal(0.02), nonlinearity=None)
    # disc0_layers.append(disc0_layer_z_recon) # also need to recover z from x
    # disc0_layers.append(LL.GlobalPoolLayer(disc0_layer_shared))
    # disc0_layer_adv = LL.DenseLayer(disc0_layers[-1], num_units=10, W=Normal(0.02), nonlinearity=None)
    # disc0_layers.append(disc0_layer_adv)
    # return disc0_layers, disc0_layer_adv, disc0_layer_z_recon

    def bn_conv(prev, channels, **conv_kwargs):
        # Batch-normalised 3x3 leaky-ReLU convolution.
        return nn.batch_norm(dnn.Conv2DDNNLayer(
            prev, channels, (3, 3), W=Normal(0.01), nonlinearity=nn.lrelu,
            **conv_kwargs))

    stack = [LL.InputLayer(shape=(None, 3, 32, 32))]
    stack.append(LL.GaussianNoiseLayer(stack[-1], sigma=0.2))
    stack.append(dnn.Conv2DDNNLayer(stack[-1], 96, (3, 3), pad=1,
                                    W=Normal(0.01), nonlinearity=nn.lrelu))
    stack.append(bn_conv(stack[-1], 96, pad=1, stride=2))
    stack.append(LL.DropoutLayer(stack[-1], p=0.5))
    stack.append(bn_conv(stack[-1], 192, pad=1))
    stack.append(bn_conv(stack[-1], 192, pad=1, stride=2))
    stack.append(LL.DropoutLayer(stack[-1], p=0.5))
    stack.append(bn_conv(stack[-1], 192, pad=0))

    shared = LL.NINLayer(stack[-1], num_units=192, W=Normal(0.01),
                         nonlinearity=nn.lrelu)
    stack.append(shared)

    # also need to recover z from x
    z_recon_head = LL.DenseLayer(shared, num_units=self.args.z0dim,
                                 nonlinearity=None)
    stack.append(z_recon_head)

    # disc_x_layers.append(nn.MinibatchLayer(disc_x_layers_shared, num_kernels=100))
    stack.append(LL.GlobalPoolLayer(shared))
    adv_head = LL.DenseLayer(stack[-1], num_units=10, W=Normal(0.01),
                             nonlinearity=None)
    stack.append(adv_head)

    #output_before_softmax_x = LL.get_output(disc_x_layer_adv, x, deterministic=False)
    #output_before_softmax_gen = LL.get_output(disc_x_layer_adv, gen_x, deterministic=False)
    # temp = LL.get_output(gen_x_layers[-1], deterministic=False, init=True)
    # temp = LL.get_output(disc_x_layers[-1], x, deterministic=False, init=True)
    # init_updates = [u for l in LL.get_all_layers(gen_x_layers)+LL.get_all_layers(disc_x_layers) for u in getattr(l,'init_updates',[])]
    return stack, adv_head, z_recon_head
def build_network():
    """Assemble the classifier and return its output layer.

    Structure: input ``(None, 3, 32, 32)`` -> Gaussian noise -> two stages
    of three 3x3 'same' convolutions (128, then 256 filters), each stage
    followed by 2x2 max-pooling and dropout -> an unpadded 3x3 convolution
    with 512 filters -> two NIN layers (256, 128 units) -> global pooling
    -> a 10-unit softmax dense layer. Every convolution, NIN and dense
    layer is wrapped in ``WN`` with ``momentum=.999``.
    """
    leaky = lasagne.nonlinearities.LeakyRectify(0.1)
    conv_defs = dict(W=lasagne.init.HeNormal('relu'),
                     b=lasagne.init.Constant(0.0),
                     filter_size=(3, 3),
                     stride=(1, 1),
                     nonlinearity=leaky)
    nin_defs = dict(W=lasagne.init.HeNormal('relu'),
                    b=lasagne.init.Constant(0.0),
                    nonlinearity=lasagne.nonlinearities.LeakyRectify(0.1))
    dense_defs = dict(W=lasagne.init.HeNormal(1.0),
                      b=lasagne.init.Constant(0.0),
                      nonlinearity=lasagne.nonlinearities.softmax)
    wn_defs = dict(momentum=.999)

    net = InputLayer(name='input', shape=(None, 3, 32, 32))
    net = GaussianNoiseLayer(net, name='noise', sigma=.15)

    # Two identical stages that differ only in their filter count.
    for stage, width in (('1', 128), ('2', 256)):
        for suffix in ('a', 'b', 'c'):
            conv = Conv2DLayer(net, name='conv' + stage + suffix,
                               num_filters=width, pad='same', **conv_defs)
            net = WN(conv, **wn_defs)
        net = MaxPool2DLayer(net, name='pool' + stage, pool_size=(2, 2))
        net = DropoutLayer(net, name='drop' + stage, p=.5)

    net = WN(Conv2DLayer(net, name='conv3a', num_filters=512, pad=0,
                         **conv_defs), **wn_defs)
    for layer_name, units in (('conv3b', 256), ('conv3c', 128)):
        net = WN(NINLayer(net, name=layer_name, num_units=units, **nin_defs),
                 **wn_defs)
    net = GlobalPoolLayer(net, name='pool3')
    net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs),
             **wn_defs)
    return net