The following 38 code examples, extracted from open-source Python projects, illustrate how to use utils._p().
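In these projects, _p is a small naming helper: it joins a layer prefix with a parameter name to build a unique key in the parameter dictionary, so that params[_p(prefix, 'W')] with prefix='gru' stores the key for that layer's W matrix. The helper's definition is not reproduced on this page; a minimal sketch consistent with how the examples below use it (an assumption, not the verified source) would be:

def _p(pp, name):
    # Assumed helper: join a prefix and a parameter name into one key,
    # e.g. _p('gru', 'W') -> 'gru_W'.
    return '%s_%s' % (pp, name)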
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin == None:
        nin = options['dim_proj']
    if dim == None:
        dim = options['dim_proj']
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def param_init_encoder(options, params, prefix='lstm_encoder'):
    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # It is observed that setting a high initial forget gate bias for LSTMs can
    # give slightly better results (Le et al., 2015). Hence, the initial forget
    # gate bias is set to 3.
    params[_p(prefix, 'b')][n_h:2*n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)

    return params
def param_init_encoder(options, params, prefix='gru_encoder'):
    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(2 * n_h)

    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Wx')] = Wx

    Ux = ortho_weight(n_h)
    params[_p(prefix, 'Ux')] = Ux

    params[_p(prefix, 'bx')] = zero_bias(n_h)

    return params
def init_params(options, W):
    params = OrderedDict()

    # W is initialized by the pretrained word embedding
    params['Wemb'] = W.astype(config.floatX)
    # otherwise, W will be initialized randomly
    # n_words = options['n_words']
    # n_x = options['n_x']
    # params['Wemb'] = uniform_weight(n_words, n_x)

    length = len(options['filter_shapes'])
    for idx in range(length):
        params = param_init_encoder(options['filter_shapes'][idx], params,
                                    prefix=_p('cnn_encoder', idx))

    n_h = options['feature_maps'] * length
    params['Wy'] = uniform_weight(n_h, options['n_y'])
    params['by'] = zero_bias(options['n_y'])

    return params
def param_init_lstm(self, params, nin, dim, prefix='lstm'):
    assert prefix is not None
    # Stack the weight matrices for faster dot products
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    return params

# This function implements the lstm fprop
def param_init_mlp_layer(input_shape, pred_shape, params, prefix='mlp_layer'):
    """
    input_shape: (num of hiddens, number of input features)
    pred_shape: (num of labels, number of hiddens)
    """
    W1 = np.asarray(rng.uniform(low=-0.01, high=0.01, size=input_shape), dtype=theano.config.floatX)
    b1 = np.ones((input_shape[0],), dtype=theano.config.floatX) * 0.01  # initialized to 0.01 rather than 0
    V1 = np.asarray(rng.uniform(low=-0.01, high=0.01, size=pred_shape), dtype=theano.config.floatX)  # 2*200
    c1 = np.ones((pred_shape[0],), dtype=theano.config.floatX) * 0.01   # initialized to 0.01

    params[_p(prefix, 'W1')] = W1
    params[_p(prefix, 'b1')] = b1
    params[_p(prefix, 'V1')] = V1
    params[_p(prefix, 'c1')] = c1

    return params
def mlp_layer_softmax(tparams, layer1_input, prefix='mlp_layer'):
    """
    layer1_input: n_sample * n_feature    64*20
    input_shape: (num of hiddens, number of input features)    200*20
    pred_shape: (num of labels, number of hiddens)    2*200
    y_recon: n_label * n_sample    2*64
    """
    hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix, 'W1')].T) + tparams[_p(prefix, 'b1')])  # 64*200
    y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix, 'V1')].T) + tparams[_p(prefix, 'c1')]
    # y_recons = tensor.tanh(y_recons) * 10  # avoid numerical issues/label smoothing
    # y_recons = tensor.nnet.softmax(y_recons)  # 64*2
    max_w = tensor.max(y_recons, axis=1, keepdims=True)
    e0 = tensor.exp(y_recons - max_w)
    y_recons = e0 / tensor.sum(e0, axis=1, keepdims=True)

    return y_recons
def param_init_encoder(options, params, prefix='encoder'):
    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2*n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)

    return params
def encoder(tparams, layer0_input, filter_shape, pool_size, options, prefix='cnn_d'):
    """
    filter_shape: (number of filters, num input feature maps, filter height, filter width)
    image_shape: (batch_size, num input feature maps, image height, image width)
    """
    conv_out = conv.conv2d(input=layer0_input, filters=tparams[_p(prefix, 'W')],
                           filter_shape=filter_shape)
    # conv_out_tanh = tensor.tanh(conv_out + tparams[_p(prefix,'b')].dimshuffle('x', 0, 'x', 'x'))
    # output = downsample.max_pool_2d(input=conv_out_tanh, ds=pool_size, ignore_border=False)
    if options['cnn_activation'] == 'tanh':
        conv_out_tanh = tensor.tanh(conv_out + tparams[_p(prefix, 'b')].dimshuffle('x', 0, 'x', 'x'))
        output = downsample.max_pool_2d(input=conv_out_tanh, ds=pool_size, ignore_border=False)  # the ignore border is very important
    elif options['cnn_activation'] == 'linear':
        conv_out2 = conv_out + tparams[_p(prefix, 'b')].dimshuffle('x', 0, 'x', 'x')
        output = downsample.max_pool_2d(input=conv_out2, ds=pool_size, ignore_border=False)  # the ignore border is very important
    else:
        print(' Wrong specification of activation function in CNN')

    return output.flatten(2)
def param_init_decoder(options, params, prefix='decoder_lstm'):
    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2*n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)

    return params
def param_init_decoder(options, params, prefix='decoder_gru'):
    n_x = options['n_x']
    n_h = options['n_h']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    params[_p(prefix, 'b')] = zero_bias(2 * n_h)

    Wx = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Wx')] = Wx

    Ux = ortho_weight(n_h)
    params[_p(prefix, 'Ux')] = Ux

    params[_p(prefix, 'bx')] = zero_bias(n_h)
    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    """
    Affine transformation + point-wise nonlinearity
    """
    if nin == None:
        nin = options['dim_proj']
    if nout == None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, ortho=ortho)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')

    return params
def fflayer(tparams, state_below, options, prefix='rconv',
            activ='lambda x: tensor.tanh(x)', **kwargs):
    """
    Feedforward pass
    """
    return eval(activ)(tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                       tparams[_p(prefix, 'b')])

# GRU layer
def param_init_encoder(filter_shape, params, prefix='cnn_encoder'):
    """
    filter_shape: (number of filters, num input feature maps, filter height, filter width)
    image_shape: (batch_size, num input feature maps, image height, image width)
    """
    W = np.asarray(rng.uniform(low=-0.01, high=0.01, size=filter_shape), dtype=theano.config.floatX)
    b = np.zeros((filter_shape[0],), dtype=theano.config.floatX)

    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = b

    return params
def encoder(tparams, layer0_input, filter_shape, pool_size, prefix='cnn_encoder'):
    """
    filter_shape: (number of filters, num input feature maps, filter height, filter width)
    image_shape: (batch_size, num input feature maps, image height, image width)
    """
    conv_out = conv.conv2d(input=layer0_input, filters=tparams[_p(prefix, 'W')],
                           filter_shape=filter_shape)
    conv_out_tanh = tensor.tanh(conv_out + tparams[_p(prefix, 'b')].dimshuffle('x', 0, 'x', 'x'))
    output = pool.pool_2d(input=conv_out_tanh, ds=pool_size, ignore_border=True)

    return output.flatten(2)
def param_init_fflayer(self, options, params, prefix='ff', nin=None, nout=None):
    if nin == None:
        nin = options['dim_proj']
    if nout == None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')

    return params
def fflayer(self, tparams, state_below, options, prefix='rconv',
            activ='lambda x: tensor.tanh(x)', **kwargs):
    return eval(activ)(tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                       tparams[_p(prefix, 'b')])

# LSTM layer
def mlp_layer_tanh(tparams, layer1_input, prefix='mlp_layer'):
    """
    layer1_input: n_sample * n_feature    64*20
    input_shape: (num of hiddens, number of input features)    200*20
    pred_shape: (num of labels, number of hiddens)    2*200
    y_recon: n_label * n_sample    2*64
    """
    hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix, 'W1')].T) + tparams[_p(prefix, 'b1')])  # 64*200
    y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix, 'V1')].T) + tparams[_p(prefix, 'c1')]
    # y_recons = tensor.tanh(y_recons) * 10  # avoid numerical issues/label smoothing
    # y_recons = tensor.nnet.softmax(y_recons)  # 64*2
    y_recons = tensor.tanh(y_recons)

    return y_recons
def mlp_layer_linear(tparams, layer1_input, prefix='mlp_layer'):
    """
    layer1_input: n_sample * n_feature    64*20
    input_shape: (num of hiddens, number of input features)    200*20
    pred_shape: (num of labels, number of hiddens)    2*200
    y_recon: n_label * n_sample    2*64
    """
    hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix, 'W1')].T) + tparams[_p(prefix, 'b1')])  # 64*200
    y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix, 'V1')].T) + tparams[_p(prefix, 'c1')]
    # y_recons = tensor.tanh(y_recons) * 10  # avoid numerical issues/label smoothing
    # y_recons = tensor.nnet.softmax(y_recons)  # 64*2

    return y_recons
def middle_layer(tparams, layer1_input, prefix='mlp_layer'):
    """
    layer1_input: n_sample * n_feature    64*20
    input_shape: (num of hiddens, number of input features)    200*20
    pred_shape: (num of labels, number of hiddens)    2*200
    y_recon: n_label * n_sample    2*64
    """
    hidden_2_out = tensor.nnet.sigmoid(tensor.dot(layer1_input, tparams[_p(prefix, 'W1')].T) + tparams[_p(prefix, 'b1')])  # 64*200
    # y_recons = tensor.dot(hidden_2_out, tparams[_p(prefix,'V1')].T) + tparams[_p(prefix,'c1')]
    # avoid numerical issues
    # y_recons = tensor.nnet.softmax(y_recons)  # 64*2

    return hidden_2_out
def param_init_decoder(options, params, prefix='decoder'):
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    C = np.concatenate([uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h),
                        uniform_weight(n_z, n_h)], axis=1)
    params[_p(prefix, 'C')] = C

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    params[_p(prefix, 'b')][n_h:2*n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)

    C0 = uniform_weight(n_z, n_h)
    params[_p(prefix, 'C0')] = C0

    params[_p(prefix, 'b0')] = zero_bias(n_h)
    params[_p(prefix, 'b_y')] = zero_bias(n_x)  # 48

    return params
def param_init_encoder(filter_shape, params, prefix='cnn_d'):
    """
    filter_shape: (number of filters, num input feature maps, filter height, filter width)
    image_shape: (batch_size, num input feature maps, image height, image width)
    """
    W = np.asarray(rng.uniform(low=-0.01, high=0.01, size=filter_shape), dtype=theano.config.floatX)
    b = np.zeros((filter_shape[0],), dtype=theano.config.floatX)

    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = b

    return params
def param_init_batch_norm(input_shape, params, prefix='cnn'):
    """
    input_shape: (num of hiddens, number of input features)
    pred_shape: (num of labels, number of hiddens)
    """
    beta = np.ones((input_shape[1],), dtype=theano.config.floatX) * 0.01
    gamma = np.ones((input_shape[1],), dtype=theano.config.floatX) * 0.1

    params[_p(prefix, 'beta')] = beta
    params[_p(prefix, 'gamma')] = gamma

    return params
def batch_norm(tparams, input, options, prefix='cnn'):
    """
    layer1_input: n_sample * n_feature    64*20
    input_shape: (num of hiddens, number of input features)    200*20
    pred_shape: (num of labels, number of hiddens)    2*200
    y_recon: n_label * n_sample    2*64
    """
    input_hat = (input - input.mean(0)) / (input.std(0) + 1.0 / options['L'])
    input_ = input_hat * tparams[_p(prefix, 'gamma')] + tparams[_p(prefix, 'beta')]

    return input_
def param_init_decoder(options, params, prefix='decoder_lstm'):
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']

    W = np.concatenate([uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h),
                        uniform_weight(n_x, n_h)], axis=1)
    params[_p(prefix, 'W')] = W

    U = np.concatenate([ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h),
                        ortho_weight(n_h)], axis=1)
    params[_p(prefix, 'U')] = U

    # C = np.concatenate([uniform_weight(n_z, n_h),
    #                     uniform_weight(n_z, n_h),
    #                     uniform_weight(n_z, n_h),
    #                     uniform_weight(n_z, n_h)], axis=1)
    # params[_p(prefix, 'C')] = C

    params[_p(prefix, 'b')] = zero_bias(4 * n_h)
    # params[_p(prefix, 'b')][n_h:2*n_h] = 3 * np.ones((n_h,)).astype(theano.config.floatX)

    C0 = uniform_weight(n_z, n_h)
    params[_p(prefix, 'C0')] = C0

    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
def decoder_layer(tparams, state_below, prefix='decoder_gru'):
    """
    state_below: size of n_steps * n_x
    """
    n_steps = state_below.shape[0]
    n_h = tparams[_p(prefix, 'Ux')].shape[1]

    state_belowx0 = tparams[_p(prefix, 'b0')]
    h0vec = tensor.tanh(state_belowx0)
    h0 = h0vec.dimshuffle('x', 0)

    def _slice(_x, n, dim):
        return _x[n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]

    def _step_slice(x_, xx_, h_, U, Ux):
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        u = tensor.nnet.sigmoid(_slice(preact, 1, n_h))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)
        h = u * h_ + (1. - u) * h

        return h

    seqs = [state_below_[:n_steps-1], state_belowx[:n_steps-1]]
    _step = _step_slice

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[h0vec],
                                non_sequences=[tparams[_p(prefix, 'U')],
                                               tparams[_p(prefix, 'Ux')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps-1)

    # h0x = h0.dimshuffle('x', 0, 1)

    return tensor.concatenate((h0, rval))
def gru_layer(tparams, state_below, init_state, options, prefix='gru', mask=None, **kwargs):
    """
    Feedforward pass through GRU
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    dim = tparams[_p(prefix, 'Ux')].shape[1]

    if init_state == None:
        init_state = tensor.alloc(0., n_samples, dim)

    if mask == None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + tparams[_p(prefix, 'bx')]
    U = tparams[_p(prefix, 'U')]
    Ux = tparams[_p(prefix, 'Ux')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
        u = tensor.nnet.sigmoid(_slice(preact, 1, dim))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)
        h = u * h_ + (1. - u) * h
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]
    _step = _step_slice

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[init_state],
                                non_sequences=[tparams[_p(prefix, 'U')],
                                               tparams[_p(prefix, 'Ux')]],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps,
                                profile=False,
                                strict=True)
    rval = [rval]
    return rval
def encoder(tparams, state_below, mask, seq_output=False, prefix='lstm_encoder'):
    """
    state_below: size of n_steps * n_samples * n_x
    """
    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]

    n_h = tparams[_p(prefix, 'U')].shape[0]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
        tparams[_p(prefix, 'b')]

    def _step(m_, x_, h_, c_, U):
        preact = tensor.dot(h_, U)
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        f = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
        o = tensor.nnet.sigmoid(_slice(preact, 2, n_h))
        c = tensor.tanh(_slice(preact, 3, n_h))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    seqs = [mask, state_below_]
    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[tensor.alloc(numpy_floatX(0.), n_samples, n_h),
                                              tensor.alloc(numpy_floatX(0.), n_samples, n_h)],
                                non_sequences=[tparams[_p(prefix, 'U')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps,
                                strict=True)

    h_rval = rval[0]
    if seq_output:
        return h_rval
    else:
        # size of n_samples * n_h
        return h_rval[-1]
def encoder(tparams, state_below, mask, seq_output=False, prefix='gru_encoder'):
    """
    state_below: size of n_steps * n_samples * n_x
    """
    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]

    n_h = tparams[_p(prefix, 'Ux')].shape[1]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
        tparams[_p(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + \
        tparams[_p(prefix, 'bx')]

    def _step(m_, x_, xx_, h_, U, Ux):
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        u = tensor.nnet.sigmoid(_slice(preact, 1, n_h))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)
        h = u * h_ + (1. - u) * h
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]
    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[tensor.alloc(numpy_floatX(0.), n_samples, n_h)],
                                non_sequences=[tparams[_p(prefix, 'U')],
                                               tparams[_p(prefix, 'Ux')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps,
                                strict=True)

    if seq_output:
        return rval
    else:
        # size of n_samples * n_h
        return rval[-1]
def param_init_lstm_cond(self, options, params, prefix='lstm_cond',
                         nin=None, dim=None, dimctx=None):
    if nin == None:
        nin = options['dim']
    if dim == None:
        dim = options['dim']
    if dimctx == None:
        dimctx = options['dim']

    # input to LSTM
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # bias to LSTM
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # context to LSTM
    # Wc = norm_weight(dimctx, dim * 4)
    # params[_p(prefix, 'Wc')] = Wc

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[_p(prefix, 'c_tt')] = c_att

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params
def encoder(tparams, state_below, mask, seq_output=False, prefix='encoder'):
    """
    state_below: size of n_steps * n_samples * n_x
    """
    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]

    n_h = tparams[_p(prefix, 'U')].shape[0]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
        tparams[_p(prefix, 'b')]

    def _step(m_, x_, h_, c_, U):
        preact = tensor.dot(h_, U)
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        f = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
        o = tensor.nnet.sigmoid(_slice(preact, 2, n_h))
        c = tensor.tanh(_slice(preact, 3, n_h))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    seqs = [mask, state_below_]
    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[tensor.alloc(numpy_floatX(0.), n_samples, n_h),
                                              tensor.alloc(numpy_floatX(0.), n_samples, n_h)],
                                non_sequences=[tparams[_p(prefix, 'U')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps,
                                strict=True)

    h_rval = rval[0]
    if seq_output:
        return h_rval
    else:
        # size of n_samples * n_h
        return h_rval[-1]
def decoder(tparams, state_below, z, mask=None, prefix='decoder'):
    """
    state_below: size of n_steps * n_samples * n_x
    z: size of n_samples * n_z
    """
    n_steps = state_below.shape[0]
    n_samples = state_below.shape[1]

    n_h = tparams[_p(prefix, 'U')].shape[0]

    # n_samples * n_h
    state_belowx0 = tensor.dot(z, tparams[_p(prefix, 'C0')]) + \
        tparams[_p(prefix, 'b0')]
    h0 = tensor.tanh(state_belowx0)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # n_steps * n_samples * n_h
    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
        tensor.dot(z, tparams[_p(prefix, 'C')]) + tparams[_p(prefix, 'b')]

    def _step(m_, x_, h_, c_, U):
        preact = tensor.dot(h_, U)
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        f = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
        o = tensor.nnet.sigmoid(_slice(preact, 2, n_h))
        c = tensor.tanh(_slice(preact, 3, n_h))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    seqs = [mask[:n_steps-1], state_below_[:n_steps-1]]
    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[h0, tensor.alloc(numpy_floatX(0.), n_samples, n_h)],
                                non_sequences=[tparams[_p(prefix, 'U')]],
                                name=_p(prefix, '_layers'),
                                n_steps=n_steps-1,
                                strict=True)

    h0x = tensor.shape_padleft(h0)
    h_rval = rval[0]

    return tensor.concatenate((h0x, h_rval))
def decoder_layer(tparams, state_below, z, mask, prefix='decoder_lstm'):
    """
    state_below: size of n_steps * n_samples * n_x
    z: size of n_samples * n_z
    """
    nsteps = state_below.shape[0]
    n_h = tparams[_p(prefix, 'U')].shape[0]

    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    state_belowx0 = tensor.dot(z, tparams[_p(prefix, 'C0')]) + \
        tparams[_p(prefix, 'b0')]
    h0 = tensor.tanh(state_belowx0)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + tparams[_p(prefix, 'b')]
    # tensor.dot(z, tparams[_p(prefix, 'C')])

    def _step(m_, x_, h_, c_, U):
        preact = tensor.dot(h_, U)
        preact += x_

        i = tensor.nnet.sigmoid(_slice(preact, 0, n_h))
        f = tensor.nnet.sigmoid(_slice(preact, 1, n_h))
        o = tensor.nnet.sigmoid(_slice(preact, 2, n_h))
        c = tensor.tanh(_slice(preact, 3, n_h))

        c = f * c_ + i * c
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    seqs = [mask[:nsteps-1], state_below_[:nsteps-1]]
    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[h0, tensor.alloc(numpy_floatX(0.), n_samples, n_h)],
                                non_sequences=[tparams[_p(prefix, 'U')]],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps-1,
                                strict=True)

    h0x = h0.dimshuffle('x', 0, 1)
    h_rval = rval[0]

    return tensor.concatenate((h0x, h_rval))