我们从Python开源项目中,提取了以下16个代码示例,用于说明如何使用utils.ortho_weight()。
def param_init_gru(options, param, prefix='gru', nin=None, dim=None):
    """Fill ``param`` with freshly initialised GRU weights and return it.

    Entries written (each key is ``prefix`` plus the suffix shown):
      ``_W``  -- two ``uniform_weight(nin, dim)`` blocks joined on axis 1
      ``_U``  -- two ``ortho_weight(dim)`` blocks joined on axis 1
      ``_b``  -- ``zero_vector(2 * dim)``
      ``_Wx`` -- ``uniform_weight(nin, dim)``
      ``_Ux`` -- ``ortho_weight(dim)``
      ``_bx`` -- ``zero_vector(dim)``

    ``options`` is accepted but not read. ``nin`` and ``dim`` are handed to
    the helpers unchanged; no fallback is applied when they are None.
    """
    n_blocks = 2

    stacked_input = [uniform_weight(nin, dim) for _ in range(n_blocks)]
    param[prefix + '_W'] = numpy.concatenate(stacked_input, axis=1)

    stacked_recurrent = [ortho_weight(dim) for _ in range(n_blocks)]
    param[prefix + '_U'] = numpy.concatenate(stacked_recurrent, axis=1)

    param[prefix + '_b'] = zero_vector(n_blocks * dim)

    # Un-stacked (single block) counterparts of the three entries above.
    param[prefix + '_Wx'] = uniform_weight(nin, dim)
    param[prefix + '_Ux'] = ortho_weight(dim)
    param[prefix + '_bx'] = zero_vector(dim)
    return param
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)

    Add the GRU's initial weights to ``params`` and return the same mapping.

    ``nin`` and ``dim`` each fall back to ``options['dim_proj']`` when they
    are None. Keys are built with ``_p(prefix, name)``:
      'W'  -- two ``norm_weight(nin, dim)`` blocks concatenated on axis 1
      'b'  -- float32 zeros of length ``2 * dim``
      'U'  -- two ``ortho_weight(dim)`` blocks concatenated on axis 1
      'Wx' -- ``norm_weight(nin, dim)``
      'Ux' -- ``ortho_weight(dim)``
      'bx' -- float32 zeros of length ``dim``
    """
    # Identity test: ``is None`` is the correct way to detect an omitted
    # argument (``== None`` defers to the operand's __eq__).
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    Wx = norm_weight(nin, dim)
    params[_p(prefix, 'Wx')] = Wx

    Ux = ortho_weight(dim)
    params[_p(prefix, 'Ux')] = Ux
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
def param_init_encoder(options, params, prefix='lstm_encoder'):
    """Add the LSTM encoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']`` and ``options['n_h']``. Keys are built with
    ``_p(prefix, name)``:
      'W' -- four ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U' -- four ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'b' -- ``zero_bias(4 * n_h)`` with the slice ``[n_h:2*n_h]`` set to 3
    """
    n_x = options['n_x']
    n_h = options['n_h']
    n_blocks = 4

    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(n_blocks)], axis=1)
    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(n_blocks)], axis=1)
    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)

    # A high initial forget-gate bias has been observed to give slightly
    # better LSTM results (Le et al., 2015), so that slice starts at 3.
    forget_init = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    params[_p(prefix, 'b')][n_h:2 * n_h] = forget_init

    return params
def param_init_encoder(options, params, prefix='gru_encoder'):
    """Add the GRU encoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']`` and ``options['n_h']``. Keys are built with
    ``_p(prefix, name)``:
      'W'  -- two ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U'  -- two ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'b'  -- ``zero_bias(2 * n_h)``
      'Wx' -- ``uniform_weight(n_x, n_h)``
      'Ux' -- ``ortho_weight(n_h)``
      'bx' -- ``zero_bias(n_h)``
    """
    n_x, n_h = options['n_x'], options['n_h']
    n_blocks = 2

    # Stacked blocks.
    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(n_blocks)], axis=1)
    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(n_blocks)], axis=1)
    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)

    # Single-block counterparts.
    params[_p(prefix, 'Wx')] = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Ux')] = ortho_weight(n_h)
    params[_p(prefix, 'bx')] = zero_bias(n_h)

    return params
def param_init_lstm(self, params, nin, dim, prefix='lstm'):
    """Add initial LSTM weights to ``params`` and return the same mapping.

    Keys are built with ``_p(prefix, name)``:
      'W' -- four ``norm_weight(nin, dim)`` blocks concatenated on axis 1
      'U' -- four ``ortho_weight(dim)`` blocks concatenated on axis 1
      'b' -- float32 zeros of length ``4 * dim``

    ``prefix`` must not be None (checked with an assert).
    """
    assert prefix is not None
    n_blocks = 4

    # The weight matrices are stacked side by side for faster dot products.
    input_blocks = [norm_weight(nin, dim) for _ in range(n_blocks)]
    params[_p(prefix, 'W')] = numpy.concatenate(input_blocks, axis=1)

    recurrent_blocks = [ortho_weight(dim) for _ in range(n_blocks)]
    params[_p(prefix, 'U')] = numpy.concatenate(recurrent_blocks, axis=1)

    params[_p(prefix, 'b')] = numpy.zeros((n_blocks * dim,)).astype('float32')

    return params

# This function implements the lstm fprop
def param_init_encoder(options, params, prefix='encoder'):
    """Add the encoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']`` and ``options['n_h']``. Keys are built with
    ``_p(prefix, name)``:
      'W' -- four ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U' -- four ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'b' -- ``zero_bias(4 * n_h)`` with the slice ``[n_h:2*n_h]`` set to 3
    """
    n_x = options['n_x']
    n_h = options['n_h']
    n_blocks = 4

    input_blocks = [uniform_weight(n_x, n_h) for _ in range(n_blocks)]
    params[_p(prefix, 'W')] = np.concatenate(input_blocks, axis=1)

    recurrent_blocks = [ortho_weight(n_h) for _ in range(n_blocks)]
    params[_p(prefix, 'U')] = np.concatenate(recurrent_blocks, axis=1)

    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)
    # The second n_h-wide slice of the bias starts at 3 rather than 0.
    second_slice = slice(n_h, 2 * n_h)
    params[_p(prefix, 'b')][second_slice] = (
        3 * np.ones((n_h,)).astype(theano.config.floatX))

    return params
def param_init_decoder(options, params, prefix='decoder_lstm'):
    """Add the LSTM decoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']`` and ``options['n_h']``. Keys are built with
    ``_p(prefix, name)``:
      'W' -- four ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U' -- four ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'b' -- ``zero_bias(4 * n_h)`` with the slice ``[n_h:2*n_h]`` set to 3
    """
    n_x, n_h = options['n_x'], options['n_h']
    n_blocks = 4

    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(n_blocks)], axis=1)

    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(n_blocks)], axis=1)

    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)

    # Overwrite the second n_h-wide slice of the bias with the constant 3.
    threes = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    params[_p(prefix, 'b')][n_h:2 * n_h] = threes

    return params
def param_init_decoder(options, params, prefix='decoder_gru'):
    """Add the GRU decoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']`` and ``options['n_h']``. Keys are built with
    ``_p(prefix, name)``:
      'W'  -- two ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U'  -- two ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'b'  -- ``zero_bias(2 * n_h)``
      'Wx' -- ``uniform_weight(n_x, n_h)``
      'Ux' -- ``ortho_weight(n_h)``
      'bx' -- ``zero_bias(n_h)``
      'b0' -- ``zero_bias(n_h)``
    """
    n_x, n_h = options['n_x'], options['n_h']
    n_blocks = 2

    # Stacked blocks.
    stacked_input = [uniform_weight(n_x, n_h) for _ in range(n_blocks)]
    params[_p(prefix, 'W')] = np.concatenate(stacked_input, axis=1)

    stacked_recurrent = [ortho_weight(n_h) for _ in range(n_blocks)]
    params[_p(prefix, 'U')] = np.concatenate(stacked_recurrent, axis=1)

    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)

    # Single-block counterparts.
    params[_p(prefix, 'Wx')] = uniform_weight(n_x, n_h)
    params[_p(prefix, 'Ux')] = ortho_weight(n_h)
    params[_p(prefix, 'bx')] = zero_bias(n_h)

    # Additional n_h-long zero bias stored under 'b0'.
    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
def param_init_decoder(options, params, prefix='decoder'):
    """Add the decoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']``, ``options['n_h']`` and ``options['n_z']``.
    Keys are built with ``_p(prefix, name)``:
      'W'   -- four ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U'   -- four ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'C'   -- four ``uniform_weight(n_z, n_h)`` blocks concatenated on axis 1
      'b'   -- ``zero_bias(4 * n_h)`` with the slice ``[n_h:2*n_h]`` set to 3
      'C0'  -- ``uniform_weight(n_z, n_h)``
      'b0'  -- ``zero_bias(n_h)``
      'b_y' -- ``zero_bias(n_x)``
    """
    n_x = options['n_x']
    n_h = options['n_h']
    n_z = options['n_z']
    n_blocks = 4

    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(n_blocks)], axis=1)
    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(n_blocks)], axis=1)
    params[_p(prefix, 'C')] = np.concatenate(
        [uniform_weight(n_z, n_h) for _ in range(n_blocks)], axis=1)

    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)
    # Overwrite the second n_h-wide slice of the bias with the constant 3.
    threes = 3 * np.ones((n_h,)).astype(theano.config.floatX)
    params[_p(prefix, 'b')][n_h:2 * n_h] = threes

    params[_p(prefix, 'C0')] = uniform_weight(n_z, n_h)
    params[_p(prefix, 'b0')] = zero_bias(n_h)
    params[_p(prefix, 'b_y')] = zero_bias(n_x)  # 48

    return params
def param_init_decoder(options, params, prefix='decoder'):
    """Add the decoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']``, ``options['n_h']`` and ``options['n_z']``.
    Keys are built with ``_p(prefix, name)``:
      'W'  -- four ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U'  -- four ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'C'  -- four ``uniform_weight(n_z, n_h)`` blocks concatenated on axis 1
      'b'  -- ``zero_bias(4 * n_h)`` with the slice ``[n_h:2*n_h]`` set to 3
      'C0' -- ``uniform_weight(n_z, n_h)``
      'b0' -- ``zero_bias(n_h)``

    No 'b_y' entry is created by this variant.
    """
    n_x, n_h, n_z = options['n_x'], options['n_h'], options['n_z']
    n_blocks = 4

    input_blocks = [uniform_weight(n_x, n_h) for _ in range(n_blocks)]
    params[_p(prefix, 'W')] = np.concatenate(input_blocks, axis=1)

    recurrent_blocks = [ortho_weight(n_h) for _ in range(n_blocks)]
    params[_p(prefix, 'U')] = np.concatenate(recurrent_blocks, axis=1)

    latent_blocks = [uniform_weight(n_z, n_h) for _ in range(n_blocks)]
    params[_p(prefix, 'C')] = np.concatenate(latent_blocks, axis=1)

    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)
    # Overwrite the second n_h-wide slice of the bias with the constant 3.
    params[_p(prefix, 'b')][n_h:2 * n_h] = (
        3 * np.ones((n_h,)).astype(theano.config.floatX))

    params[_p(prefix, 'C0')] = uniform_weight(n_z, n_h)
    params[_p(prefix, 'b0')] = zero_bias(n_h)
    # Disabled in this variant:
    # params[_p(prefix, 'b_y')] = zero_bias(n_x)  # 48

    return params
def param_init_decoder(options, params, prefix='decoder_lstm'):
    """Add the LSTM decoder's initial weights to ``params`` and return it.

    Reads ``options['n_x']``, ``options['n_h']`` and ``options['n_z']``.
    Keys are built with ``_p(prefix, name)``:
      'W'  -- four ``uniform_weight(n_x, n_h)`` blocks concatenated on axis 1
      'U'  -- four ``ortho_weight(n_h)`` blocks concatenated on axis 1
      'b'  -- ``zero_bias(4 * n_h)`` (left all-zero in this variant)
      'C0' -- ``uniform_weight(n_z, n_h)``
      'b0' -- ``zero_bias(n_h)``

    This variant creates no stacked 'C' matrix and does not overwrite any
    slice of 'b'; both steps are disabled.
    """
    n_x, n_h, n_z = options['n_x'], options['n_h'], options['n_z']
    n_blocks = 4

    params[_p(prefix, 'W')] = np.concatenate(
        [uniform_weight(n_x, n_h) for _ in range(n_blocks)], axis=1)

    params[_p(prefix, 'U')] = np.concatenate(
        [ortho_weight(n_h) for _ in range(n_blocks)], axis=1)

    # Disabled: stacked n_z -> n_h matrix stored under 'C'.
    # C = np.concatenate([uniform_weight(n_z, n_h) for _ in range(4)], axis=1)
    # params[_p(prefix, 'C')] = C

    params[_p(prefix, 'b')] = zero_bias(n_blocks * n_h)
    # Disabled: setting the bias slice [n_h:2*n_h] to 3.
    # params[_p(prefix, 'b')][n_h:2*n_h] = 3*np.ones((n_h,)).astype(theano.config.floatX)

    params[_p(prefix, 'C0')] = uniform_weight(n_z, n_h)
    params[_p(prefix, 'b0')] = zero_bias(n_h)

    return params
def __init__(self, shape, name):
    """Create the layer's Theano shared variables.

    ``shape`` is indexed at positions 0, 1 and 2. Judging from how the
    values are used below, these look like the input size, the LSTM size
    and the context size respectively -- TODO confirm against callers.
    ``name`` is the string prefix given to every shared variable
    (``name + "_W"``, ``name + "_U"``, ...).
    """
    n_in = shape[0]
    n_lstm = shape[1]
    n_ctx = shape[2]
    n_blocks = 4

    # input to LSTM: the matrices are stacked for compactness so a single
    # dot product can be done and then sliced per "gate"
    self.W = theano.shared(
        numpy.concatenate(
            [utils.norm_weight(n_in, n_lstm) for _ in range(n_blocks)],
            axis=1),
        name=name + "_W")

    # LSTM to LSTM
    self.U = theano.shared(
        numpy.concatenate(
            [utils.ortho_weight(n_lstm) for _ in range(n_blocks)],
            axis=1),
        name=name + "_U")

    # bias to LSTM (a single float32 cast is sufficient; the original
    # applied the same cast twice)
    self.b = theano.shared(
        numpy.zeros((n_blocks * n_lstm,)).astype('float32'),
        name=name + "_b")

    # context to LSTM
    self.Wc = theano.shared(utils.norm_weight(n_ctx, n_blocks * n_lstm),
                            name=name + "_Wc")

    # attention: context -> hidden
    self.Wc_att = theano.shared(utils.norm_weight(n_ctx, ortho=False),
                                name=name + "_Wc_att")

    # attention: LSTM -> hidden
    self.Wd_att = theano.shared(utils.norm_weight(n_lstm, n_ctx),
                                name=name + "_Wd_att")

    # attention: hidden bias
    self.b_att = theano.shared(numpy.zeros((n_ctx,)).astype('float32'),
                               name=name + "_b_att")

    # optional "deep" attention
    self.W_att_1 = theano.shared(utils.ortho_weight(n_ctx),
                                 name=name + "_W_att_1")
    self.b_att_1 = theano.shared(numpy.zeros((n_ctx,)).astype('float32'),
                                 name=name + "_b_att_1")

    # attention:
    self.U_att = theano.shared(utils.norm_weight(n_ctx, 1),
                               name=name + "_U_att")
    self.c_att = theano.shared(numpy.zeros((1,)).astype('float32'),
                               name=name + "_c_att")

    # attention: selector
    self.W_sel = theano.shared(utils.norm_weight(n_lstm, 1),
                               name=name + "_W_sel")
    self.b_sel = theano.shared(numpy.float32(0.), name=name + "_b_sel")
def param_init_lstm_cond(self, options, params, prefix='lstm_cond', nin=None, dim=None, dimctx=None):
    """Add initial weights for the conditional LSTM to ``params``.

    ``nin``, ``dim`` and ``dimctx`` each fall back to ``options['dim']``
    when they are None. Keys are built with ``_p(prefix, name)``:
      'W'      -- four ``norm_weight(nin, dim)`` blocks concatenated on axis 1
      'U'      -- four ``ortho_weight(dim)`` blocks concatenated on axis 1
      'b'      -- float32 zeros of length ``4 * dim``
      'Wc_att' -- ``norm_weight(dimctx, ortho=False)``
      'Wd_att' -- ``norm_weight(dim, dimctx)``
      'b_att'  -- float32 zeros of length ``dimctx``
      'U_att'  -- ``norm_weight(dimctx, 1)``
      'c_tt'   -- float32 zeros of length 1
      'W_sel', 'b_sel' -- only when ``options['selector']`` is truthy

    Returns the same ``params`` mapping.
    """
    # Identity test: ``is None`` is the correct way to detect an omitted
    # argument (``== None`` defers to the operand's __eq__).
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    # input to LSTM
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W

    # LSTM to LSTM
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # bias to LSTM
    params[_p(prefix, 'b')] = numpy.zeros((4 * dim,)).astype('float32')

    # context to LSTM (disabled)
    # Wc = norm_weight(dimctx, dim * 4)
    # params[_p(prefix, 'Wc')] = Wc

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx, ortho=False)
    params[_p(prefix, 'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim, dimctx)
    params[_p(prefix, 'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[_p(prefix, 'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx, 1)
    params[_p(prefix, 'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    # NOTE(review): the key is spelled 'c_tt', not 'c_att'. It is kept
    # byte-for-byte because code reading this mapping (and any saved
    # parameter files) presumably looks it up under the same name -- confirm
    # before renaming.
    params[_p(prefix, 'c_tt')] = c_att

    if options['selector']:
        # attention: selector
        W_sel = norm_weight(dim, 1)
        params[_p(prefix, 'W_sel')] = W_sel
        b_sel = numpy.float32(0.)
        params[_p(prefix, 'b_sel')] = b_sel

    return params