我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用tensorflow.matmul()。
def inference(self):
    """Build the classifier graph: embedding -> Bi-LSTM -> concat -> time-mean -> linear.

    Reads ``self.Embedding``, ``self.input_x``, ``self.hidden_size``,
    ``self.dropout_keep_prob``, ``self.W_projection`` and ``self.b_projection``.
    Stores the embedded input in ``self.embedded_words`` and the pooled RNN
    output in ``self.output_rnn_last``.

    Returns:
        The un-normalised logits (no softmax is applied in this method).
    """
    # 1. Word embeddings; the original notes give [None, sentence_length, embed_size].
    self.embedded_words = tf.nn.embedding_lookup(self.Embedding, self.input_x)

    # 2. One LSTM cell per direction (forward first, then backward).
    fw_cell = rnn.BasicLSTMCell(self.hidden_size)
    bw_cell = rnn.BasicLSTMCell(self.hidden_size)
    if self.dropout_keep_prob is not None:
        # Dropout is applied to the cell outputs only.
        fw_cell = rnn.DropoutWrapper(fw_cell, output_keep_prob=self.dropout_keep_prob)
        bw_cell = rnn.DropoutWrapper(bw_cell, output_keep_prob=self.dropout_keep_prob)

    # bidirectional_dynamic_rnn returns ((output_fw, output_bw), output_states);
    # the final states are not needed here.
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, self.embedded_words, dtype=tf.float32)
    print("outputs:===>", outputs)

    # 3. Join both directions on the feature axis, then average over axis 1.
    #    Despite its name, output_rnn_last is the mean over time steps, not
    #    the last step (the original keeps that alternative as a TODO).
    both_directions = tf.concat(outputs, axis=2)
    self.output_rnn_last = tf.reduce_mean(both_directions, axis=1)
    print("output_rnn_last:", self.output_rnn_last)

    # 4. Linear projection to class scores.
    with tf.name_scope("output"):
        projected = tf.matmul(self.output_rnn_last, self.W_projection)
        logits = projected + self.b_projection
    return logits
def dense(inputs, units, bias_shape, w_i, b_i=None, activation=tf.nn.relu):
    """Fully connected layer built from explicit ``tf.get_variable`` parameters.

    Inputs with more than two dimensions are flattened first, so this stands in
    for ``tf.layers.dense(tf.contrib.layers.flatten(x), units=...)``.

    Args:
        inputs: Tensor, or a value convertible to a float tensor.
        units: Number of output units.
        bias_shape: Shape of the bias variable, or ``None`` for no bias. Its
            first entry must equal ``units``.
        w_i: Initializer for the ``'weights'`` variable.
        b_i: Initializer for the ``'biases'`` variable.
        activation: Callable applied to the result, or ``None`` for linear.

    Returns:
        The (optionally biased and activated) layer output tensor.
    """
    if not isinstance(inputs, ops.Tensor):
        inputs = ops.convert_to_tensor(inputs, dtype='float')
    if len(inputs.shape) > 2:
        inputs = tf.contrib.layers.flatten(inputs)

    in_features = inputs.shape[1]
    kernel = tf.get_variable('weights', shape=[in_features, units], initializer=w_i)
    pre_activation = tf.matmul(inputs, kernel)

    if bias_shape is not None:
        assert bias_shape[0] == units
        bias = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        pre_activation = pre_activation + bias

    if activation is None:
        return pre_activation
    return activation(pre_activation)
def ae(x):
    """Run ``x`` through an eight-layer fully connected network.

    The activation is selected by the enclosing-scope ``nonlinearity_name``
    ('relu', 'elu', 'gelu' or 'silu'); weights and biases come from the
    enclosing-scope dicts ``W`` and ``b`` keyed '1'..'8'. Layers 1-7 are
    activated, layer 8 is linear.

    Raises:
        NameError: if ``nonlinearity_name`` is not one of the supported names.
    """
    def gelu_fast(_x):
        # tanh approximation of GELU.
        return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3))))

    def silu(_x):
        return _x * tf.sigmoid(_x)

    if nonlinearity_name == 'relu':
        act = tf.nn.relu
    elif nonlinearity_name == 'elu':
        act = tf.nn.elu
    elif nonlinearity_name == 'gelu':
        act = gelu_fast
    elif nonlinearity_name == 'silu':
        act = silu
    else:
        raise NameError("Need 'relu', 'elu', 'gelu', or 'silu' for nonlinearity_name")

    hidden = x
    for key in ('1', '2', '3', '4', '5', '6', '7'):
        hidden = act(tf.matmul(hidden, W[key]) + b[key])
    # The final layer has no activation.
    return tf.matmul(hidden, W['8']) + b['8']
def baseline_forward(self, X, size, n_class):
    """LSTM baseline: classify from the RNN output at the last time step.

    Uses the legacy ``tf.split(axis, num, value)`` / ``rnn.rnn`` API.

    Args:
        X: Input tensor with a fully known static shape on axes 1 and 2;
            per the original notes, batch_size x sentence_length x word_length.
        size: Number of LSTM units.
        n_class: Number of output classes.

    Returns:
        ``outputs[-1] @ W + b`` with freshly created random-normal ``W``, ``b``.
    """
    static_shape = X.get_shape()
    # Swap the first two axes so the time axis leads.
    time_major = tf.transpose(X, [1, 0, 2])
    # Collapse the two leading axes; rows are grouped by time step.
    flat = tf.reshape(time_major, [-1, int(static_shape[2])])
    # One tensor per time step, as the static RNN expects a list.
    steps = tf.split(0, int(static_shape[1]), flat)

    with tf.name_scope("LSTM"):
        cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
        outputs, states = rnn.rnn(cell, steps, dtype=tf.float32)

    with tf.name_scope("LSTM-Classifier"):
        W = tf.Variable(tf.random_normal([size, n_class]), name="W")
        b = tf.Variable(tf.random_normal([n_class]), name="b")
        last_step = outputs[-1]
        output = tf.matmul(last_step, W) + b
    return output
def makeDNN(hidden_layer):
    """Build a ReLU MLP on the enclosing-scope input ``X`` and return its logits.

    Uses the enclosing-scope names ``X``, ``features``, ``wide``, ``labels``
    and ``dropout_rate``. Every hidden layer is ``wide`` units, Xavier
    initialised, followed by ``tf.nn.dropout(layer, dropout_rate)``.

    Args:
        hidden_layer: Number of hidden layers. Zero is supported: the output
            layer is then connected directly to ``X``.

    Returns:
        The linear output tensor ``prev @ Wo + bo``.
    """
    prev_layer = X
    # Width of whatever currently feeds the next matmul. Tracking it lets the
    # output layer match X when there are no hidden layers (the original
    # always used `wide`, which only fits when hidden_layer >= 1).
    in_dim = features

    for i in range(hidden_layer):
        weight = tf.get_variable("W0%d" % i, shape=[in_dim, wide],
                                 initializer=tf.contrib.layers.xavier_initializer())
        bias = tf.Variable(tf.random_normal([wide]))
        activated = tf.nn.relu(tf.matmul(prev_layer, weight) + bias)
        # NOTE(review): in TF1 the second positional argument of tf.nn.dropout
        # is keep_prob, so `dropout_rate` is presumably a keep probability
        # despite its name -- confirm against the caller.
        prev_layer = tf.nn.dropout(activated, dropout_rate)
        in_dim = wide

    # Output layer.
    Wo = tf.get_variable("Wo", shape=[in_dim, labels],
                         initializer=tf.contrib.layers.xavier_initializer())
    bo = tf.Variable(tf.random_normal([labels]))
    return tf.matmul(prev_layer, Wo) + bo
# tf Graph Input
def get_loss(pred, label, end_points, reg_weight=0.001):
    """Classification loss plus an orthogonality penalty on the learned transform.

    Args:
        pred: Logits, B x NUM_CLASSES.
        label: Integer class ids, B.
        end_points: Dict holding ``'transform'``, a B x K x K tensor.
        reg_weight: Weight of the orthogonality penalty.

    Returns:
        Scalar tensor ``classify_loss + reg_weight * mat_diff_loss``.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(per_example)
    tf.summary.scalar('classify loss', classify_loss)

    # Push T @ T^T towards the identity, i.e. T towards an orthogonal matrix.
    transform = end_points['transform']
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    identity = tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(gram - identity)
    tf.summary.scalar('mat loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight
def get_loss(pred, label, end_points, reg_weight=0.001):
    """Per-point classification loss plus an orthogonality penalty.

    This variant logs through the legacy ``tf.scalar_summary`` API.

    Args:
        pred: Logits, B x N x C.
        label: Integer class ids, B x N.
        end_points: Dict holding ``'transform'``, a B x K x K tensor.
        reg_weight: Weight of the orthogonality penalty.

    Returns:
        Scalar tensor ``classify_loss + reg_weight * mat_diff_loss``.
    """
    per_point = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(per_point)
    tf.scalar_summary('classify loss', classify_loss)

    # Push T @ T^T towards the identity, i.e. T towards an orthogonal matrix.
    transform = end_points['transform']
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    identity = tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(gram - identity)
    tf.scalar_summary('mat_loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight
def get_loss(l_pred, seg_pred, label, seg, weight, end_points):
    """Combined object-label and part-segmentation loss with a transform regulariser.

    Args:
        l_pred: Object-class logits.
        seg_pred: Segmentation logits, batch_size x point_num x part_cat_num.
        label: Object-class ids.
        seg: Per-point part ids, batch_size x point_num.
        weight: Mixing factor; the segmentation loss gets ``weight`` and the
            label loss ``1 - weight``.
        end_points: Dict holding ``'transform'``, a B x K x K tensor.

    Returns:
        Tuple ``(total_loss, label_loss, per_instance_label_loss, seg_loss,
        per_instance_seg_loss, per_instance_seg_pred_res)``.
    """
    # Object classification term.
    per_instance_label_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=l_pred, labels=label)
    label_loss = tf.reduce_mean(per_instance_label_loss)

    # Segmentation term: average over points first, then over the batch.
    per_point_seg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=seg_pred, labels=seg)
    per_instance_seg_loss = tf.reduce_mean(per_point_seg_loss, axis=1)
    seg_loss = tf.reduce_mean(per_instance_seg_loss)
    per_instance_seg_pred_res = tf.argmax(seg_pred, 2)

    # Orthogonality penalty: || T @ T^T - I ||, with a fixed weight of 1e-3.
    transform = end_points['transform']
    K = transform.get_shape()[1].value
    gram = tf.matmul(transform, tf.transpose(transform, perm=[0, 2, 1]))
    mat_diff = gram - tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(mat_diff)

    total_loss = weight * seg_loss + (1 - weight) * label_loss + mat_diff_loss * 1e-3
    return (total_loss, label_loss, per_instance_label_loss,
            seg_loss, per_instance_seg_loss, per_instance_seg_pred_res)
def __init__(self, embedding):
    """Create the session, build the similarity graph and normalise ``embedding``.

    Args:
        embedding: 2-D array; ``embedding.shape[1]`` fixes the vector width.

    Sets ``self.sess``, the placeholders ``self.inputs`` / ``self.test_vec``,
    the ops ``self.cos_distance`` / ``self.normalized``, and ``self.embedding``
    (the row-normalised input, evaluated eagerly in the new session).
    """
    self.sess = tf.Session()
    dim = embedding.shape[1]
    self.inputs = tf.placeholder(tf.float32, [None, dim], name='inputs')
    self.test_vec = tf.placeholder(tf.float32, [1, dim], name='test_vec')
    # Dot product of every input row with the single test vector. This equals
    # the cosine similarity only when both sides are unit length.
    self.cos_distance = tf.matmul(self.inputs, tf.transpose(self.test_vec))

    #-----------------------------------------------------------------------
    # Compute normalized embedding matrix
    #-----------------------------------------------------------------------
    squared_sum = tf.reduce_sum(tf.square(self.inputs), axis=1, keep_dims=True)
    row_norm = tf.sqrt(squared_sum)
    self.normalized = self.inputs / row_norm
    self.embedding = self.sess.run(self.normalized, feed_dict={self.inputs: embedding})
#---------------------------------------------------------------------------
def discriminate(self, image, Y):
    """Conditional discriminator: two strided convolutions and a final matmul.

    The label tensor ``Y`` is injected before each layer: it is tiled across
    the spatial grid for the convolutional inputs (24x24, then 12x12) and
    concatenated directly to the flattened features before the last matmul.
    Shapes are printed as the graph is built.

    Args:
        image: Image batch; the hard-coded tiling implies a 24x24 spatial size.
        Y: Label batch, reshaped to [batch_size, 1, 1, dim_y].

    Returns:
        ``lrelu(batchnormalize(h2 @ discrim_W3))``.
    """
    batch = self.batch_size
    stride2 = [1, 2, 2, 1]

    print("Initializing the discriminator")
    print("Y shape", Y.get_shape())
    yb = tf.reshape(Y, tf.stack([batch, 1, 1, self.dim_y]))
    print("image shape", image.get_shape())
    print("yb shape", yb.get_shape())

    # Layer 1: image plus labels tiled over the 24x24 grid.
    labels_24 = yb * tf.ones([batch, 24, 24, self.dim_y])
    X = tf.concat([image, labels_24], 3)
    print("X shape", X.get_shape())
    h1 = lrelu(tf.nn.conv2d(X, self.discrim_W1, strides=stride2, padding='SAME'))
    print("h1 shape", h1.get_shape())

    # Layer 2: features plus labels tiled over the 12x12 grid.
    labels_12 = yb * tf.ones([batch, 12, 12, self.dim_y])
    h1 = tf.concat([h1, labels_12], 3)
    print("h1 shape", h1.get_shape())
    conv2 = tf.nn.conv2d(h1, self.discrim_W2, strides=stride2, padding='SAME')
    h2 = lrelu(batchnormalize(conv2))
    print("h2 shape", h2.get_shape())

    # Flatten, append the raw labels and project.
    h2 = tf.reshape(h2, [batch, -1])
    h2 = tf.concat([h2, Y], 1)
    discri = tf.matmul(h2, self.discrim_W3)
    print("discri shape", discri.get_shape())
    return lrelu(batchnormalize(discri))
def samples_generator(self, batch_size):
    """Build a sampling copy of the conditional generator with its own placeholders.

    The label tensor is concatenated to the input of every layer: directly for
    the two dense layers, tiled over the 6x6 and 12x12 grids for the two
    transposed convolutions.

    Args:
        batch_size: Fixed batch size baked into the placeholders and shapes.

    Returns:
        Tuple ``(Z, Y, x)``: the noise placeholder, the label placeholder and
        the sigmoid-activated generated images of shape
        [batch_size, 24, 24, dim_channel].
    """
    stride2 = [1, 2, 2, 1]

    Z = tf.placeholder(tf.float32, [batch_size, self.dim_z])
    Y = tf.placeholder(tf.float32, [batch_size, self.dim_y])
    yb = tf.reshape(Y, [batch_size, 1, 1, self.dim_y])

    # Dense layer 1.
    noise_and_label = tf.concat([Z, Y], 1)
    h1 = tf.nn.relu(batchnormalize(tf.matmul(noise_and_label, self.gen_W1)))
    h1 = tf.concat([h1, Y], 1)

    # Dense layer 2, reshaped to a 6x6 feature map.
    h2 = tf.nn.relu(batchnormalize(tf.matmul(h1, self.gen_W2)))
    h2 = tf.reshape(h2, [batch_size, 6, 6, self.dim_W2])
    h2 = tf.concat([h2, yb * tf.ones([batch_size, 6, 6, self.dim_y])], 3)

    # Transposed conv: 6x6 -> 12x12.
    shape_12 = [batch_size, 12, 12, self.dim_W3]
    h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=shape_12, strides=stride2)
    h3 = tf.nn.relu(batchnormalize(h3))
    h3 = tf.concat([h3, yb * tf.ones([batch_size, 12, 12, self.dim_y])], 3)

    # Transposed conv: 12x12 -> 24x24, squashed to (0, 1).
    shape_24 = [batch_size, 24, 24, self.dim_channel]
    h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=shape_24, strides=stride2)
    x = tf.nn.sigmoid(h4)
    return Z, Y, x
def transfer_color(content, style):
    """Recolour ``style`` so its channel mean and covariance match ``content``.

    Both images are treated as H x W x C arrays. An affine map
    ``p -> A @ p + b`` is applied to every style pixel, with
    ``A = sqrtm(cov_content) @ inv(sqrtm(cov_style))`` and
    ``b = mean_content - A @ mean_style``.

    Args:
        content: Image supplying the target colour statistics.
        style: Image to recolour; must expose ``.shape``.

    Returns:
        Array with the shape of ``style`` holding the recoloured pixels.
    """
    import scipy.linalg as sl

    def _channel_stats(image):
        # Per-channel mean and the (biased, divide-by-N) channel covariance.
        mean = np.mean(image, axis=(0, 1))
        centered = image - mean
        centered = np.reshape(centered, (-1, centered.shape[2]))
        covariance = np.matmul(centered.T, centered) / (centered.shape[0])
        return mean, covariance

    content_mean, content_covariance = _channel_stats(content)
    style_mean, style_covariance = _channel_stats(style)

    # Affine colour map taking the style statistics onto the content's.
    A = np.matmul(sl.sqrtm(content_covariance), sl.inv(sl.sqrtm(style_covariance)))
    b = content_mean - np.matmul(A, style_mean)

    # Apply the map to every pixel (pixels as columns), then restore layout.
    pixels = np.reshape(style, (-1, style.shape[2])).T
    mapped = np.matmul(A, pixels).T
    return np.reshape(mapped, style.shape) + b
def rotate_points(orig_points, angle, w, h):
    """Rotate normalised points about the centre (0.5, 0.5).

    Points are shifted by -0.5, scaled by ``w`` and ``h``, multiplied by a
    rotation matrix whose columns are divided by ``w`` and ``h`` again, and
    shifted back by +0.5.

    Args:
        orig_points: Tensor of shape [N, 2]; each row is a point (x, y).
        angle: Rotation angle in radians.
        w: Scale applied to the x coordinate (presumably the image width).
        h: Scale applied to the y coordinate (presumably the image height).

    Returns:
        Tensor of shape [N, 2] with the rotated points.
    """
    # Rotation with the division by (w, h) folded into the columns.
    first_row = [tf.cos(angle) / w, tf.sin(angle) / h]
    second_row = [-tf.sin(angle) / w, tf.cos(angle) / h]
    rotate_mat = tf.stack([first_row, second_row])

    # Move the origin to the centre and scale up by (w, h).
    centred = tf.subtract(orig_points, 0.5)
    scaled = tf.stack([centred[:, 0] * w, centred[:, 1] * h], axis=1)
    print(scaled)

    return tf.matmul(scaled, rotate_mat) + 0.5
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    """Affine layer ``input_ @ Matrix + bias`` with variables under ``scope``.

    Args:
        input_: 2-D tensor whose second static dimension is known.
        output_size: Number of output features.
        scope: Variable scope name; defaults to ``"Linear"``.
        stddev: Standard deviation of the normal initializer for ``Matrix``.
        bias_start: Constant initial value for ``bias``.
        with_w: If true, also return the created variables.

    Returns:
        The output tensor, or ``(output, matrix, bias)`` when ``with_w`` is set.
    """
    in_features = input_.get_shape().as_list()[1]
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [in_features, output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size],
                               initializer=tf.constant_initializer(bias_start))
        output = tf.matmul(input_, matrix) + bias
        if not with_w:
            return output
        return output, matrix, bias
#minibatch method that improves on openai
#because it doesn't fix batchsize:
#TODO: recheck when not sleepy
def _fc(self, x, fan_in, fan_out, layer_name, activation=None, L2=1, use_bias=True,
        wmin=None, wmax=None, analysis=False):
    """Fully connected layer with optional weight bounding and summaries.

    Args:
        x: Input tensor.
        fan_in: Input width, forwarded to ``self._get_fc_weights``.
        fan_out: Output width, forwarded to ``self._get_fc_weights``.
        layer_name: Layer name; the part after the last '/' names the scope.
        activation: Forwarded to ``self._activate`` for the layer output.
        L2: Not used in this method.
        use_bias: Add the bias term. Forced to False when bounds are given.
        wmin: Lower weight bound. Must be given together with ``wmax``.
        wmax: Upper weight bound. With bounds set, weights are squashed into
            ``[wmin, wmax]`` through a sigmoid.
        analysis: If true, also return the per-connection products
            ``x[:, :, None] * w[None, :, :]``.

    Returns:
        The layer output, or ``(output, products)`` when ``analysis`` is set.
    """
    show_weight = self.flags.visualize and 'weight' in self.flags.visualize

    bounded = not (wmin is None and wmax is None)
    if bounded:
        use_bias = False
        assert wmin is not None and wmax is not None

    with tf.variable_scope(layer_name.split('/')[-1]):
        w, b = self._get_fc_weights(fan_in, fan_out, layer_name)
        if wmin is not None:
            # Map the sigmoid's output range onto [wmin, wmax].
            span = wmax - wmin
            w = self._activate(w, 'sigmoid') * span + wmin

        net = tf.matmul(x, w)
        if use_bias:
            net = tf.nn.bias_add(net, b)
        net = self._activate(net, activation)

        if show_weight:
            tf.summary.histogram(name='W', values=w, collections=[tf.GraphKeys.WEIGHTS])
            if use_bias:
                tf.summary.histogram(name='bias', values=b, collections=[tf.GraphKeys.WEIGHTS])

        if not analysis:
            return net
        contributions = tf.expand_dims(x, 2) * tf.expand_dims(w, 0)
        return net, contributions
def __init__(self, sigma=0.1, beta_sampling=True, **kwargs):
    """Initialise the RBM base class and add the visible-mean op.

    Args:
        sigma: Standard deviation of input data, for use in sampling.
        beta_sampling: Use beta distribution for sampling, instead of Gaussian.
        **kwargs: Forwarded to ``RBM.__init__``. When ``fromfile`` is truthy,
            ``sigma`` and ``beta_sampling`` are not overwritten here.

    Raises:
        AssertionError: if ``self.sigma`` is still ``None`` afterwards.
    """
    RBM.__init__(self, **kwargs)

    loading_from_file = kwargs.get('fromfile')
    if not loading_from_file:
        self.sigma = sigma
        self.beta_sampling = beta_sampling
    if self.sigma is None:
        raise AssertionError('Need to supply sigma param.')

    self.hidden = tf.placeholder(self.dtype, name='hidden', shape=[None, self.n_hidden])
    # Mean of the visible units given hidden activations: sigmoid(h W^T + b_vis).
    visible_pre = tf.matmul(self.hidden, self.params['W'], transpose_b=True)
    self.mean_v = tf.sigmoid(visible_pre + self.params['bvis'])
def recode_cost(self, inputs, variation, eps=1e-5, **kwargs):
    """Encode-decode cost for a batch of samples under the current parameters.

    Args:
        inputs: Batch of input samples.
        variation: Noise tensor used to draw the latent sample
            ``z = z_mu + sqrt(exp(z_sig)) * variation``.
        eps: Small constant added inside the log of the decoded variance.
        **kwargs: Ignored.

    Returns:
        Scalar tensor: half the batch mean of (likelihood term + latent term).
    """
    # Encoder: latent mean and log-variance from the hidden representation.
    encoded = self.get_h_inputs(inputs)
    z_mu = tf.matmul(encoded, self.params['Mhz']) + self.params['bMhz']
    z_sig = tf.matmul(encoded, self.params['Shz']) + self.params['bShz']

    # Latent term, summed over latent dimensions (KL-style regulariser).
    lat_loss = -tf.reduce_sum(1 + z_sig - z_mu**2 - tf.exp(z_sig), 1)

    # Reparameterised latent sample.
    z = z_mu + tf.sqrt(tf.exp(z_sig)) * variation

    # Decoder: mean and variance of the reconstruction.
    decoded = self.get_h_latents(z)
    x_mu = self.decoding(tf.matmul(decoded, self.params['Mhx']) + self.params['bMhx'])
    x_sig = self.decoding(tf.matmul(decoded, self.params['Shx']) + self.params['bShx'])

    # Decoding likelihood term. Note that eps guards only the log, not the division.
    like_loss = tf.reduce_sum(tf.log(x_sig + eps) + (inputs - x_mu)**2 / x_sig, 1)

    return .5 * tf.reduce_mean(like_loss + lat_loss)
def output_module(self):
    """Answer module: attend over the memory with the query, then predict.

    Steps:
        1. Weight the hidden states by a softmax over the element-wise product
           of the query and each hidden state, and sum over the block axis.
        2. Combine the query with a linear map of that sum, apply the
           activation and dropout, and project with ``self.R``.

    Shapes per the original notes: ``self.query_embedding`` is
    [batch_size, hidden_size], ``self.hidden_state`` is
    [batch_size, block_size, hidden_size].

    Returns:
        Logits ``y``; per the original notes, [batch_size, vocab_size].
    """
    # 1.1 Similarity weights. The softmax uses its default axis, so it is
    #     taken over the last (feature) axis rather than the block axis.
    query = tf.expand_dims(self.query_embedding, axis=1)
    p = tf.nn.softmax(tf.multiply(query, self.hidden_state))

    # 1.2 Weighted sum of the hidden states over the block axis.
    u = tf.reduce_sum(tf.multiply(p, self.hidden_state), axis=1)

    # 2. Non-linear combination of query and attended memory, then projection.
    memory_term = tf.matmul(u, self.H) + self.h_u_bias
    hidden = self.activation(self.query_embedding + memory_term, scope="query_add_hidden")
    hidden = tf.nn.dropout(hidden, keep_prob=self.dropout_keep_prob)
    y = tf.matmul(hidden, self.R) + self.y_bias
    return y
def inference(self):
    """Encoder-only classifier: embed, add a learned position term, encode, project.

    The input embeddings are scaled by ``sqrt(d_model)``, offset by the
    learned ``input_mask`` variable of shape [sequence_length, 1] (broadcast
    over the feature axis), run through ``Encoder``, flattened per example
    and projected to class scores.

    Returns:
        The logits tensor ``Q_flat @ W_projection + b_projection``.
    """
    # 1. Scaled embeddings plus the learned per-position offset.
    embedded = tf.nn.embedding_lookup(self.Embedding, self.input_x)
    scale = tf.sqrt(tf.cast(self.d_model, dtype=tf.float32))
    embedded = tf.multiply(embedded, scale)
    input_mask = tf.get_variable("input_mask", [self.sequence_length, 1],
                                 initializer=self.initializer)
    embedded = tf.add(embedded, input_mask)

    # 2. Encoder stack; the same tensor is passed for both of its input slots.
    encoder_class = Encoder(self.d_model, self.d_k, self.d_v, self.sequence_length,
                            self.h, self.batch_size, self.num_layer,
                            embedded, embedded,
                            dropout_keep_prob=self.dropout_keep_prob,
                            use_residual_conn=self.use_residual_conn)
    Q_encoded, K_encoded = encoder_class.encoder_fn()

    # 3. Flatten each example to one row and project.
    Q_flat = tf.reshape(Q_encoded, shape=(self.batch_size, -1))
    with tf.variable_scope("output"):
        logits = tf.matmul(Q_flat, self.W_projection) + self.b_projection
    print("logits:", logits)
    return logits
def inference(self):
    """Build the graph: embedding -> recurrent-conv layer -> max-pool -> dropout -> linear.

    Stores the embedded input in ``self.embedded_words``.

    Returns:
        The logits tensor (no softmax is applied in this method).
    """
    # 1. Word embeddings; per the original notes [None, sentence_length, embed_size].
    self.embedded_words = tf.nn.embedding_lookup(self.Embedding, self.input_x)

    # 2. Context-aware features; per the original notes
    #    [None, sentence_length, embed_size*3].
    features = self.conv_layer_with_recurrent_structure()

    # 3. Max over axis 1 (the sentence axis per the shape note above).
    pooled = tf.reduce_max(features, axis=1)

    # 4. Dropout followed by the linear projection.
    with tf.name_scope("dropout"):
        h_drop = tf.nn.dropout(pooled, keep_prob=self.dropout_keep_prob)
    with tf.name_scope("output"):
        logits = tf.matmul(h_drop, self.W_projection) + self.b_projection
    return logits
def extract_argmax_and_embed(embedding, output_projection=None):
    """Return a decoder loop function that feeds back the previous argmax symbol.

    :param embedding: embedding tensor for symbols
    :param output_projection: None or a pair (W, B). If provided, each fed
        previous output is first multiplied by W and has B added.
    :return: a loop function ``(prev, _) -> embedded previous symbol``
    """
    def loop_function(prev, _):
        scores = prev
        if output_projection is not None:
            weight, bias = output_projection[0], output_projection[1]
            scores = tf.matmul(scores, weight) + bias
        # Index of the highest-scoring symbol for each row.
        prev_symbol = tf.argmax(scores, 1)
        # Look that index up in the embedding table.
        return tf.gather(embedding, prev_symbol)

    return loop_function
# NOTE(review): the trailing comment here was mis-encoded in the source and is
# unreadable; it appears to introduce the RNN decoder that follows (at test
# time, the output at step t feeds step t+1) -- TODO confirm.
def gru_cell_decoder(self, Xt, h_t_minus_1, context_vector):
    """Single decoder GRU step conditioned on an attention context vector.

    :param Xt: current input; per the original notes [batch_size, embed_size]
    :param h_t_minus_1: previous hidden state; per the original notes
        [batch_size, embed_size]
    :param context_vector: attention result for this step (weighted sum of
        the inputs); per the original notes [batch_size, embed_size]
    :return: ``(h_t, h_t)`` -- the new hidden state, returned twice
    """
    # 1. Update gate: how much of the candidate replaces the previous state.
    z_pre = (tf.matmul(Xt, self.W_z_decoder)
             + tf.matmul(h_t_minus_1, self.U_z_decoder)
             + tf.matmul(context_vector, self.C_z_decoder)
             + self.b_z_decoder)
    update_gate = tf.nn.sigmoid(z_pre)

    # 2. Reset gate: how much the previous state feeds the candidate.
    r_pre = (tf.matmul(Xt, self.W_r_decoder)
             + tf.matmul(h_t_minus_1, self.U_r_decoder)
             + tf.matmul(context_vector, self.C_r_decoder)
             + self.b_r_decoder)
    reset_gate = tf.nn.sigmoid(r_pre)

    # 3. Candidate state; only the recurrent term is gated by the reset gate.
    candidate_pre = (tf.matmul(Xt, self.W_h_decoder)
                     + reset_gate * (tf.matmul(h_t_minus_1, self.U_h_decoder))
                     + tf.matmul(context_vector, self.C_h_decoder)
                     + self.b_h_decoder)
    candidate = tf.nn.tanh(candidate_pre)

    # 4. New state: interpolation between the previous state and the candidate.
    h_t = (1 - update_gate) * h_t_minus_1 + update_gate * candidate
    return h_t, h_t
# forward gru for first level: word levels
def inference2(self):
    """RCNN branch: embedding -> recurrent-conv layer -> max-pool -> dropout -> linear.

    Same pipeline as the sibling ``inference`` but projecting with the
    ``*_rcnn`` parameters and without an "output" name scope.

    Returns:
        The logits tensor; per the original notes [batch_size, num_classes].
    """
    # 1. Word embeddings; per the original notes [None, sentence_length, embed_size].
    self.embedded_words = tf.nn.embedding_lookup(self.Embedding, self.input_x)

    # 2. Context-aware features; per the original notes
    #    [None, sentence_length, embed_size*3].
    features = self.conv_layer_with_recurrent_structure()

    # 3. Max over axis 1.
    pooled = tf.reduce_max(features, axis=1)

    # 4. Dropout, then the linear projection.
    with tf.name_scope("dropout_rcnn"):
        h_drop = tf.nn.dropout(pooled, keep_prob=self.dropout_keep_prob)
    projected = tf.matmul(h_drop, self.W_projection_rcnn)
    logits = projected + self.b_projection_rcnn
    return logits
def gru_single_step_word_level(self, Xt, h_t_minus_1):
    """Single GRU step for the word-level encoder.

    :param Xt: current input; per the original notes
        [batch_size*num_sentences, embed_size]
    :param h_t_minus_1: previous hidden state; per the original notes
        [batch_size*num_sentences, embed_size]
    :return: new hidden state ``h_t``
    """
    # Update gate: how much of the candidate replaces the previous state.
    z_pre = tf.matmul(Xt, self.W_z) + tf.matmul(h_t_minus_1, self.U_z) + self.b_z
    update_gate = tf.nn.sigmoid(z_pre)

    # Reset gate: how much the previous state contributes to the candidate.
    r_pre = tf.matmul(Xt, self.W_r) + tf.matmul(h_t_minus_1, self.U_r) + self.b_r
    reset_gate = tf.nn.sigmoid(r_pre)

    # Candidate state; the recurrent term is scaled by the reset gate.
    candidate_pre = (tf.matmul(Xt, self.W_h)
                     + reset_gate * (tf.matmul(h_t_minus_1, self.U_h))
                     + self.b_h)
    candidate = tf.nn.tanh(candidate_pre)

    # Interpolate between the previous state and the candidate.
    return (1 - update_gate) * h_t_minus_1 + update_gate * candidate
def gru_single_step_sentence_level(self, Xt, h_t_minus_1):
    """Single GRU step for the sentence-level encoder.

    :param Xt: current input; per the original notes [batch_size, hidden_size*2]
    :param h_t_minus_1: previous hidden state; per the original notes
        [batch_size, hidden_size*2]
    :return: new hidden state ``h_t``
    """
    # Update gate: how much of the candidate replaces the previous state.
    z_pre = (tf.matmul(Xt, self.W_z_sentence)
             + tf.matmul(h_t_minus_1, self.U_z_sentence)
             + self.b_z_sentence)
    update_gate = tf.nn.sigmoid(z_pre)

    # Reset gate: how much the previous state contributes to the candidate.
    r_pre = (tf.matmul(Xt, self.W_r_sentence)
             + tf.matmul(h_t_minus_1, self.U_r_sentence)
             + self.b_r_sentence)
    reset_gate = tf.nn.sigmoid(r_pre)

    # Candidate state; the recurrent term is scaled by the reset gate.
    candidate_pre = (tf.matmul(Xt, self.W_h_sentence)
                     + reset_gate * (tf.matmul(h_t_minus_1, self.U_h_sentence))
                     + self.b_h_sentence)
    candidate = tf.nn.tanh(candidate_pre)

    # Interpolate between the previous state and the candidate.
    h_t = (1 - update_gate) * h_t_minus_1 + update_gate * candidate
    return h_t
# forward gru for first level: word levels
def inference(self):
    """Two-sentence CNN graph: embed both inputs, extract features, concat, project.

    Stores the embedded inputs in ``self.embedded_words1`` / ``2`` and their
    channel-expanded versions in ``self.sentence_embeddings_expanded1`` / ``2``.

    Returns:
        The logits tensor; per the original notes [None, num_classes].
    """
    # 1. Embed each sentence and add a trailing channel axis for 2-D convolution.
    self.embedded_words1 = tf.nn.embedding_lookup(self.Embedding, self.input_x)
    self.sentence_embeddings_expanded1 = tf.expand_dims(self.embedded_words1, -1)
    self.embedded_words2 = tf.nn.embedding_lookup(self.Embedding, self.input_x2)
    self.sentence_embeddings_expanded2 = tf.expand_dims(self.embedded_words2, -1)

    # 2. Conv/relu/pool/dropout features, with separate name-scope prefixes.
    features_s1 = self.conv_relu_pool_dropout(
        self.sentence_embeddings_expanded1, name_scope_prefix="s1")
    features_s2 = self.conv_relu_pool_dropout(
        self.sentence_embeddings_expanded2, name_scope_prefix="s2")

    # 3. Concatenate both feature vectors along the feature axis.
    joint = tf.concat([features_s1, features_s2], axis=1)

    # 4. Linear projection to class scores.
    with tf.name_scope("output"):
        logits = tf.matmul(joint, self.W_projection) + self.b_projection
    return logits
def create_critic_net(self, num_states=4, num_actions=1):
    """Build the critic Q(s, a): two hidden layers with the action entering at layer 2.

    Weights are uniformly initialised in +/- 1/sqrt(fan) for the hidden
    layers and +/- 0.003 for the output layer.

    Args:
        num_states: Width of the state input.
        num_actions: Width of the action input.

    Returns:
        Tuple ``(W1_c, B1_c, W2_c, W2_action_c, B2_c, W3_c, B3_c,
        critic_q_model, critic_state_in, critic_action_in)``.
    """
    N_HIDDEN_1 = 400
    N_HIDDEN_2 = 300

    critic_state_in = tf.placeholder("float", [None, num_states])
    critic_action_in = tf.placeholder("float", [None, num_actions])

    def uniform_variable(shape, limit):
        # Variable drawn uniformly from [-limit, limit).
        return tf.Variable(tf.random_uniform(shape, -limit, limit))

    limit_1 = 1 / math.sqrt(num_states)
    limit_2 = 1 / math.sqrt(N_HIDDEN_1 + num_actions)
    limit_out = 0.003

    # Creation order is kept so default variable names stay the same.
    W1_c = uniform_variable([num_states, N_HIDDEN_1], limit_1)
    B1_c = uniform_variable([N_HIDDEN_1], limit_1)
    W2_c = uniform_variable([N_HIDDEN_1, N_HIDDEN_2], limit_2)
    W2_action_c = uniform_variable([num_actions, N_HIDDEN_2], limit_2)
    B2_c = uniform_variable([N_HIDDEN_2], limit_2)
    W3_c = uniform_variable([N_HIDDEN_2, 1], limit_out)
    B3_c = uniform_variable([1], limit_out)

    H1_c = tf.nn.softplus(tf.matmul(critic_state_in, W1_c) + B1_c)
    # The action joins the state features at the second hidden layer.
    H2_c = tf.nn.tanh(tf.matmul(H1_c, W2_c) + tf.matmul(critic_action_in, W2_action_c) + B2_c)
    critic_q_model = tf.matmul(H2_c, W3_c) + B3_c

    return (W1_c, B1_c, W2_c, W2_action_c, B2_c, W3_c, B3_c,
            critic_q_model, critic_state_in, critic_action_in)
def create_actor_net(self, num_states=4, num_actions=1):
    """Build the actor network mapping states to actions.

    Two hidden layers (softplus, then tanh) and a linear output. Weights are
    uniformly initialised in +/- 1/sqrt(fan_in) for the hidden layers and
    +/- 0.003 for the output layer.

    Args:
        num_states: Width of the state input.
        num_actions: Width of the action output.

    Returns:
        Tuple ``(W1_a, B1_a, W2_a, B2_a, W3_a, B3_a, actor_state_in, actor_model)``.
    """
    N_HIDDEN_1 = 400
    N_HIDDEN_2 = 300

    actor_state_in = tf.placeholder("float", [None, num_states])

    def uniform_variable(shape, limit):
        # Variable drawn uniformly from [-limit, limit).
        return tf.Variable(tf.random_uniform(shape, -limit, limit))

    limit_1 = 1 / math.sqrt(num_states)
    limit_2 = 1 / math.sqrt(N_HIDDEN_1)
    limit_out = 0.003

    # Creation order is kept so default variable names stay the same.
    W1_a = uniform_variable([num_states, N_HIDDEN_1], limit_1)
    B1_a = uniform_variable([N_HIDDEN_1], limit_1)
    W2_a = uniform_variable([N_HIDDEN_1, N_HIDDEN_2], limit_2)
    B2_a = uniform_variable([N_HIDDEN_2], limit_2)
    W3_a = uniform_variable([N_HIDDEN_2, num_actions], limit_out)
    B3_a = uniform_variable([num_actions], limit_out)

    H1_a = tf.nn.softplus(tf.matmul(actor_state_in, W1_a) + B1_a)
    H2_a = tf.nn.tanh(tf.matmul(H1_a, W2_a) + B2_a)
    actor_model = tf.matmul(H2_a, W3_a) + B3_a

    return W1_a, B1_a, W2_a, B2_a, W3_a, B3_a, actor_state_in, actor_model
def attention(decoder_output, seq_outputs, output_size, time_steps, name="attention"):
    """Additive attention scores of one decoder output against each encoder output.

    For every ``e`` in ``seq_outputs`` the score is
    ``tanh(e @ w1 + decoder_output @ w2) @ v``.

    Args:
        decoder_output: Current decoder output; its last dimension must be
            ``output_size``.
        seq_outputs: Iterable of encoder outputs, one tensor per time step.
        output_size: Feature width of the encoder and decoder outputs.
        time_steps: Unused; kept for interface compatibility.
        name: Variable scope holding ``w1``, ``w2`` and ``v``.

    Returns:
        List with one un-normalised score tensor per element of ``seq_outputs``.
    """
    with tf.variable_scope(name):
        w_1 = tf.get_variable("w1", [output_size, output_size], tf.float32,
                              tf.contrib.layers.xavier_initializer())
        w_2 = tf.get_variable("w2", [output_size, output_size], tf.float32,
                              tf.contrib.layers.xavier_initializer())
        v = tf.get_variable("v", [output_size, 1], tf.float32,
                            tf.contrib.layers.xavier_initializer())

        # The decoder projection is the same for every encoder step, so it is
        # built once instead of once per step.
        decoder_proj = tf.matmul(decoder_output, w_2)
        return [
            tf.matmul(tf.nn.tanh(tf.matmul(seq_out, w_1) + decoder_proj), v)
            for seq_out in seq_outputs
        ]
def _score(self, prev_decoder_state, prev_embedding):
    """Attention scores over the input document, with padding masked out.

    In 'decode' mode the query and encoder partial scores are read from
    placeholders; otherwise from the tensors built in the graph.

    Args:
        prev_decoder_state: Previous decoder state.
        prev_embedding: Embedding of the previously emitted symbol.

    Returns:
        Masked scores; per the original note,
        [batch_size, input_sequence_length].
    """
    decoding = self.mode == 'decode'
    if decoding:
        query_part = self.query_attention_partial_score_placeholder
        encoder_part = self.encoder_state_attention_partial_scores_placeholder
    else:
        query_part = self.query_attention_partial_score
        encoder_part = self.encoder_state_attention_partial_scores

    embedding_part = tf.matmul(prev_embedding, self.attention_w_e)
    state_part = tf.matmul(prev_decoder_state, self.attention_w)
    combined = state_part + embedding_part + query_part + encoder_part + self.attention_b

    # Additive-attention energy: v . tanh(...), reduced over axis 2.
    energy = tf.reduce_sum(self.attention_v * tf.tanh(combined), axis=2)
    energy = tf.transpose(energy, [1, 0])

    # Padding positions (sign(id) == 0) get a huge negative penalty so they
    # drop out of the weighted average.
    padding_penalty = -1e20 * tf.to_float(1 - tf.sign(self.documents_placeholder))
    return energy + padding_penalty
def build_model(self):
    """Build the output layer, loss, clipped-SGD training op and saver.

    Calls ``self.build_memory()`` first, then sets ``self.W``, ``self.loss``,
    ``self.lr``, ``self.opt``, ``self.optim`` and ``self.saver``, and runs the
    variable initializer in the default session.
    """
    self.build_memory()

    # Final projection from the last memory hop to vocabulary scores.
    self.W = tf.Variable(tf.random_normal([self.edim, self.nwords], stddev=self.init_std))
    z = tf.matmul(self.hid[-1], self.W)

    # Keyword arguments: TF >= 1.0 rejects positional logits/labels here, and
    # the names are the same in older releases, so this works on both.
    self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=z, labels=self.target)

    self.lr = tf.Variable(self.current_lr)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)

    # Clip each gradient by norm before applying it.
    params = [self.A, self.B, self.C, self.T_A, self.T_B, self.W]
    grads_and_vars = self.opt.compute_gradients(self.loss, params)
    clipped_grads_and_vars = [
        (tf.clip_by_norm(grad, self.max_grad_norm), var)
        for grad, var in grads_and_vars
    ]

    # The global-step increment is forced to run with every training step.
    inc = self.global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        self.optim = self.opt.apply_gradients(clipped_grads_and_vars)

    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()
def _setup(self, x, prev_state, prev_output):
    """Set up one LSTM cell step.

    :param x: Input tensor.
    :param prev_state: Previous cell state tensor.
    :param prev_output: Previous cell output tensor.
    :return: Tuple of cell state and cell output tensors.
    """
    def affine(w, u, b):
        # Shared pre-activation form: x @ w + prev_output @ u + b.
        return tf.matmul(x, w) + tf.matmul(prev_output, u) + b

    input_gate = tf.nn.sigmoid(affine(self._wi, self._ui, self._bi))
    forget_gate = tf.nn.sigmoid(affine(self._wf, self._uf, self._bf))
    output_gate = tf.nn.sigmoid(affine(self._wo, self._uo, self._bo))

    # Candidate state, passed through the optional activation.
    candidate = affine(self._wc, self._uc, self._bc)
    if self._activation is not None:
        candidate = self._activation(candidate)

    state = forget_gate * prev_state + input_gate * candidate
    # The output gate is applied to the state directly (no extra squashing).
    output = output_gate * state
    return state, output
def __call__(self, input_layer, output_size, scope=None, in_dim=None, stddev=0.02, bias_start=0.0):
    """Apply a linear layer to ``input_layer`` and return the wrapped result.

    4-D inputs are flattened to [batch, prod(other dims)] first. Uses the
    legacy ``tf.pack`` API.

    Args:
        input_layer: Layer wrapper exposing ``.shape``, ``.tensor`` and
            ``.with_tensor``.
        output_size: Number of output features.
        scope: Variable scope name; defaults to ``"Linear"``.
        in_dim: Input width override; defaults to the second input dimension.
        stddev: Standard deviation of the normal initializer for ``Matrix``.
        bias_start: Constant initial value for ``bias``.

    Returns:
        ``input_layer.with_tensor(input_ @ Matrix + bias, parameters=self.vars)``.
    """
    shape = input_layer.shape
    input_ = input_layer.tensor
    try:
        if len(shape) == 4:
            flat_width = np.prod(shape[1:])
            input_ = tf.reshape(input_, tf.pack([tf.shape(input_)[0], flat_width]))
            # The dynamic reshape loses the static width, so restore it.
            input_.set_shape([None, np.prod(shape[1:])])
            shape = input_.get_shape().as_list()

        with tf.variable_scope(scope or "Linear"):
            matrix = self.variable("Matrix", [in_dim or shape[1], output_size], dt=tf.float32,
                                   init=tf.random_normal_initializer(stddev=stddev))
            bias = self.variable("bias", [output_size], init=tf.constant_initializer(bias_start))
            projected = tf.matmul(input_, matrix) + bias
            return input_layer.with_tensor(projected, parameters=self.vars)
    except Exception:
        # NOTE(review): debugging hook. Any failure drops into ipdb and the
        # call then returns None instead of raising -- consider removing.
        import ipdb
        ipdb.set_trace()
def setupOutput(self):
    """Set ``self.output`` to ``input @ self.W`` (no bias, no activation).

    An input of rank greater than 2 is first reshaped to
    ``[-1, self.inputShape]``.
    """
    needs_flatten = len(self.input.get_shape()) > 2
    if needs_flatten:
        # Collapse everything after the leading dimension into one vector.
        features = tf.reshape(self.input, [-1, self.inputShape])
    else:
        features = self.input
    self.output = tf.matmul(features, self.W)
def setupOutput(self):
    """Set ``self.output`` to ``softmax(input @ self.W + self.b)``.

    An input of rank greater than 2 is first reshaped to
    ``[-1, self.inputShape]``.
    """
    needs_flatten = len(self.input.get_shape()) > 2
    if needs_flatten:
        # Collapse everything after the leading dimension into one vector.
        features = tf.reshape(self.input, [-1, self.inputShape])
    else:
        features = self.input
    logits = tf.matmul(features, self.W) + self.b
    self.output = tf.nn.softmax(logits)
def setupOutput(self):
    """Set ``self.output`` to ``relu(input @ self.W + self.b)``.

    An input of rank greater than 2 is first reshaped to
    ``[-1, self.inputShape]``.
    """
    needs_flatten = len(self.input.get_shape()) > 2
    if needs_flatten:
        # Collapse everything after the leading dimension into one vector.
        features = tf.reshape(self.input, [-1, self.inputShape])
    else:
        features = self.input
    pre_activation = tf.matmul(features, self.W) + self.b
    self.output = tf.nn.relu(pre_activation)
def noisy_dense(inputs, units, bias_shape, c_names, w_i, b_i=None, activation=tf.nn.relu, noisy_distribution='factorised'):
    """Dense layer whose weights (and optional bias) are perturbed by noise.

    :param inputs: Input tensor or a value convertible to one; inputs of
        rank greater than 2 are flattened.
    :param units: Number of output units.
    :param bias_shape: Shape of the bias variable, or ``None`` for no bias.
        Its first entry must equal ``units``.
    :param c_names: Collections passed to the noise-scale variables.
    :param w_i: Initializer for the weight and weight-noise variables.
    :param b_i: Initializer for the bias and bias-noise variables.
    :param activation: Callable applied to the result, or ``None``.
    :param noisy_distribution: ``'independent'`` or ``'factorised'``; any
        other value leaves weights and bias unperturbed.
    :return: The (optionally activated) layer output tensor.
    """
    def scale_noise(eps):
        # sign(eps) * sqrt(|eps|), applied to the factorised noise samples.
        return tf.multiply(tf.sign(eps), tf.pow(tf.abs(eps), 0.5))

    if not isinstance(inputs, ops.Tensor):
        inputs = ops.convert_to_tensor(inputs, dtype='float')
    if len(inputs.shape) > 2:
        inputs = tf.contrib.layers.flatten(inputs)
    in_features = inputs.shape[1]

    independent = noisy_distribution == 'independent'
    factorised = noisy_distribution == 'factorised'

    weights = tf.get_variable('weights', shape=[in_features, units], initializer=w_i)
    w_noise = tf.get_variable('w_noise', [in_features, units], initializer=w_i, collections=c_names)
    if independent:
        # One fresh normal sample per weight entry.
        weights += tf.multiply(tf.random_normal(shape=w_noise.shape), w_noise)
    elif factorised:
        # One sample per input row and one per output column; their outer
        # product supplies the per-weight noise.
        noise_1 = scale_noise(tf.random_normal(tf.TensorShape([in_features, 1]), dtype=tf.float32))
        noise_2 = scale_noise(tf.random_normal(tf.TensorShape([1, units]), dtype=tf.float32))
        weights += tf.multiply(noise_1 * noise_2, w_noise)
    dense = tf.matmul(inputs, weights)

    if bias_shape is None:
        pre_activation = dense
    else:
        assert bias_shape[0] == units
        biases = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        b_noise = tf.get_variable('b_noise', [1, units], initializer=b_i, collections=c_names)
        if independent:
            biases += tf.multiply(tf.random_normal(shape=b_noise.shape), b_noise)
        elif factorised:
            # The bias reuses the per-output noise sampled for the weights.
            biases += tf.multiply(noise_2, b_noise)
        pre_activation = dense + biases

    if activation is not None:
        return activation(pre_activation)
    return pre_activation
def model(data_feed):
    """Two hidden layers with conditional dropout, then a linear output.

    Reads ``f``, ``w1``/``b1``, ``w2``/``b2``, ``w_out``/``b_out``,
    ``is_training`` and ``p`` from the enclosing scope.
    """
    def maybe_dropout(h):
        # Dropout only on the is_training branch; identity otherwise.
        return tf.cond(is_training, lambda: tf.nn.dropout(h, p), lambda: h)

    hidden = data_feed
    for weight, bias in ((w1, b1), (w2, b2)):
        hidden = maybe_dropout(f(tf.matmul(hidden, weight) + bias))
    return tf.matmul(hidden, w_out) + b_out
def feedforward(x):
    """Five hidden layers with conditional dropout, then a linear output.

    Reads ``f``, the dicts ``W`` and ``b`` (keys ``'1'`` to ``'6'``),
    ``is_training`` and ``p`` from the enclosing scope.
    """
    def maybe_dropout(h):
        # Dropout only on the is_training branch; identity otherwise.
        return tf.cond(is_training, lambda: tf.nn.dropout(h, p), lambda: h)

    hidden = x
    for key in ('1', '2', '3', '4', '5'):
        hidden = maybe_dropout(f(tf.matmul(hidden, W[key]) + b[key]))
    return tf.matmul(hidden, W['6']) + b['6']
def get_topics(self, sess, topn):
    """Return the top entries and the entropy of each topic's distribution.

    Evaluates ``softmax(topic_output_embedding @ tm_softmax_w + tm_softmax_b)``
    in ``sess`` and walks its first ``self.config.topic_number`` rows.

    :param sess: Session used to evaluate the softmax.
    :param topn: Number of top entries requested from ``matutils.argsort``.
    :return: ``(topics, entropy)`` lists with one element per topic:
        the ``matutils.argsort(..., reverse=True)`` result and the
        ``scipy.stats.entropy`` of the row.
    """
    topic_logits = tf.matmul(self.topic_output_embedding, self.tm_softmax_w) + self.tm_softmax_b
    tw_dist = sess.run(tf.nn.softmax(topic_logits))

    topics = []
    entropy = []
    # range (not xrange) so the loop runs on Python 3 as well as Python 2.
    for ti in range(self.config.topic_number):
        row = tw_dist[ti]
        topics.append(matutils.argsort(row, topn=topn, reverse=True))
        entropy.append(scipy.stats.entropy(row))
    return topics, entropy

# Trailing note kept from the original listing (it introduces a definition
# that is not part of this excerpt): get top topics and words given a doc
def make_skipgram_softmax_loss(embeddings_matrix, vocabulary_size, vector_size):
    """Build a full-softmax skip-gram loss over a shared embedding table.

    :param embeddings_matrix: Initial values for the embedding variable.
    :param vocabulary_size: Number of rows of the embedding variable.
    :param vector_size: Number of columns of the embedding variable.
    :return: ``(vectors, minibatch, loss)``: the embedding variable, the
        int32 placeholder of shape ``(None, 2)`` and the mean loss tensor.
    """
    vectors = tf.get_variable(
        'vectors',
        (vocabulary_size, vector_size),
        dtype=tf.float32,
        initializer=tf.constant_initializer(embeddings_matrix),
    )
    # Column 0 is looked up as the center word; column 1 is the label.
    minibatch = tf.placeholder(shape=(None, 2), dtype=tf.int32)

    center_word_vector = tf.nn.embedding_lookup(vectors, minibatch[:, 0])
    # Score the center vector against every row of the same table.
    logits = tf.matmul(center_word_vector, vectors, transpose_b=True)
    labels = minibatch[:, 1]

    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return vectors, minibatch, tf.reduce_mean(per_example)
def call(self, input):
    """Multiply ``input`` by the transpose of ``self._against``.

    When ``self._space_transform`` is truthy, ``input`` is first multiplied
    by it. Per the original author's notes, ``input`` is batch by depth,
    ``self._against`` is output by depth, and the result is batch by output.
    """
    # NOTE(review): this is a truthiness test, not an `is not None` test;
    # confirm what type _space_transform holds where it is assigned.
    projected = tf.matmul(input, self._space_transform) if self._space_transform else input
    return tf.matmul(projected, self._against, transpose_b=True)
def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
    """Build the decoder cell stack and return the decoder's output.

    Creates a ``MultiRNNCell`` of ``self.config.rnn_layers`` cells, wraps it
    according to ``connect_output_decoder`` and ``apply_attention``, and
    returns the result of ``Seq2SeqDecoder.decode``.

    The projection for mismatched encoder/decoder sizes is guarded by
    ``assert False``, so that path fails whenever assertions are enabled.
    """
    cell_dec = tf.contrib.rnn.MultiRNNCell(
        [self.make_rnn_cell(i, True) for i in range(self.config.rnn_layers)])

    encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
    decoder_hidden_size = int(cell_dec.output_size)
    sizes_match = encoder_hidden_size == decoder_hidden_size

    if sizes_match:
        # Flatten the encoder state and repack it in the decoder's structure.
        enc_final_state = nest.pack_sequence_as(cell_dec.state_size,
                                                nest.flatten(enc_final_state))
    else:
        # Encoder and decoder sizes differ: project through a ReLU layer.
        assert False, (encoder_hidden_size, decoder_hidden_size)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable('kernel',
                                     (encoder_hidden_size, decoder_hidden_size),
                                     dtype=tf.float32)
            enc_final_state = nest.map_structure(
                lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
            enc_hidden_states = tf.nn.relu(
                tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))

    # Pick the wrapper that carries the encoder's final state.
    if self.config.connect_output_decoder:
        cell_dec = ParentFeedingCellWrapper(cell_dec, enc_final_state)
    else:
        cell_dec = InputIgnoringCellWrapper(cell_dec, enc_final_state)

    if self.config.apply_attention:
        attention = LuongAttention(self.config.decoder_hidden_size,
                                   enc_hidden_states,
                                   self.input_length_placeholder,
                                   probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(cell_dec, attention,
                                    cell_input_fn=lambda inputs, _: inputs,
                                    attention_layer_size=self.config.decoder_hidden_size,
                                    initial_cell_state=enc_final_state)
        # The attention wrapper's zero state becomes the decoder's initial
        # state; the encoder state was passed in as initial_cell_state.
        enc_final_state = cell_dec.zero_state(self.batch_size, dtype=tf.float32)

    decoder = Seq2SeqDecoder(self.config,
                             self.input_placeholder,
                             self.input_length_placeholder,
                             self.output_placeholder,
                             self.output_length_placeholder,
                             self.batch_number_placeholder)
    return decoder.decode(cell_dec, enc_final_state,
                          self.config.grammar.output_size,
                          output_embed_matrix, training)
def build_decoder(self):
    """Build the decoder that yields ``self.p_x_i`` from ``self.h``.

    Under the ``"decoder"`` variable scope, creates ``R`` of shape
    ``[vocab_size, h_dim]`` and ``b`` of shape ``[vocab_size]``, then stores
    the squeezed softmax of ``-(h @ R^T) @ I + b`` in ``self.p_x_i``.
    """
    with tf.variable_scope("decoder"):
        vocab_size = self.reader.vocab_size
        R = tf.get_variable("R", [vocab_size, self.h_dim])
        b = tf.get_variable("b", [vocab_size])

        # vocab_size x vocab_size identity matrix.
        identity = tf.diag([1.] * vocab_size)

        projected = tf.matmul(self.h, R, transpose_b=True)
        energy = -tf.matmul(projected, identity) + b
        self.p_x_i = tf.squeeze(tf.nn.softmax(energy))