The following 30 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.contrib.rnn.GRUCell().
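Before the examples, here is a minimal sketch of the pattern most of them share: build an rnn.GRUCell and unroll it over a padded batch with tf.nn.dynamic_rnn. The tensor shapes and placeholder names below are illustrative assumptions, not taken from any of the projects.

# Minimal GRUCell sketch (assumed shapes and names, TensorFlow 1.x contrib API)
import tensorflow as tf
from tensorflow.contrib import rnn

batch_size, max_time, input_dim, hidden_size = 32, 20, 128, 256

inputs = tf.placeholder(tf.float32, [batch_size, max_time, input_dim])
seq_len = tf.placeholder(tf.int32, [batch_size])  # true length of each sequence

cell = rnn.GRUCell(hidden_size)
# outputs: [batch_size, max_time, hidden_size]; state: [batch_size, hidden_size]
outputs, state = tf.nn.dynamic_rnn(cell, inputs,
                                   sequence_length=seq_len,
                                   dtype=tf.float32)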
def __init__(self, ob_space, ac_space, size=256, **kwargs):
    self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))

    for i in range(4):
        x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))
    # introduce a "fake" batch dimension of 1 after flatten so that we can do GRU over time dim
    x = tf.expand_dims(flatten(x), 1)

    gru = rnn.GRUCell(size)

    h_init = np.zeros((1, size), np.float32)
    self.state_init = [h_init]
    h_in = tf.placeholder(tf.float32, [1, size])
    self.state_in = [h_in]

    gru_outputs, gru_state = tf.nn.dynamic_rnn(
        gru, x, initial_state=h_in, sequence_length=[size],
        time_major=True)
    x = tf.reshape(gru_outputs, [-1, size])
    self.logits = linear(x, ac_space, "action", normalized_columns_initializer(0.01))
    self.vf = tf.reshape(linear(x, 1, "value", normalized_columns_initializer(1.0)), [-1])
    self.state_out = [gru_state[:1]]
    self.sample = categorical_sample(self.logits, ac_space)[0, :]
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)
def answer_module(self):
    """Answer Module: generate an answer from the final memory vector.
    Input:
        hidden state from episodic memory module: [batch_size, hidden_size]
        question: [batch_size, embedding_size]
    """
    steps = self.sequence_length if self.decode_with_sequences else 1  # decoder for a list of tokens with sequence, e.g. "x1 x2 x3 x4..."
    a = self.m_T  # init hidden state
    y_pred = tf.zeros((self.batch_size, self.hidden_size))  # TODO: usually initialized with a special token '<GO>'; you can change this line by passing the embedding of '<GO>' from outside.
    logits_list = []
    logits_return = None
    for i in range(steps):
        cell = rnn.GRUCell(self.hidden_size)
        y_previous_q = tf.concat([y_pred, self.query_embedding], axis=1)  # [batch_size, hidden_size*2]
        _, a = cell(y_previous_q, a)
        logits = tf.layers.dense(a, units=self.num_classes)  # [batch_size, vocab_size]
        logits_list.append(logits)
    if self.decode_with_sequences:  # need to get sequences
        logits_return = tf.stack(logits_list, axis=1)  # [batch_size, sequence_length, num_classes]
    else:  # only need to get an answer, not sequences
        logits_return = logits_list[0]  # [batch_size, num_classes]
    return logits_return
def _create_rnn_cell(self):
    """
    Creates a single RNN cell according to the architecture of this RNN.

    Returns
    -------
    rnn cell
        A single RNN cell according to the architecture of this RNN
    """
    keep_prob = 1.0 if self.keep_prob is None else self.keep_prob

    if self.cell_type == CellType.GRU:
        return DropoutWrapper(GRUCell(self.num_units), keep_prob, keep_prob)
    elif self.cell_type == CellType.LSTM:
        return DropoutWrapper(LSTMCell(self.num_units), keep_prob, keep_prob)
    else:
        raise ValueError("unknown cell type: {}".format(self.cell_type))
def _build_model(self, batch_size, helper_build_fn, decoder_maxiters=None, alignment_history=False):
    # embed input_data into a one-hot representation
    inputs = tf.one_hot(self.input_data, self._input_size, dtype=self._dtype)
    inputs_len = self.input_lengths

    with tf.name_scope('bidir-encoder'):
        fw_cell = rnn.MultiRNNCell([rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)],
                                   state_is_tuple=True)
        bw_cell = rnn.MultiRNNCell([rnn.BasicRNNCell(self._enc_rnn_size) for i in range(3)],
                                   state_is_tuple=True)
        fw_cell_zero = fw_cell.zero_state(batch_size, self._dtype)
        bw_cell_zero = bw_cell.zero_state(batch_size, self._dtype)

        enc_out, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, inputs,
                                                     sequence_length=inputs_len,
                                                     initial_state_fw=fw_cell_zero,
                                                     initial_state_bw=bw_cell_zero)

    with tf.name_scope('attn-decoder'):
        dec_cell_in = rnn.GRUCell(self._dec_rnn_size)
        attn_values = tf.concat(enc_out, 2)
        attn_mech = seq2seq.BahdanauAttention(self._enc_rnn_size * 2, attn_values, inputs_len)
        dec_cell_attn = rnn.GRUCell(self._enc_rnn_size * 2)
        dec_cell_attn = seq2seq.AttentionWrapper(dec_cell_attn,
                                                 attn_mech,
                                                 self._enc_rnn_size * 2,
                                                 alignment_history=alignment_history)
        dec_cell_out = rnn.GRUCell(self._output_size)
        dec_cell = rnn.MultiRNNCell([dec_cell_in, dec_cell_attn, dec_cell_out],
                                    state_is_tuple=True)

        dec = seq2seq.BasicDecoder(dec_cell, helper_build_fn(),
                                   dec_cell.zero_state(batch_size, self._dtype))

        dec_out, dec_state = seq2seq.dynamic_decode(dec, output_time_major=False,
                                                    maximum_iterations=decoder_maxiters,
                                                    impute_finished=True)

    self.outputs = dec_out.rnn_output
    self.output_ids = dec_out.sample_id
    self.final_state = dec_state
def _set_train_model(self):
    """
    define train graph
    :return:
    """
    # Create the internal multi-layer cell for our RNN.
    if use_lstm:
        single_cell1 = LSTMCell(self.enc_hidden_size)
        single_cell2 = LSTMCell(self.dec_hidden_size)
    else:
        single_cell1 = GRUCell(self.enc_hidden_size)
        single_cell2 = GRUCell(self.dec_hidden_size)
    enc_cell = MultiRNNCell([single_cell1 for _ in range(self.enc_num_layers)])
    dec_cell = MultiRNNCell([single_cell2 for _ in range(self.dec_num_layers)])

    self.encoder_cell = enc_cell
    self.decoder_cell = dec_cell

    self._make_graph(forward_only)
    self.saver = tf.train.Saver(tf.global_variables())
def BidirectionalGRUEncoder(self, inputs, name):
    '''
    inputs: [batch, max_time, embedding_size]
    output: [batch, max_time, 2*hidden_size]
    '''
    with tf.variable_scope(name):
        fw_gru_cell = rnn.GRUCell(self.hidden_size)
        bw_gru_cell = rnn.GRUCell(self.hidden_size)
        fw_gru_cell = rnn.DropoutWrapper(fw_gru_cell, output_keep_prob=self.dropout_keep_prob)
        bw_gru_cell = rnn.DropoutWrapper(bw_gru_cell, output_keep_prob=self.dropout_keep_prob)

        (fw_outputs, bw_outputs), (fw_outputs_sta, bw_outputs_sta) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=fw_gru_cell,
            cell_bw=bw_gru_cell,
            inputs=inputs,
            sequence_length=getSequenceRealLength(inputs),
            dtype=tf.float32)

        outputs = tf.concat((fw_outputs, bw_outputs), 2)
        return outputs
def RNN(_X, _weights, _biases, lens):
    if FLAGS.unit == 'PLSTM':
        cell = PhasedLSTMCell(FLAGS.n_hidden, use_peepholes=True)
    elif FLAGS.unit == 'GRU':
        cell = GRUCell(FLAGS.n_hidden)
    elif FLAGS.unit == 'LSTM':
        cell = LSTMCell(FLAGS.n_hidden, use_peepholes=True)
    else:
        raise ValueError('Unit {} not implemented.'.format(FLAGS.unit))

    outputs, states = tf.nn.dynamic_rnn(cell, _X, dtype=tf.float32, sequence_length=lens)

    # TODO better (?) in lack of smart indexing
    batch_size = tf.shape(outputs)[0]
    max_len = tf.shape(outputs)[1]
    out_size = int(outputs.get_shape()[2])
    index = tf.range(0, batch_size) * max_len + (lens - 1)
    flat = tf.reshape(outputs, [-1, out_size])
    relevant = tf.gather(flat, index)

    return tf.nn.bias_add(tf.matmul(relevant, _weights['out']), _biases['out'])
def input_module(self):
    """encode raw texts into vector representation"""
    story_embedding = tf.nn.embedding_lookup(self.Embedding, self.story)  # [batch_size, story_length, sequence_length, embed_size]
    story_embedding = tf.reshape(story_embedding,
                                 (self.batch_size, self.story_length, self.sequence_length * self.embed_size))
    hidden_state = tf.ones((self.batch_size, self.hidden_size), dtype=tf.float32)
    cell = rnn.GRUCell(self.hidden_size)
    self.story_embedding, hidden_state = tf.nn.dynamic_rnn(cell, story_embedding,
                                                           dtype=tf.float32, scope="input_module")
def question_module(self):
    """
    input: tokens of query: [batch_size, sequence_length]
    :return: representation of question: [batch_size, hidden_size]
    """
    query_embedding = tf.nn.embedding_lookup(self.Embedding, self.query)  # [batch_size, sequence_length, embed_size]
    cell = rnn.GRUCell(self.hidden_size)
    _, self.query_embedding = tf.nn.dynamic_rnn(cell, query_embedding, dtype=tf.float32,
                                                scope="question_module")  # query_embedding: [batch_size, hidden_size]
def _add_encoders(self):
    with tf.variable_scope('query_encoder'):
        query_encoder_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            query_encoder_cell = DropoutWrapper(cell=query_encoder_cell, output_keep_prob=0.8)

        query_embeddings = tf.nn.embedding_lookup(self.embeddings, self.queries_placeholder)
        query_encoder_outputs, _ = rnn.dynamic_rnn(query_encoder_cell, query_embeddings,
                                                   sequence_length=self.query_lengths_placeholder,
                                                   swap_memory=True, dtype=tf.float32)
        self.query_last = query_encoder_outputs[:, -1, :]

    with tf.variable_scope('encoder'):
        fw_cell = GRUCell(self.encoder_cell_state_size)
        bw_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            fw_cell = DropoutWrapper(cell=fw_cell, output_keep_prob=0.8)
            bw_cell = DropoutWrapper(cell=bw_cell, output_keep_prob=0.8)

        embeddings = tf.nn.embedding_lookup(self.embeddings, self.documents_placeholder)
        (encoder_outputs_fw, encoder_outputs_bw), _ = rnn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, embeddings,
            sequence_length=self.document_lengths_placeholder,
            swap_memory=True, dtype=tf.float32)

        self.encoder_outputs = tf.concat([encoder_outputs_fw, encoder_outputs_bw], 2)
        self.final_encoder_state = self.encoder_outputs[:, -1, :]
def __init__(self, n_classes, rnn_size=256, n_chunks=75):
    global gru_cell_units
    self._name = "star_platinum"
    self._hidden_layer_1 = {'weights': tf.Variable(tf.random_uniform([rnn_size, 1024]), name="weight1"),
                            'biases': tf.Variable(tf.random_uniform([1024]), name="biases1")}
    self._hidden_layer_2 = {'weights': tf.Variable(tf.random_uniform([1024, n_chunks * 10]), name="weight2"),
                            'biases': tf.Variable(tf.random_uniform([n_chunks * 10]), name="biases2")}
    self._lstm_cell = rnn.BasicLSTMCell(rnn_size)
    self._gru_cell = rnn.GRUCell(gru_cell_units)
    self._output = {'weights': tf.Variable(tf.random_uniform([gru_cell_units, n_classes]), name="weight3"),
                    'biases': tf.Variable(tf.random_uniform([n_classes]), name="biases3")}
def __init__(self, n_classes, rnn_size=256):
    self._name = "little_gru"
    self._layer_weights = tf.Variable(tf.random_uniform([rnn_size, n_classes]), name="weights")
    self._layer_biases = tf.Variable(tf.random_uniform([n_classes]), name="biases")
    self._gru_cell = rnn.GRUCell(rnn_size)
def BidirectionalGRUEncoder(self, inputs, name):
    # inputs shape: [batch_size, max_time, voc_size]
    with tf.variable_scope(name):
        GRU_cell_fw = rnn.GRUCell(self.hidden_size)
        GRU_cell_bw = rnn.GRUCell(self.hidden_size)
        # fw_outputs and bw_outputs each have shape [batch_size, max_time, hidden_size]
        ((fw_outputs, bw_outputs), (_, _)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=GRU_cell_fw,
                                                                             cell_bw=GRU_cell_bw,
                                                                             inputs=inputs,
                                                                             sequence_length=length(inputs),
                                                                             dtype=tf.float32)
        # outputs has shape [batch_size, max_time, hidden_size*2]
        outputs = tf.concat((fw_outputs, bw_outputs), 2)
        return outputs
def _get_rnn_unit(self, rnn_unit):
    if rnn_unit == 'lstm':
        fw_cell = rnn.BasicLSTMCell(self._nb_hidden, forget_bias=1., state_is_tuple=True)
        bw_cell = rnn.BasicLSTMCell(self._nb_hidden, forget_bias=1., state_is_tuple=True)
    elif rnn_unit == 'gru':
        fw_cell = rnn.GRUCell(self._nb_hidden)
        bw_cell = rnn.GRUCell(self._nb_hidden)
    else:
        raise ValueError('rnn_unit must in (lstm, gru)!')
    return fw_cell, bw_cell
def build_cell(units, cell_type='lstm', num_layers=1):
    if num_layers > 1:
        cell = rnn.MultiRNNCell([
            build_cell(units, cell_type, 1) for _ in range(num_layers)
        ])
    else:
        if cell_type == "lstm":
            cell = rnn.LSTMCell(units)
        elif cell_type == "gru":
            cell = rnn.GRUCell(units)
        else:
            raise ValueError('Do not support %s' % cell_type)
    return cell
def RNN(X, num_hidden_layers):
    # reshape to [1, n_input]
    std_dev_He = np.sqrt(2 / np.prod(X.get_shape().as_list()[1:]))
    X = tf.reshape(X, [-1, sequence_length * 8 * 8])

    # Generate a n_input-element sequence of inputs
    # (eg. [had] [a] [general] -> [20] [6] [33])
    X = tf.split(X, sequence_length, 1)

    # 1-layer LSTM with n_hidden units.
    # rnn_cell = rnn.BasicLSTMCell(n_hidden)
    with tf.variable_scope('RNN', tf.random_normal_initializer(mean=0.0, stddev=std_dev_He)):
        # tf.random_normal_initializer(mean=0.0, stddev=std_dev_He)
        # initializer=tf.contrib.layers.xavier_initializer()
        # weights = {
        #     'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
        # }
        # biases = {
        #     'out': tf.Variable(tf.random_normal([num_classes]))
        # }
        weights = tf.get_variable(
            name='weights',
            shape=[num_hidden, num_classes],  # 1 x 64 filter in, 1 class out
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable(
            name='biases',
            shape=[num_classes],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        GRU_cell_layer = [rnn.GRUCell(num_hidden)]
        # LSTM_cell_layer = [rnn.BasicLSTMCell(num_hidden, forget_bias=1)]
        rnn_cell = rnn.MultiRNNCell(GRU_cell_layer * num_hidden_layers)

        # generate prediction
        outputs, states = rnn.static_rnn(rnn_cell, X, dtype=tf.float32)

    # there are n_input outputs but we only want the last output
    # return tf.matmul(outputs[-1], weights['out']) + biases['out']
    return tf.matmul(outputs[-1], weights) + biases
def cell_create(self, scope_name):
    with tf.variable_scope(scope_name):
        if self.cell_type == 'tanh':
            cells = rnn.MultiRNNCell([rnn.BasicRNNCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        elif self.cell_type == 'LSTM':
            cells = rnn.MultiRNNCell([rnn.BasicLSTMCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        elif self.cell_type == 'GRU':
            cells = rnn.MultiRNNCell([rnn.GRUCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        elif self.cell_type == 'LSTMP':
            cells = rnn.MultiRNNCell([rnn.LSTMCell(self.n_hidden[i]) for i in range(self.n_layers)],
                                     state_is_tuple=True)
        cells = rnn.DropoutWrapper(cells, input_keep_prob=self.dropout_ph,
                                   output_keep_prob=self.dropout_ph)
        return cells
def __init__(self, m, seq_len, name='gen', reuse=False, n_stack=1, logit_range=4.0, **kwargs):
    # Get GRU cell builder
    range_wrapper = partial(OutputRangeWrapper, output_range=logit_range)
    cb = GeneratorRNNCellBuilder(
        rnn.GRUCell, m=m, n_stack=n_stack, wrappers=[range_wrapper]
    )
    # Super constructor
    super(GRUGenerator, self).__init__(
        m, seq_len, name=name, cell_builder=cb, reuse=reuse, **kwargs
    )
def _build_rnn_encoder(self, sentence1, sentence2, sentence1_lengths, sentence2_lengths):
    with tf.variable_scope('word_embedding'):
        sentence1_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence1)
        sentence2_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence2)

    with tf.variable_scope('rnn'):
        def _run_birnn(fw_cell, bw_cell, inputs, lengths):
            (fw_output, bw_output), (fw_final_state, bw_final_state) = \
                tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, inputs,
                    sequence_length=lengths,
                    time_major=False,
                    dtype=tf.float32
                )
            output = tf.concat([fw_output, bw_output], 2)
            state = tf.concat([fw_final_state, bw_final_state], 1)
            return output, state

        state_size = self.config['rnn']['state_size']
        forward_cell = GRUCell(state_size)
        backward_cell = GRUCell(state_size)

        sentence1_rnned, _ = _run_birnn(forward_cell, backward_cell,
                                        sentence1_embedding, sentence1_lengths)
        sentence2_rnned, _ = _run_birnn(forward_cell, backward_cell,
                                        sentence2_embedding, sentence2_lengths)

    return sentence1_embedding, sentence2_embedding, \
        sentence1_rnned, sentence2_rnned
def _build_rnn_encoder(self, sentence1, sentence2_pos, sentence2_neg,
                       sentence1_lengths, sentence2_pos_lengths, sentence2_neg_lengths):
    with tf.variable_scope('word_embedding'):
        sentence1_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence1)
        sentence2_pos_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence2_pos)
        sentence2_neg_embedding = tf.nn.embedding_lookup(self._word_embedding, sentence2_neg)

    with tf.variable_scope('rnn'):
        def _run_birnn(fw_cell, bw_cell, inputs, lengths):
            (fw_output, bw_output), (fw_final_state, bw_final_state) = \
                tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, inputs,
                    sequence_length=lengths,
                    time_major=False,
                    dtype=tf.float32
                )
            output = tf.concat([fw_output, bw_output], 2)
            state = tf.concat([fw_final_state, bw_final_state], 1)
            return output, state

        state_size = self.config['rnn']['state_size']
        forward_cell = GRUCell(state_size)
        backward_cell = GRUCell(state_size)

        sentence1_rnned, _ = _run_birnn(forward_cell, backward_cell,
                                        sentence1_embedding, sentence1_lengths)
        sentence2_rnned, _ = _run_birnn(
            forward_cell, backward_cell,
            tf.concat([sentence2_pos_embedding, sentence2_neg_embedding], 0),
            tf.concat([sentence2_pos_lengths, sentence2_neg_lengths], 0))
        sentence2_pos_rnned, sentence2_neg_rnned = \
            tf.split(sentence2_rnned, num_or_size_splits=2, axis=0)

    return sentence1_embedding, sentence2_pos_embedding, sentence2_neg_embedding, \
        sentence1_rnned, sentence2_pos_rnned, sentence2_neg_rnned
def __init__(self, num_units, tied=False, non_recurrent_fn=None):
    super(Grid2GRUCell, self).__init__(
        num_units=num_units, num_dims=2,
        input_dims=0, output_dims=0, priority_dims=0, tied=tied,
        non_recurrent_dims=None if non_recurrent_fn is None else 0,
        cell_fn=lambda n, i: rnn.GRUCell(num_units=n, input_size=i),
        non_recurrent_fn=non_recurrent_fn)
def HAN_model_1(session, restore_only=False):
    """Hierarchical Attention Network"""
    import tensorflow as tf
    try:
        from tensorflow.contrib.rnn import GRUCell, MultiRNNCell, DropoutWrapper
    except ImportError:
        MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell
        GRUCell = tf.nn.rnn_cell.GRUCell
    from bn_lstm import BNLSTMCell
    from HAN_model import HANClassifierModel

    is_training = tf.placeholder(dtype=tf.bool, name='is_training')

    cell = BNLSTMCell(80, is_training)  # h-h batchnorm LSTMCell
    # cell = GRUCell(30)
    cell = MultiRNNCell([cell] * 5)

    model = HANClassifierModel(
        vocab_size=vocab_size,
        embedding_size=200,
        classes=classes,
        word_cell=cell,
        sentence_cell=cell,
        word_output_size=100,
        sentence_output_size=100,
        device=args.device,
        learning_rate=args.lr,
        max_grad_norm=args.max_grad_norm,
        dropout_keep_proba=0.5,
        is_training=is_training,
    )

    saver = tf.train.Saver(tf.global_variables())
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint:
        print("Reading model parameters from %s" % checkpoint.model_checkpoint_path)
        saver.restore(session, checkpoint.model_checkpoint_path)
    elif restore_only:
        raise FileNotFoundError("Cannot restore model")
    else:
        print("Created model with fresh parameters")
        session.run(tf.global_variables_initializer())
    # tf.get_default_graph().finalize()
    return model, saver
def __init__(self, data, model='lstm', infer=False):
    self.rnn_size = 128
    self.n_layers = 2

    if infer:
        self.batch_size = 1
    else:
        self.batch_size = data.batch_size

    if model == 'rnn':
        cell_rnn = rnn.BasicRNNCell
    elif model == 'gru':
        cell_rnn = rnn.GRUCell
    elif model == 'lstm':
        cell_rnn = rnn.BasicLSTMCell
    cell = cell_rnn(self.rnn_size, state_is_tuple=False)
    self.cell = rnn.MultiRNNCell([cell] * self.n_layers, state_is_tuple=False)

    self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
    self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [self.rnn_size, data.words_size])
        softmax_b = tf.get_variable("softmax_b", [data.words_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [data.words_size, self.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

    outputs, final_state = tf.nn.dynamic_rnn(
        self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')

    self.output = tf.reshape(outputs, [-1, self.rnn_size])
    self.logits = tf.matmul(self.output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    self.final_state = final_state

    pred = tf.reshape(self.y_tf, [-1])
    # seq2seq
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [pred],
                                            [tf.ones_like(pred, dtype=tf.float32)])
    self.cost = tf.reduce_mean(loss)
    self.learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, encoder_size, decoder_size, encoder_vocab_size, decoder_vocab_size,
             encoder_layer_size, decoder_layer_size, RNN_type='LSTM',
             encoder_input_keep_prob=1.0, encoder_output_keep_prob=1.0,
             decoder_input_keep_prob=1.0, decoder_output_keep_prob=1.0,
             learning_rate=0.01, hidden_size=128):
    self.encoder_size = encoder_size
    self.decoder_size = decoder_size
    self.encoder_vocab_size = encoder_vocab_size
    self.decoder_vocab_size = decoder_vocab_size
    self.encoder_layer_size = encoder_layer_size
    self.decoder_layer_size = decoder_layer_size
    self.encoder_input_keep_prob = encoder_input_keep_prob
    self.encoder_output_keep_prob = encoder_output_keep_prob
    self.decoder_input_keep_prob = decoder_input_keep_prob
    self.decoder_output_keep_prob = decoder_output_keep_prob
    self.learning_rate = learning_rate
    self.hidden_size = hidden_size

    self.encoder_input = tf.placeholder(tf.float32, shape=(None, self.encoder_size, self.encoder_vocab_size))
    self.decoder_input = tf.placeholder(tf.float32, shape=(None, self.decoder_size, self.decoder_vocab_size))
    self.target_input = tf.placeholder(tf.int32, shape=(None, self.decoder_size))

    self.weight = tf.get_variable(shape=[self.hidden_size, self.decoder_vocab_size],
                                  initializer=tf.contrib.layers.xavier_initializer(),
                                  dtype=tf.float32, name='weight')
    self.bias = tf.get_variable(shape=[self.decoder_vocab_size],
                                initializer=tf.contrib.layers.xavier_initializer(),
                                dtype=tf.float32, name='bias')

    self.logits = None
    self.cost = None
    self.train_op = None
    self.RNNCell = None
    self.outputs = None
    self.merged = None

    if RNN_type == 'LSTM':
        self.RNNCell = rnn.LSTMCell
    elif RNN_type == 'GRU':
        self.RNNCell = rnn.GRUCell
    else:
        raise Exception('not support {} RNN type'.format(RNN_type))

    self.build_model()
    self.saver = tf.train.Saver(tf.global_variables())
def __init__(self, n_hidden, cell="GRU"):
    """
    qa_rnn module init.
    :param n_hidden: num of hidden units
    :param cell: gru|lstm|basic_rnn
    """
    self.rnn_cell = rnn.BasicRNNCell(num_units=n_hidden)
    if cell == "GRU":
        self.rnn_cell = rnn.GRUCell(num_units=n_hidden)
    elif cell == "LSTM":
        self.rnn_cell = rnn.LSTMCell(num_units=n_hidden)
    else:
        raise Exception(cell + " not supported.")
def attention_decoder(enc, length, state_transfer_helper,
                      voca_size=20, max_length=None,
                      name=None, reuse=None):
    with tf.variable_scope(name, "attention-decoder", values=[enc, length],
                           reuse=reuse) as scope:
        # get shapes
        batch_size = enc.get_shape().as_list()[0]
        if batch_size is None:
            batch_size = tf.shape(enc)[0]

        dims = int(enc.get_shape()[-1])

        # decoder
        dec_attn = seq2seq.DynamicAttentionWrapper(
            cell=rnn.GRUCell(dims, reuse=scope.reuse),
            attention_mechanism=seq2seq.LuongAttention(dims, enc, length),
            attention_size=dims
        )

        dec_network = rnn.MultiRNNCell([
            rnn.GRUCell(dims, reuse=scope.reuse),
            dec_attn,
            rnn.GRUCell(voca_size, reuse=scope.reuse)
        ], state_is_tuple=True)

        decoder = seq2seq.BasicDecoder(
            dec_network, state_transfer_helper(),
            initial_state=dec_network.zero_state(batch_size, tf.float32)
        )

        dec_outputs, _ = seq2seq.dynamic_decode(
            decoder,
            maximum_iterations=max_length,
            impute_finished=False
        )

        logits = dec_outputs.rnn_output
        labels = dec_outputs.sample_id

        # pad logits and labels
        if max_length is not None:
            logits = dynamic_time_pad(logits, max_length)
            labels = dynamic_time_pad(labels, max_length)

        return logits, labels
def __init__(self, args, infer=False):
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size, **additional_cell_args)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(axis=1, num_or_size_splits=args.seq_length,
                              value=tf.nn.embedding_lookup(embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                              loop_function=loop if infer else None,
                                              scope='rnnlm')
    # output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
    output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([args.batch_size * args.seq_length])],
                                            args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def inference(self):
    '''
    1. embedding layer
    2. Bi-LSTM layer
    3. concat Bi-LSTM output
    4. FC (fully connected) layer
    5. softmax layer
    '''
    # embedding layer
    with tf.device('/cpu:0'), tf.name_scope('embedding'):
        self.embedded_words = tf.nn.embedding_lookup(self.Embedding, self.input_x)

    # Bi-LSTM layer
    lstm_fw_cell = rnn.BasicLSTMCell(self.hidden_size)
    lstm_bw_cell = rnn.BasicLSTMCell(self.hidden_size)

    if self.dropout_keep_prob is not None:
        lstm_fw_cell = rnn.DropoutWrapper(lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
        lstm_bw_cell = rnn.DropoutWrapper(lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)

    outputs, output_states = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell,
                                                             self.embedded_words, dtype=tf.float32)

    # Bi-GRU layer
    '''
    gru_fw_cell = rnn.GRUCell(self.hidden_size)
    gru_bw_cell = rnn.GRUCell(self.hidden_size)

    if self.dropout_keep_prob is not None:
        gru_fw_cell = rnn.DropoutWrapper(gru_fw_cell, output_keep_prob=self.dropout_keep_prob)
        gru_bw_cell = rnn.DropoutWrapper(gru_bw_cell, output_keep_prob=self.dropout_keep_prob)

    outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_fw_cell, gru_bw_cell,
                                                             self.embedded_words, dtype=tf.float32)
    '''

    # concat output
    # each output in outputs is [batch, sequence_length, hidden_size]
    # concat forward output and backward output
    output_cnn = tf.concat(outputs, axis=2)  # [batch, sequence_length, 2*hidden_size]
    output_cnn_last = tf.reduce_mean(output_cnn, axis=1)  # [batch_size, 2*hidden_size]

    # FC layer
    with tf.name_scope('output'):
        self.score = tf.matmul(output_cnn_last, self.W_projection) + self.b_projection
    return self.score
def __init__(self, args, reverse_input, infer=False):
    if reverse_input:
        self.start_token = special_tokens.END_TOKEN
        self.end_token = special_tokens.START_TOKEN
    else:
        self.start_token = special_tokens.START_TOKEN
        self.end_token = special_tokens.END_TOKEN
    self.unk_token = special_tokens.UNK_TOKEN

    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size, state_is_tuple=True)
    self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers, state_is_tuple=True)

    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
            inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                     loop_function=loop if infer else None,
                                                     scope='rnnlm')
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)
    loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                                                   [tf.reshape(self.targets, [-1])],
                                                   [tf.ones([args.batch_size * args.seq_length])],
                                                   args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))