The following 29 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.ops.rnn.dynamic_rnn().
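Before the individual examples, here is a minimal, self-contained sketch of a typical call (TF 1.x; the cell type, shapes, and sizes are illustrative assumptions, not taken from any of the projects below):

import tensorflow as tf
from tensorflow.python.ops import rnn

# Batch-major inputs: [batch_size, max_time, depth]; sequence_length lets the
# op stop computation at each sequence's true end.
inputs = tf.placeholder(tf.float32, [32, 20, 64])
lengths = tf.placeholder(tf.int32, [32])
cell = tf.nn.rnn_cell.GRUCell(128)
outputs, final_state = rnn.dynamic_rnn(
    cell, inputs, sequence_length=lengths, dtype=tf.float32)
# outputs: [32, 20, 128]; final_state (GRU state is a single tensor): [32, 128]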
def testBuildAndTrain(self):
    inputs = tf.random_normal([TIME_STEPS, BATCH_SIZE, INPUT_SIZE])
    output, _ = rnn.dynamic_rnn(
        cell=self.module,
        inputs=inputs,
        initial_state=self.initial_state,
        time_major=True)
    targets = np.random.rand(TIME_STEPS, BATCH_SIZE, NUM_READS, WORD_SIZE)
    loss = tf.reduce_mean(tf.square(output - targets))
    train_op = tf.train.GradientDescentOptimizer(1).minimize(loss)
    init = tf.global_variables_initializer()
    with self.test_session():
        init.run()
        train_op.run()
def compute_states(self, emb):
    def unpack_sequence(tensor):
        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

    with tf.variable_scope("Composition",
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
        cell = rnn_cell.LSTMCell(self.hidden_dim)
        cell = rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.dropout, input_keep_prob=self.dropout)
        # output, state = rnn.dynamic_rnn(cell, emb, sequence_length=self.lngths, dtype=tf.float32)
        outputs, _ = rnn.rnn(cell, unpack_sequence(emb),
                             sequence_length=self.lngths, dtype=tf.float32)
        sum_out = tf.reduce_sum(tf.pack(outputs), [0])
        sent_rep = tf.div(sum_out, tf.expand_dims(tf.to_float(self.lngths), 1))
        final_state = sent_rep
    return final_state
def testDynamicAttentionDecoder1(self):
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            cell = core_rnn_cell_impl.GRUCell(2)
            inp = constant_op.constant(0.5, shape=[2, 2, 2])
            enc_outputs, enc_state = rnn.dynamic_rnn(
                cell, inp, dtype=dtypes.float32)
            attn_states = enc_outputs
            dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
            dec, mem = seq2seq_lib.attention_decoder(
                dec_inp, enc_state, attn_states, cell, output_size=4)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 4), res[0].shape)
            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].shape)
def _add_encoders(self):
    with tf.variable_scope('query_encoder'):
        query_encoder_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            query_encoder_cell = DropoutWrapper(cell=query_encoder_cell,
                                                output_keep_prob=0.8)
        query_embeddings = tf.nn.embedding_lookup(self.embeddings,
                                                  self.queries_placeholder)
        query_encoder_outputs, _ = rnn.dynamic_rnn(
            query_encoder_cell, query_embeddings,
            sequence_length=self.query_lengths_placeholder,
            swap_memory=True, dtype=tf.float32)
        self.query_last = query_encoder_outputs[:, -1, :]

    with tf.variable_scope('encoder'):
        fw_cell = GRUCell(self.encoder_cell_state_size)
        bw_cell = GRUCell(self.encoder_cell_state_size)
        if self.dropout_enabled and self.mode != 'decode':
            fw_cell = DropoutWrapper(cell=fw_cell, output_keep_prob=0.8)
            bw_cell = DropoutWrapper(cell=bw_cell, output_keep_prob=0.8)
        embeddings = tf.nn.embedding_lookup(self.embeddings,
                                            self.documents_placeholder)
        (encoder_outputs_fw, encoder_outputs_bw), _ = rnn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, embeddings,
            sequence_length=self.document_lengths_placeholder,
            swap_memory=True, dtype=tf.float32)
        self.encoder_outputs = tf.concat([encoder_outputs_fw, encoder_outputs_bw], 2)
        self.final_encoder_state = self.encoder_outputs[:, -1, :]
def fprop(self, inputs):
    with tf.variable_scope('model', values=[inputs]):
        one_hot_inputs = tf.one_hot(inputs, self.n_tokens, axis=-1)
        with tf.variable_scope('rnn', values=[inputs]):
            states, _ = dynamic_rnn(cell=IsanCell(self.hidden_dim),
                                    inputs=one_hot_inputs,
                                    dtype=tf.float32)
        Wo = tf.get_variable(
            'Wo', shape=[self.hidden_dim, self.target_dim],
            initializer=tf.random_normal_initializer(
                stddev=1.0 / (self.hidden_dim + self.target_dim) ** 2))
        bo = tf.get_variable('bo', shape=[1, self.target_dim],
                             initializer=tf.zeros_initializer())
        bs, t = inputs.get_shape().as_list()
        logits = tf.matmul(tf.reshape(states, [t * bs, self.hidden_dim]), Wo) + bo
        logits = tf.reshape(logits, [bs, t, self.target_dim])
        return logits
def crf_log_norm(inputs, sequence_lengths, transition_params):
    """Computes the normalization for a CRF.

    Args:
      inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials
          to use as input to the CRF layer.
      sequence_lengths: A [batch_size] vector of true sequence lengths.
      transition_params: A [num_tags, num_tags] transition matrix.
    Returns:
      log_norm: A [batch_size] vector of normalizers for a CRF.
    """
    # Split up the first and rest of the inputs in preparation for the forward
    # algorithm.
    first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
    first_input = array_ops.squeeze(first_input, [1])
    rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

    # Compute the alpha values in the forward algorithm in order to get the
    # partition function.
    forward_cell = CrfForwardRnnCell(transition_params)
    _, alphas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=rest_of_input,
        sequence_length=sequence_lengths - 1,
        initial_state=first_input,
        dtype=dtypes.float32)
    log_norm = math_ops.reduce_logsumexp(alphas, [1])
    return log_norm
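A hedged usage sketch of the function above, with made-up shapes (4 sequences, 10 steps, 5 tags); the variable names are hypothetical:

unary_scores = tf.random_normal([4, 10, 5])    # [batch_size, max_seq_len, num_tags]
seq_lens = tf.constant([10, 7, 10, 3])         # true length of each example
transitions = tf.random_normal([5, 5])         # [num_tags, num_tags]
log_z = crf_log_norm(unary_scores, seq_lens, transitions)  # [batch_size]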
def crf_log_norm(inputs, sequence_lengths, transition_params):
    """Computes the normalization for a CRF.

    Args:
      inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials
          to use as input to the CRF layer.
      sequence_lengths: A [batch_size] vector of true sequence lengths.
      transition_params: A [num_tags, num_tags] transition matrix.
    Returns:
      log_norm: A [batch_size] vector of normalizers for a CRF.
    """
    # Split up the first and rest of the inputs in preparation for the forward
    # algorithm.
    first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1])
    first_input = array_ops.squeeze(first_input, [1])
    rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1])

    # Compute the alpha values in the forward algorithm in order to get the
    # partition function.
    forward_cell = CrfForwardRnnCell(transition_params)
    # tf.nn.rnn creates an unrolled graph for a fixed RNN length: calling it
    # with inputs that have 200 time steps builds a static graph with 200 RNN
    # steps. Graph creation is slow, and you cannot feed in sequences longer
    # than the length originally specified. tf.nn.dynamic_rnn solves this: it
    # uses a tf.while_loop to construct the computation dynamically when the
    # graph is executed, so graph creation is faster and batches of variable
    # size can be fed.
    _, alphas = rnn.dynamic_rnn(
        cell=forward_cell,
        inputs=rest_of_input,
        sequence_length=sequence_lengths - 1,
        initial_state=first_input,
        dtype=dtypes.float32)
    log_norm = math_ops.reduce_logsumexp(alphas, [1])
    return log_norm
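To make the static-vs-dynamic point in that comment concrete, here is a small sketch (TF 1.x, toy shapes of my own choosing) showing that a single dynamic_rnn graph accepts batches whose time dimension varies between runs, which a statically unrolled graph cannot:

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, None, 8])  # [batch, time, depth]; time unknown
cell = tf.nn.rnn_cell.BasicLSTMCell(16)
outputs, _ = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The same graph handles 5-step and 200-step batches.
    print(sess.run(outputs, {x: np.zeros((2, 5, 8))}).shape)    # (2, 5, 16)
    print(sess.run(outputs, {x: np.zeros((2, 200, 8))}).shape)  # (2, 200, 16)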
def apply(self, is_train, x, mask=None):
    state = dynamic_rnn(self.cell_spec(is_train), x, mask, dtype=tf.float32)[1]
    if isinstance(self.output, int):
        return state[self.output]
    else:
        if self.output is None:
            if not isinstance(state, tf.Tensor):
                raise ValueError()
            return state
        for i, field in enumerate(state._fields):
            if field == self.output:
                return state[i]
        raise ValueError()
def apply(self, is_train, inputs, mask=None):
    cell = self.cell_spec(is_train)
    batch_size = inputs.shape.as_list()[0]
    if self.learn_initial:
        initial = self.cell_spec.build_initial_state_var(batch_size, cell)
    else:
        initial = None
    return dynamic_rnn(cell, inputs, mask, initial, dtype=tf.float32)[0]
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]),
                          name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")

    # Define an LSTM cell with TensorFlow
    outputs, states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    assert outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    print("Done building network " + name)

    # All these asserts are actually documentation: they can't be out of date
    outputs = tf.expand_dims(outputs, 2)
    assert outputs.get_shape() == (__batch_size, __n_steps, 1, __n_hidden)
    tiled_weights = tf.tile(tf.expand_dims(tf.expand_dims(weights, 0), 0),
                            [__batch_size, __n_steps, 1, 1])
    assert tiled_weights.get_shape() == (__batch_size, __n_steps, __n_hidden, n_output)
    # Linear activation, using the rnn inner-loop output for each char
    finals = tf.batch_matmul(outputs, tiled_weights) + biases
    assert finals.get_shape() == (__batch_size, __n_steps, 1, n_output)
    return tf.squeeze(finals)

# tf Graph input
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    inputs = tf.gather(one_hots, inputs)
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]),
                          name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")

    # Define an LSTM cell with TensorFlow
    outputs, states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    assert outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    print("Done building network " + name)

    # All these asserts are actually documentation: they can't be out of date
    outputs = tf.expand_dims(outputs, 2)
    assert outputs.get_shape() == (__batch_size, __n_steps, 1, __n_hidden)
    tiled_weights = tf.tile(tf.expand_dims(tf.expand_dims(weights, 0), 0),
                            [__batch_size, __n_steps, 1, 1])
    assert tiled_weights.get_shape() == (__batch_size, __n_steps, __n_hidden, n_output)
    # Linear activation, using the rnn inner-loop output for each char
    finals = tf.batch_matmul(outputs, tiled_weights) + biases
    assert finals.get_shape() == (__batch_size, __n_steps, 1, n_output)
    return tf.squeeze(finals)

# tf Graph input
# pat_chars = tf.placeholder(tf.float32, [__batch_size, __n_steps, n_input])
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    inputs = tf.gather(one_hots, inputs)
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]),
                          name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")

    # Define an LSTM cell with TensorFlow
    outputs, states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    # The static-rnn alternative would require reshaping the data:
    # current input shape: (__batch_size, __n_steps, n_input);
    # required shape: '__n_steps' tensors, each of shape (__batch_size, n_input).
    '''outputs, states = rnn.rnn(
        __cell_kind(__n_hidden),
        tf.unpack(tf.transpose(inputs, [1, 0, 2])),
        sequence_length=lens, dtype=tf.float32, scope=name)
    outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])'''
    print("Done building network " + name)

    # Asserts are actually documentation: they can't be out of date
    assert outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    # Linear activation, using the rnn output for each char.
    # Reshaping here for a `batch` matrix multiply; it's faster than
    # `batch_matmul`, probably because it can guarantee a static shape.
    outputs = tf.reshape(outputs, [__batch_size * __n_steps, __n_hidden])
    finals = tf.matmul(outputs, weights)
    return tf.reshape(finals, [__batch_size, __n_steps, n_output]) + biases

# tf Graph input
def standard_lstm(input_data, rnn_size):
    # Flatten the spatial grid (h * w) into a single sequence dimension,
    # run the LSTM over it, then restore the grid shape.
    b, h, w, c = input_data.get_shape().as_list()
    new_input_data = tf.reshape(input_data, (b, h * w, c))
    rnn_out, _ = dynamic_rnn(tf.contrib.rnn.LSTMCell(rnn_size),
                             inputs=new_input_data,
                             dtype=tf.float32)
    rnn_out = tf.reshape(rnn_out, (b, h, w, rnn_size))
    return rnn_out
def compute_states(self, emb):
    def unpack_sequence(tensor):
        return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

    with tf.variable_scope("Composition",
                           initializer=tf.contrib.layers.xavier_initializer(),
                           regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
        cell_fw = rnn_cell.LSTMCell(self.hidden_dim)
        cell_bw = rnn_cell.LSTMCell(self.hidden_dim)
        cell_fw = rnn_cell.DropoutWrapper(
            cell_fw, output_keep_prob=self.dropout, input_keep_prob=self.dropout)
        cell_bw = rnn_cell.DropoutWrapper(
            cell_bw, output_keep_prob=self.dropout, input_keep_prob=self.dropout)
        # output, state = rnn.dynamic_rnn(cell, emb, sequence_length=self.lngths, dtype=tf.float32)
        outputs, _, _ = rnn.bidirectional_rnn(
            cell_fw, cell_bw, unpack_sequence(emb),
            sequence_length=self.lngths, dtype=tf.float32)
        sum_out = tf.reduce_sum(tf.pack(outputs), [0])
        sent_rep = tf.div(sum_out, tf.expand_dims(tf.to_float(self.lngths), 1))
        final_state = sent_rep
    return final_state
def get_last_hidden_state(self, sentence, init_hidden_state=None):
    assert isinstance(sentence, Sentence)
    with tf.variable_scope(self.scope, reuse=self.used):
        J = sentence.shape[-1]
        Ax = tf.nn.embedding_lookup(self.emb_mat, sentence.x)  # [N, C, J, e]
        F = reduce(mul, sentence.shape[:-1], 1)
        init_hidden_state = init_hidden_state or self.cell.zero_state(F, tf.float32)
        Ax_flat = tf.reshape(Ax, [F, J, self.input_size])
        x_len_flat = tf.reshape(sentence.x_len, [F])
        o_flat, h_flat = rnn.dynamic_rnn(self.cell, Ax_flat, x_len_flat,
                                         initial_state=init_hidden_state)
        self.used = True
        return h_flat
def ndlstm_base_dynamic(inputs, noutput, scope=None, reverse=False):
    """Run an LSTM, either forward or backward.

    This is a 1D LSTM implementation using dynamic_rnn and
    the TensorFlow LSTM op.

    Args:
      inputs: input sequence (length, batch_size, ninput)
      noutput: depth of output
      scope: optional scope name
      reverse: run LSTM in reverse

    Returns:
      Output sequence (length, batch_size, noutput)
    """
    with variable_scope.variable_scope(scope, "SeqLstm", [inputs]):
        # TODO(tmb) make batch size, sequence_length dynamic
        # example: sequence_length = tf.shape(inputs)[0]
        _, batch_size, _ = _shape(inputs)
        lstm_cell = core_rnn_cell_impl.BasicLSTMCell(noutput, state_is_tuple=False)
        state = array_ops.zeros([batch_size, lstm_cell.state_size])
        sequence_length = int(inputs.get_shape()[0])
        sequence_lengths = math_ops.to_int64(
            array_ops.fill([batch_size], sequence_length))
        if reverse:
            inputs = array_ops.reverse_v2(inputs, [0])
        outputs, _ = rnn.dynamic_rnn(
            lstm_cell, inputs, sequence_lengths, state, time_major=True)
        if reverse:
            outputs = array_ops.reverse_v2(outputs, [0])
        return outputs
def ndlstm_base(inputs, noutput, scope=None, reverse=False, dynamic=True):
    """Implements a 1D LSTM, either forward or backward.

    This is a base case for multidimensional LSTM implementations, which
    tend to be used differently from sequence-to-sequence
    implementations. For general 1D sequence to sequence
    transformations, you may want to consider another
    implementation from TF slim.

    Args:
      inputs: input sequence (length, batch_size, ninput)
      noutput: depth of output
      scope: optional scope name
      reverse: run LSTM in reverse
      dynamic: use dynamic_rnn

    Returns:
      Output sequence (length, batch_size, noutput)
    """
    # TODO(tmb) maybe add option for other LSTM implementations, like
    # slim.rnn.basic_lstm_cell
    if dynamic:
        return ndlstm_base_dynamic(inputs, noutput, scope=scope, reverse=reverse)
    else:
        return ndlstm_base_unrolled(inputs, noutput, scope=scope, reverse=reverse)
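A possible usage of these two helpers, assuming the time-major (length, batch_size, ninput) layout described in the docstrings; the shapes and scope name here are illustrative:

seq = tf.zeros([50, 4, 32])                            # (length, batch_size, ninput)
fwd = ndlstm_base(seq, 16)                             # (50, 4, 16)
bwd = ndlstm_base(seq, 16, scope='rev', reverse=True)  # same shape, reversed scan
both = tf.concat([fwd, bwd], 2)                        # a simple bidirectional combination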
def run_lstm_mnist(lstm_cell=BasicLSTMCell, hidden_size=32, batch_size=256, steps=1000):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    learning_rate = 0.001
    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'training_accuracy'])

    x = tf.placeholder('float32', [batch_size, 784, 2 if lstm_cell == PhasedLSTMCell else 1])
    y_ = tf.placeholder('float32', [batch_size, 10])

    initial_states = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                      tf.random_normal([batch_size, hidden_size], stddev=0.1))
    outputs, _ = dynamic_rnn(lstm_cell(hidden_size), x,
                             initial_state=initial_states, dtype=tf.float32)
    rnn_out = tf.squeeze(outputs[:, -1, :])

    fc0_w = create_weight_variable('fc0_w', [hidden_size, 10])
    fc0_b = create_bias_variable('fc0_b', [10])
    y = tf.matmul(rnn_out, fc0_w) + fc0_b

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    grad_update = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(tf.global_variables_initializer())

    def transform_x(_x_):
        if lstm_cell == PhasedLSTMCell:
            t = np.reshape(np.tile(np.array(range(784)), (batch_size, 1)),
                           (batch_size, 784))
            return np.squeeze(np.stack([_x_, t], axis=2))
        t_x = np.expand_dims(_x_, axis=2)
        return t_x

    for i in range(steps):
        batch = mnist.train.next_batch(batch_size)
        st = time()
        tr_loss, tr_acc, _ = sess.run([cross_entropy, accuracy, grad_update],
                                      feed_dict={x: transform_x(batch[0]), y_: batch[1]})
        print('Forward-Backward pass took {0:.2f}s to complete.'.format(time() - st))
        file_logger.write([i, tr_loss, tr_acc])
    file_logger.close()
def sentence_embedding_rnn(_encoder_inputs, vocab_size, cell, embedding_size,
                           mask=None, dtype=dtypes.float32, scope=None,
                           reuse_scope=None):
    with variable_scope.variable_scope("embedding_rnn", reuse=reuse_scope):
        # Wrap the cell so that integer token inputs are embedded before
        # being fed to the RNN.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell, embedding_classes=vocab_size, embedding_size=embedding_size)
        # Divide encoder_inputs into segments according to the given input mask
        if mask is not None:
            encoder_inputs = [[] for _ in mask]
            _mask = 0
            for num in range(len(_encoder_inputs)):
                encoder_inputs[_mask].append(_encoder_inputs[num])
                if num == mask[_mask]:
                    _mask += 1
        else:
            encoder_inputs = [_encoder_inputs]
        # Run each segment through the RNN, threading the final state of one
        # segment in as the initial state of the next.
        encoder_state = None
        encoder_states = []
        for encoder_input in encoder_inputs:
            if encoder_state is None:
                _, encoder_state = rnn.dynamic_rnn(encoder_cell, encoder_input,
                                                   dtype=dtype)
            else:
                _, encoder_state = rnn.dynamic_rnn(encoder_cell, encoder_input,
                                                   initial_state=encoder_state,
                                                   dtype=dtype)
            encoder_states.append(encoder_state)
        return encoder_states
def __init__(self,
             cell,
             target_column,
             optimizer,
             model_dir=None,
             config=None,
             gradient_clipping_norm=None,
             inputs_key='inputs',
             sequence_length_key='sequence_length',
             initial_state_key='initial_state',
             dtype=None,
             parallel_iterations=None,
             swap_memory=False,
             name=None):
    """Initialize `DynamicRNNEstimator`.

    Args:
      cell: an initialized `RNNCell` to be used in the RNN.
      target_column: an initialized `TargetColumn`, used to calculate loss and
        metrics.
      optimizer: an initialized `tensorflow.Optimizer`.
      model_dir: The directory in which to save and restore the model graph,
        parameters, etc.
      config: A `RunConfig` instance.
      gradient_clipping_norm: parameter used for gradient clipping. If `None`,
        then no clipping is performed.
      inputs_key: the key for input values in the features dict passed to
        `fit()`.
      sequence_length_key: the key for the sequence length tensor in the
        features dict passed to `fit()`.
      initial_state_key: the key for the initial state in the features dict
        passed to `fit()`.
      dtype: Parameter passed to `dynamic_rnn`. The dtype of the state and
        output returned by `RNNCell`.
      parallel_iterations: Parameter passed to `dynamic_rnn`. The number of
        iterations to run in parallel.
      swap_memory: Parameter passed to `dynamic_rnn`. Transparently swap the
        tensors produced in forward inference but needed for back prop from
        GPU to CPU.
      name: Optional name for the `Estimator`.
    """
    super(_DynamicRNNEstimator, self).__init__(
        model_dir=model_dir, config=config)
    self._cell = cell
    self._target_column = target_column
    self._optimizer = optimizer
    self._gradient_clipping_norm = gradient_clipping_norm
    self._inputs_key = inputs_key
    self._sequence_length_key = sequence_length_key
    self._initial_state_key = initial_state_key
    self._dtype = dtype or dtypes.float32
    self._parallel_iterations = parallel_iterations
    self._swap_memory = swap_memory
    self._name = name or 'DynamicRnnEstimator'
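For orientation, a features dict matching the default keys above might look like the following sketch, as an input_fn could build it; the shapes are made up and only the inputs key is required:

features = {
    'inputs': tf.placeholder(tf.float32, [16, 30, 8]),  # [batch_size, padded_length, k]
    'sequence_length': tf.placeholder(tf.int64, [16]),  # optional true lengths
    # 'initial_state' may also be supplied, matching cell.state_size
}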
def _construct_rnn(self, features):
    """Apply an RNN to `features`.

    The `features` dict must contain `self._inputs_key`, and the corresponding
    input should be a `Tensor` of shape `[batch_size, padded_length, k]`
    where `k` is the dimension of the input for each element of a sequence.

    `activations` has shape `[batch_size, sequence_length, n]` where `n` is
    `self._target_column.num_label_columns`. In the case of a multiclass
    classifier, `n` is the number of classes.

    `final_state` has shape determined by `self._cell` and its dtype must match
    `self._dtype`.

    Args:
      features: a `dict` containing the input for the RNN and (optionally) an
        initial state and information about sequence lengths.

    Returns:
      activations: the output of the RNN, projected to the appropriate number
        of dimensions.
      final_state: the final state output by the RNN.

    Raises:
      KeyError: if `features` does not contain `self._inputs_key`.
    """
    with ops.name_scope('RNN'):
        inputs = features.get(self._inputs_key)
        if inputs is None:
            raise KeyError('features must contain the key {}'.format(
                self._inputs_key))
        if inputs.dtype != self._dtype:
            inputs = math_ops.cast(inputs, self._dtype)
        initial_state = features.get(self._initial_state_key)
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=self._cell,
            inputs=inputs,
            initial_state=initial_state,
            dtype=self._dtype,
            parallel_iterations=self._parallel_iterations,
            swap_memory=self._swap_memory,
            time_major=False)
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=self._target_column.num_label_columns,
            activation_fn=None,
            trainable=False)
        return activations, final_state
def construct_rnn(initial_state,
                  sequence_input,
                  cell,
                  num_label_columns,
                  dtype=dtypes.float32,
                  parallel_iterations=32,
                  swap_memory=False):
    """Build an RNN and apply a fully connected layer to get the desired output.

    Args:
      initial_state: The initial state to pass to the RNN. If `None`, the
        default starting state for `self._cell` is used.
      sequence_input: A `Tensor` with shape `[batch_size, padded_length, d]`
        that will be passed as input to the RNN.
      cell: An initialized `RNNCell`.
      num_label_columns: The desired output dimension.
      dtype: dtype of `cell`.
      parallel_iterations: Number of iterations to run in parallel. Values >> 1
        use more memory but take less time, while smaller values use less memory
        but computations take longer.
      swap_memory: Transparently swap the tensors produced in forward inference
        but needed for back prop from GPU to CPU. This allows training RNNs
        which would typically not fit on a single GPU, with very minimal (or no)
        performance penalty.

    Returns:
      activations: The output of the RNN, projected to `num_label_columns`
        dimensions.
      final_state: The final state output by the RNN.
    """
    with ops.name_scope('RNN'):
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=cell,
            inputs=sequence_input,
            initial_state=initial_state,
            dtype=dtype,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            time_major=False)
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=num_label_columns,
            activation_fn=None,
            trainable=True)
        return activations, final_state
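A hedged sketch of calling construct_rnn, assuming a GRU cell and batch-major input; the names and sizes are illustrative:

cell = tf.nn.rnn_cell.GRUCell(64)
sequence_input = tf.placeholder(tf.float32, [8, 20, 32])  # [batch, padded_length, d]
activations, final_state = construct_rnn(
    initial_state=None,           # fall back to the cell's zero state
    sequence_input=sequence_input,
    cell=cell,
    num_label_columns=5)          # e.g. per-step 5-class logits
# activations: [8, 20, 5]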
def _apply(self, X, state=None, memory=None):
    # time_major: The shape format of the `inputs` and `outputs` Tensors.
    # If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
    # If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
    # ====== create attention if necessary ====== #
    cell = self.cell
    if self.bidirectional:
        cell_bw = self.cell_bw
    # create attention cell
    if self.attention:
        if not hasattr(self, "_cell_with_attention"):
            self._cell_with_attention = self.__attention_creator(
                cell, X=X, memory=memory)
        cell = self._cell_with_attention
        # bidirectional attention
        if self.bidirectional:
            if not hasattr(self, "_cell_with_attention_bw"):
                self._cell_with_attention_bw = self.__attention_creator(
                    cell_bw, X=X, memory=memory)
            cell_bw = self._cell_with_attention_bw
    # ====== calling the rnn wrapper ====== #
    ## Bidirectional
    if self.bidirectional:
        rnn_func = rnn.bidirectional_dynamic_rnn if self.dynamic \
            else rnn.static_bidirectional_rnn
        state_fw, state_bw = None, None
        if isinstance(state, (tuple, list)):
            state_fw = state[0]
            if len(state) > 1:
                state_bw = state[1]
        else:
            state_fw = state
        outputs = rnn_func(cell_fw=cell, cell_bw=cell_bw, inputs=X,
                           initial_state_fw=state_fw,
                           initial_state_bw=state_bw,
                           dtype=X.dtype.base_dtype)
    ## Unidirectional
    else:
        rnn_func = rnn.dynamic_rnn if self.dynamic else rnn.static_rnn
        outputs = rnn_func(cell, inputs=X, initial_state=state,
                           dtype=X.dtype.base_dtype)
    # ====== initialize cell ====== #
    if not self._is_initialized_variables:
        # initialize only once; every time you call this, the values of
        # the variables change
        K.eval(tf.variables_initializer(self.variables))
        self._is_initialized_variables = True
        _infer_variable_role(self.variables)
    # ====== return ====== #
    if self.bidirectional:  # concat outputs
        outputs = (tf.concat(outputs[0], axis=-1), outputs[1])
    if not self.return_states:
        return outputs[0]
    return outputs
def RNN(inputs, lens, name, reuse):
    print("Building network " + name)
    # Define weights
    inputs = tf.gather(one_hots, inputs)
    weights = tf.Variable(tf.random_normal([__n_hidden, n_output]),
                          name=name + "_weights")
    biases = tf.Variable(tf.random_normal([n_output]), name=name + "_biases")

    # Define an LSTM cell with TensorFlow
    enc_outputs, enc_states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        inputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    dec_outputs, dec_states = rnn.dynamic_rnn(
        __cell_kind(__n_hidden),
        enc_outputs,
        sequence_length=lens,
        dtype=tf.float32,
        scope=name,
        time_major=False)
    # The static-rnn alternative would require reshaping the data:
    # current input shape: (__batch_size, __n_steps, n_input);
    # required shape: '__n_steps' tensors, each of shape (__batch_size, n_input).
    '''dec_outputs, dec_states = rnn.rnn(
        __cell_kind(__n_hidden),
        tf.unpack(tf.transpose(inputs, [1, 0, 2])),
        sequence_length=lens, dtype=tf.float32, scope=name)
    outputs = tf.transpose(tf.pack(outputs), [1, 0, 2])'''
    print("Done building network " + name)

    # Asserts are actually documentation: they can't be out of date
    assert dec_outputs.get_shape() == (__batch_size, __n_steps, __n_hidden)
    # Linear activation, using the rnn output for each char.
    # Reshaping here for a `batch` matrix multiply; it's faster than
    # `batch_matmul`, probably because it can guarantee a static shape.
    outputs = tf.reshape(dec_outputs, [__batch_size * __n_steps, __n_hidden])
    finals = tf.matmul(outputs, weights)
    finals = tf.reshape(finals, [__batch_size, __n_steps, n_output]) + biases
    return finals[:, :__n_steps - 1, :]

# tf Graph input
def construct_rnn(initial_state,
                  sequence_input,
                  cell,
                  num_label_columns,
                  dtype=dtypes.float32,
                  parallel_iterations=32,
                  swap_memory=True):
    """Build an RNN and apply a fully connected layer to get the desired output.

    Args:
      initial_state: The initial state to pass to the RNN. If `None`, the
        default starting state for `self._cell` is used.
      sequence_input: A `Tensor` with shape `[batch_size, padded_length, d]`
        that will be passed as input to the RNN.
      cell: An initialized `RNNCell`.
      num_label_columns: The desired output dimension.
      dtype: dtype of `cell`.
      parallel_iterations: Number of iterations to run in parallel. Values >> 1
        use more memory but take less time, while smaller values use less memory
        but computations take longer.
      swap_memory: Transparently swap the tensors produced in forward inference
        but needed for back prop from GPU to CPU. This allows training RNNs
        which would typically not fit on a single GPU, with very minimal (or no)
        performance penalty.

    Returns:
      activations: The output of the RNN, projected to `num_label_columns`
        dimensions.
      final_state: A `Tensor` or nested tuple of `Tensor`s representing the
        final state output by the RNN.
    """
    with ops.name_scope('RNN'):
        rnn_outputs, final_state = rnn.dynamic_rnn(
            cell=cell,
            inputs=sequence_input,
            initial_state=initial_state,
            dtype=dtype,
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory,
            time_major=False)
        activations = layers.fully_connected(
            inputs=rnn_outputs,
            num_outputs=num_label_columns,
            activation_fn=None,
            trainable=True)
        return activations, final_state
def run_lstm_mnist(lstm_cell=BasicLSTMCell, hidden_size=32, batch_size=256,
                   steps=1000, log_file='log.tsv'):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    learning_rate = 0.001
    file_logger = FileLogger(log_file, ['step', 'training_loss', 'training_accuracy'])

    x_ = tf.placeholder(tf.float32, (batch_size, mnist_img_size, 1))
    t_ = tf.placeholder(tf.float32, (batch_size, mnist_img_size, 1))
    y_ = tf.placeholder(tf.float32, (batch_size, num_classes))

    if lstm_cell == PhasedLSTMCell:
        inputs = (t_, x_)
    else:
        inputs = x_

    outputs, _ = dynamic_rnn(cell=lstm_cell(hidden_size), inputs=inputs, dtype=tf.float32)
    rnn_out = tf.squeeze(outputs[:, -1, :])
    y = slim.fully_connected(inputs=rnn_out, num_outputs=num_classes, activation_fn=None)

    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
    grad_update = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
    sess.run(tf.global_variables_initializer())

    def feed_dict_phased_lstm(batch):
        img = np.expand_dims(batch[0], axis=2)
        t = np.reshape(np.tile(np.array(range(mnist_img_size)), (batch_size, 1)),
                       (batch_size, mnist_img_size, 1))
        return {x_: img, y_: batch[1], t_: t}

    def feed_dict_basic_lstm(batch):
        img = np.expand_dims(batch[0], axis=2)
        return {x_: img, y_: batch[1]}

    for i in range(steps):
        b = mnist.train.next_batch(batch_size)
        st = time()
        if lstm_cell == PhasedLSTMCell:
            feed_dict = feed_dict_phased_lstm(b)
        else:
            feed_dict = feed_dict_basic_lstm(b)
        tr_loss, tr_acc, _ = sess.run([cross_entropy, accuracy, grad_update],
                                      feed_dict=feed_dict)
        print('steps = {0} | time {1:.2f} | tr_loss = {2:.3f} | tr_acc = {3:.3f}'.format(
            str(i).zfill(6), time() - st, tr_loss, tr_acc))
        file_logger.write([i, tr_loss, tr_acc])
    file_logger.close()
def run_experiment(init_session=None, placeholder_def_func=get_placeholders):
    batch_size = BATCH_SIZE
    hidden_size = HIDDEN_STATES
    learning_rate = 3e-4
    momentum = 0.9

    file_logger = FileLogger('log.tsv', ['step', 'training_loss', 'benchmark_loss'])

    x, y = placeholder_def_func()

    if ADD_TIME_INPUTS:
        lstm = PhasedLSTMCell(hidden_size)
        print('Using PhasedLSTMCell impl.')
    else:
        lstm = BasicLSTMCell(hidden_size)
        print('Using BasicLSTMCell impl.')

    initial_state = (tf.random_normal([batch_size, hidden_size], stddev=0.1),
                     tf.random_normal([batch_size, hidden_size], stddev=0.1))

    outputs, state = dynamic_rnn(lstm, x, initial_state=initial_state, dtype=tf.float32)
    rnn_out = tf.squeeze(tf.slice(outputs,
                                  begin=[0, tf.shape(outputs)[1] - 1, 0],
                                  size=[-1, -1, -1]))
    # _, final_hidden = state

    fc0_w = create_weight_variable('fc0_w', [hidden_size, 1])
    fc0_b = tf.get_variable('fc0_b', [1])
    out = tf.matmul(rnn_out, fc0_w) + fc0_b

    loss = tf.reduce_mean(tf.square(tf.sub(out, y)))
    optimizer = create_adam_optimizer(learning_rate, momentum)
    trainable = tf.trainable_variables()
    grad_update = optimizer.minimize(loss, var_list=trainable)

    if init_session is not None:
        sess = init_session
    else:
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

    init = tf.global_variables_initializer()
    sess.run(init)

    # lstm.__call__(x[:, 0, :], initial_state, scope=None)

    d = collections.deque(maxlen=10)
    benchmark_d = collections.deque(maxlen=10)
    for step in range(1, int(1e9)):
        x_s, y_s = next_batch(batch_size)
        loss_value, _, pred_value = sess.run([loss, grad_update, out],
                                             feed_dict={x: x_s, y: y_s})
        # The mean converges to 0.5 for IID U(0, 1) random variables. Good benchmark.
        benchmark_d.append(np.mean(np.square(0.5 - y_s)))
        d.append(loss_value)
        mean_loss = np.mean(d)
        benchmark_mean_loss = np.mean(benchmark_d)
        file_logger.write([step, mean_loss, benchmark_mean_loss])
    file_logger.close()