def gibbs_sample(k):
    """Run a k-step Gibbs chain for the RBM defined by W, bh and bv.

    The chain is seeded with ``x`` from the enclosing scope. The visible
    sample produced by the last step is returned with gradients blocked.
    """

    def chain_unfinished(step, num_steps, *unused):
        # Loop while the step counter is below the requested chain length.
        return step < num_steps

    def gibbs_step(step, num_steps, visible):
        """Run a single Gibbs step; the visible values start as ``visible``."""
        # Propagate the visible values to sample the hidden values.
        hidden_activation = tf.sigmoid(tf.matmul(visible, W) + bh)
        hidden = sample(hidden_activation)
        # Propagate the hidden values to sample the visible values.
        visible_activation = tf.sigmoid(
            tf.matmul(hidden, tf.transpose(W)) + bv)
        visible = sample(visible_activation)
        return step + 1, num_steps, visible

    # Run Gibbs steps for k iterations, counting up from zero.
    step_counter = tf.constant(0)
    loop_vars = [step_counter, tf.constant(k), x]
    _, _, x_sample = control_flow_ops.while_loop(
        chain_unfinished, gibbs_step, loop_vars)

    # Not strictly necessary in this implementation, but if this code is
    # adapted to use one of TensorFlow's optimizers, this stops gradients
    # from being propagated back through the Gibbs steps.
    return tf.stop_gradient(x_sample)


# Training Update Code
# Now we implement the contrastive divergence algorithm.
# First, we get the samples of x and h from the probability distribution.
# The sample of x
def gibbs_sample(k):
    """Sample from the RBM defined by W, bh, bv with a k-step Gibbs chain.

    Starts from ``x`` (taken from the enclosing scope) and returns the final
    visible sample wrapped in ``tf.stop_gradient``.
    """

    def gibbs_step(count, num_iter, visible_in):
        """Perform one Gibbs step with the visible units set to ``visible_in``."""
        # Visible -> hidden: sample the hidden units.
        hidden_sample = sample(tf.sigmoid(tf.matmul(visible_in, W) + bh))
        # Hidden -> visible: sample the visible units.
        visible_out = sample(
            tf.sigmoid(tf.matmul(hidden_sample, tf.transpose(W)) + bv))
        return count + 1, num_iter, visible_out

    def more_steps(count, num_iter, *args):
        return count < num_iter

    # Run Gibbs steps for k iterations.
    counter = tf.constant(0)
    final_vars = control_flow_ops.while_loop(
        more_steps, gibbs_step, [counter, tf.constant(k), x])
    _, _, x_sample = final_vars

    # This is not strictly necessary in this implementation, but it is
    # needed when adapting the code to one of TensorFlow's optimizers, to
    # stop gradients from propagating back through the Gibbs steps.
    x_sample = tf.stop_gradient(x_sample)
    return x_sample


### Training Update Code
# Now we implement the contrastive divergence algorithm. First, we get the
# samples of x and h from the probability distribution.
# The sample of x
def testLoops(self):
    """Tests that compilation accepts computations containing loops.

    Builds a five-iteration while loop under ``jit_scope()``, runs it with
    full tracing enabled, and checks that ``MetadataHasXlaLaunch`` accepts
    the collected run metadata and that the numeric result is as expected.
    """
    with self.test_session() as session:
        x = array_ops.placeholder(dtypes.float32)
        with jit_scope():
            c = lambda i, _: math_ops.less(i, 5)
            b = lambda i, x: (i + 1, x * 2.0 + 1.0)
            _, y = control_flow_ops.while_loop(
                c, b, (constant_op.constant(0), x))

        run_metadata = config_pb2.RunMetadata()
        result = session.run(
            y, {x: np.float32(2)},
            run_metadata=run_metadata,
            options=config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE))
        # assertTrue replaces the deprecated assert_ alias, which was
        # removed from unittest in Python 3.12.
        self.assertTrue(MetadataHasXlaLaunch(run_metadata))
        # Applying x -> 2x + 1 five times to 2 gives 95.
        self.assertAllClose(result, np.float32(95), rtol=1e-1)
def __call__(self, inputs, state, timestep = 0, scope=None):
    """Iterate ``self.ACTStep`` in a while loop and return its output and state.

    The loop keeps running while any batch element still has
    ``prob_compare < self.one_minus_eps`` and ``counter < self.N``. After the
    loop, the mean of ``1 - remainders`` and the mean iteration count are
    appended to ``self.ACT_remainder`` and ``self.ACT_iterations``.

    Args:
      inputs: Passed through the loop to ``self.ACTStep``.
      state: Initial state; also used as the template for both accumulators.
      timestep: Accepted but not used in this method.
      scope: Optional variable scope; defaults to the class name.

    Returns:
      The ``(output, next_state)`` pair taken from the last two loop variables.
    """
    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with vs.variable_scope(scope or type(self).__name__):
        # Per-example counters and accumulators that drive the ACT loop.
        prob = tf.constant(
            0.0, dtype=tf.float32, shape=[self.batch_size], name="prob")
        prob_compare = tf.constant(
            0.0, dtype=tf.float32, shape=[self.batch_size],
            name="prob_compare")
        counter = tf.constant(
            0.0, dtype=tf.float32, shape=[self.batch_size], name="counter")
        acc_outputs = tf.zeros_like(
            state, dtype=tf.float32, name="output_accumulator")
        acc_states = tf.zeros_like(
            state, dtype=tf.float32, name="state_accumulator")
        batch_mask = tf.constant(True, dtype=tf.bool, shape=[self.batch_size])

        def keep_stepping(_mask, prob_compare, _prob, counter, *_rest):
            # The loop stops once this is False, i.e. once every example has
            # passed either the probability threshold or the step limit.
            below_threshold = tf.less(prob_compare, self.one_minus_eps)
            below_step_limit = tf.less(counter, self.N)
            return tf.reduce_any(
                tf.logical_and(below_threshold, below_step_limit))

        initial_vars = [batch_mask, prob_compare, prob, counter,
                        state, inputs, acc_outputs, acc_states]
        final_vars = control_flow_ops.while_loop(
            keep_stepping, self.ACTStep, initial_vars)
        _, _, remainders, iterations, _, _, output, next_state = final_vars

    # Accumulate the remainder and iteration statistics for this call.
    self.ACT_remainder.append(tf.reduce_mean(1 - remainders))
    self.ACT_iterations.append(tf.reduce_mean(iterations))

    return output, next_state
def __call__(self, inputs, state, timestep = 0, scope=None):
    """Run the ACT loop built on ``self.ACTStep`` for one call.

    Loop variables are created, ``self.ACTStep`` is repeated while any batch
    element satisfies ``prob_compare < self.one_minus_eps`` and
    ``counter < self.N``, and summary statistics are appended to
    ``self.ACT_remainder`` and ``self.ACT_iterations``.

    Args:
      inputs: Passed through the loop to ``self.ACTStep``.
      state: Initial state; its shape is reused for the two accumulators.
      timestep: Accepted but not used in this method.
      scope: Optional variable scope; defaults to the class name.

    Returns:
      ``(output, next_state)`` from the final loop variables.
    """
    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with vs.variable_scope(scope or type(self).__name__):
        batch_shape = [self.batch_size]

        # Constants and counters that control the ACTStep while loop.
        prob = tf.constant(0.0, tf.float32, batch_shape, name="prob")
        prob_compare = tf.constant(
            0.0, tf.float32, batch_shape, name="prob_compare")
        counter = tf.constant(0.0, tf.float32, batch_shape, name="counter")
        acc_outputs = tf.zeros_like(
            state, tf.float32, name="output_accumulator")
        acc_states = tf.zeros_like(
            state, tf.float32, name="state_accumulator")
        batch_mask = tf.constant(True, tf.bool, batch_shape)

        def any_example_active(_mask, prob_compare, _prob, counter, *_rest):
            # False only when every example has crossed 1 - eps or reached N
            # steps; the while loop stops at that point.
            still_halting = tf.less(prob_compare, self.one_minus_eps)
            steps_left = tf.less(counter, self.N)
            return tf.reduce_any(tf.logical_and(still_halting, steps_left))

        loop_result = control_flow_ops.while_loop(
            any_example_active,
            self.ACTStep,
            [batch_mask, prob_compare, prob, counter,
             state, inputs, acc_outputs, acc_states])
        remainders = loop_result[2]
        iterations = loop_result[3]
        output = loop_result[6]
        next_state = loop_result[7]

    # Accumulate the remainder and N values.
    self.ACT_remainder.append(tf.reduce_mean(1 - remainders))
    self.ACT_iterations.append(tf.reduce_mean(iterations))

    return output, next_state
def __call__(self, inputs, state, scope=None):
    """Run ``self.act_step`` in a while loop until no batch flag remains set.

    Side effects: sets ``self.batch_size`` and ``self.one_minus_eps``, and
    appends ``1 - probs`` and the iteration counter to
    ``self.ACT_remainder`` and ``self.ACT_iterations``.

    Args:
      inputs: Input tensor; its first dimension is used as the batch size.
      state: Initial state. When ``self.state_is_tuple`` is set it is
        concatenated along axis 1 before the loop and split back afterwards.
      scope: Optional variable scope; defaults to the class name.

    Returns:
      ``(output, next_state)`` from the final loop variables.
    """
    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with vs.variable_scope(scope or type(self).__name__):
        # Work on a single concatenated state tensor inside the loop.
        if self.state_is_tuple:
            state = array_ops.concat(1, state)

        self.batch_size = tf.shape(inputs)[0]
        per_example = [self.batch_size]
        threshold = tf.constant(1.0 - self.epsilon, dtype=tf.float32)
        self.one_minus_eps = tf.fill(per_example, threshold)

        # Constants and counters that control the act_step while loop.
        zero = tf.constant(0.0, dtype=tf.float32)
        prob = tf.fill(per_example, zero, name="prob")
        counter = tf.zeros_like(prob, dtype=tf.float32, name="counter")
        acc_outputs = tf.fill(
            [self.batch_size, self.output_size], 0.0,
            name='output_accumulator')
        acc_states = tf.zeros_like(
            state, dtype=tf.float32, name="state_accumulator")
        flag = tf.fill(per_example, True, name="flag")

        def any_flag_set(flag, *_rest):
            # Continue while at least one batch element is still flagged.
            return tf.reduce_any(flag)

        final_vars = control_flow_ops.while_loop(
            any_flag_set,
            self.act_step,
            loop_vars=[flag, prob, counter, state, inputs,
                       acc_outputs, acc_states])
        _, probs, iterations, _, _, output, next_state = final_vars

    self.ACT_remainder.append(1 - probs)
    self.ACT_iterations.append(iterations)

    # Restore the tuple form of the state for the caller.
    if self.state_is_tuple:
        next_c, next_h = array_ops.split(1, 2, next_state)
        next_state = rnn_cell._LSTMStateTuple(next_c, next_h)

    return output, next_state
def gibbs_sample(k):
    """Run k alternating hidden/visible sampling steps starting from ``x``.

    Uses ``W``, ``bh``, ``bv``, ``x`` and ``sample`` from the enclosing scope
    and returns the final visible sample with gradients blocked.
    """

    def gibbs_step(step, num_steps, visible):
        # NOTE(review): sample() receives ReLU activations here rather than
        # sigmoid outputs -- confirm sample() handles values above 1.
        # Propagate the visible values to sample the hidden values.
        hidden = sample(tf.nn.relu(tf.matmul(visible, W) + bh))
        # Propagate the hidden values to sample the visible values.
        visible = sample(
            tf.nn.relu(tf.matmul(hidden, tf.transpose(W)) + bv))
        return step + 1, num_steps, visible

    def steps_remaining(step, num_steps, *unused):
        return step < num_steps

    step_counter = tf.constant(0)
    _, _, x_sample = control_flow_ops.while_loop(
        steps_remaining, gibbs_step, [step_counter, tf.constant(k), x])

    # Stop TensorFlow from propagating gradients back through the Gibbs steps.
    return tf.stop_gradient(x_sample)


# The sample of x
def testLoops(self):
    """Tests that loops work on XLA devices."""

    def loop_cond(i, _):
        return math_ops.less(i, 5)

    def loop_body(i, value):
        return i + 1, value * 2.0 + 1.0

    with session_lib.Session() as session:
        x = array_ops.placeholder(dtypes.float32)
        with ops.device("device:XLA_CPU:0"):
            initial_vars = (constant_op.constant(0), x)
            loop_outputs = control_flow_ops.while_loop(
                loop_cond, loop_body, initial_vars)
            y = loop_outputs[1]

        feed = {x: np.float32(2)}
        result = session.run(y, feed)
        # Applying value -> 2 * value + 1 five times to 2 gives 95.
        self.assertAllClose(result, np.float32(95), rtol=1e-3)
def testLoopDeadlock(self):
    """Regression test for bug that caused deadlocks in graphs with loops."""

    def loop_cond(i, _second, _third):
        return math_ops.less(i, 5)

    def loop_body(i, second, _third):
        # The third loop variable is recomputed from the second on each pass.
        return i + 1, second * 2.0 + 1.0, second - 3.0

    with self.test_session() as session:
        x = array_ops.placeholder(dtypes.float32)
        with jit_scope():
            y = x + 1.0
            initial_vars = (constant_op.constant(0), y, x)
            _, _, w = control_flow_ops.while_loop(
                loop_cond, loop_body, initial_vars)
            # y feeds both the loop and this op that consumes the loop result.
            u = w + y
        result = session.run(u, {x: np.float32(2)})
        # With x = 2: y = 3, the loop leaves w = 60, so u = 63.
        self.assertAllClose(result, np.float32(63), rtol=1e-1)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    """Step a recurrent unit over the frames and classify its last valid state.

    The unit built by ``self.create_recurrent_unit`` is stepped over axis 1 of
    ``model_input``. For every example, the second component of the unstacked
    recurrent state at step ``num_frames - 1`` is passed to the classifier
    named by ``FLAGS.video_level_classifier_model``.

    Args:
      model_input: 3-D tensor; axis 1 is stepped over as time and axis 2 is
        the per-step feature size given to the recurrent unit.
      vocab_size: Forwarded unchanged to the video-level classifier.
      num_frames: Per-example frame counts used to pick the last valid step
        (assumed to be an integer vector of length batch -- TODO confirm).
      l2_penalty: Forwarded to ``self.create_recurrent_unit``.
      **unused_params: Forwarded to the video-level classifier.

    Returns:
      Whatever the selected classifier's ``create_model`` returns.
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    emb_dim = input_dims[2]
    hidden_dim = FLAGS.lstm_cells

    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with tf.variable_scope('lstm_forward'):
        recurrent_unit = self.create_recurrent_unit(
            emb_dim, hidden_dim, l2_penalty)

    # The initial state is two stacked zero tensors of [batch, hidden_dim].
    zeros = tf.zeros([tf.shape(model_input)[0], hidden_dim])
    initial_state = tf.stack([zeros, zeros])

    gate_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)
    state_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)

    def frames_remaining(i, *_):
        return i < max_frames

    def step(i, prev_state, gates, states):
        frame = model_input[:, i, :]
        gate, new_state = recurrent_unit(frame, prev_state)
        _, second_component = tf.unstack(new_state)
        gates = gates.write(i, gate)
        states = states.write(i, second_component)
        return i + 1, new_state, gates, states

    _, _, gate_array, state_array = control_flow_ops.while_loop(
        cond=frames_remaining,
        body=step,
        loop_vars=(tf.constant(0, dtype=tf.int32), initial_state,
                   gate_array, state_array))

    # stack() yields time-major tensors: one entry per written step.
    gate_outputs = gate_array.stack()
    state_outputs = state_array.stack()

    # After transposing to batch-major and flattening the first two axes,
    # row b * max_frames + t holds example b at step t.
    batch_size = tf.shape(model_input)[0]
    last_step_rows = tf.range(0, batch_size) * max_frames + (num_frames - 1)

    # The gathered gate outputs are built here but not consumed below.
    gate_outputs = tf.transpose(gate_outputs, [1, 0, 2])
    gate_outputs = tf.gather(
        tf.reshape(gate_outputs, [-1, hidden_dim]), last_step_rows)

    state_outputs = tf.transpose(state_outputs, [1, 0, 2])
    state_outputs = tf.gather(
        tf.reshape(state_outputs, [-1, hidden_dim]), last_step_rows)

    classifier_cls = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
    return classifier_cls().create_model(
        model_input=state_outputs,
        vocab_size=vocab_size,
        **unused_params)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    """Step a two-state recurrent unit over the frames and classify the result.

    The unit built by ``self.create_recurrent_unit`` carries two stacked
    states, one sized by the input feature dimension and one sized by
    ``FLAGS.lstm_cells``. For every example, the second component of the
    hidden-sized state at step ``num_frames - 1`` is passed to the classifier
    named by ``FLAGS.video_level_classifier_model``.

    Args:
      model_input: 3-D tensor; axis 1 is stepped over as time and axis 2 is
        the per-step feature size given to the recurrent unit.
      vocab_size: Forwarded unchanged to the video-level classifier.
      num_frames: Per-example frame counts used to pick the last valid step
        (assumed to be an integer vector of length batch -- TODO confirm).
      l2_penalty: Forwarded to ``self.create_recurrent_unit``.
      **unused_params: Forwarded to the video-level classifier.

    Returns:
      Whatever the selected classifier's ``create_model`` returns.
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    emb_dim = input_dims[2]
    hidden_dim = FLAGS.lstm_cells

    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with tf.variable_scope('lstm_forward'):
        recurrent_unit = self.create_recurrent_unit(
            emb_dim, hidden_dim, l2_penalty)

    hidden_zeros = tf.zeros([tf.shape(model_input)[0], hidden_dim])
    feature_zeros = tf.zeros([tf.shape(model_input)[0], emb_dim])
    hidden_sized_state = tf.stack([hidden_zeros, hidden_zeros])
    feature_sized_state = tf.stack([feature_zeros, feature_zeros])

    gate_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)
    state_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)

    def frames_remaining(i, *_):
        return i < max_frames

    def step(i, prev_first, prev_second, gates, states):
        frame = model_input[:, i, :]
        gate, new_first, new_second = recurrent_unit(
            frame, prev_first, prev_second)
        _, second_component = tf.unstack(new_second)
        gates = gates.write(i, gate)
        states = states.write(i, second_component)
        return i + 1, new_first, new_second, gates, states

    # The feature-sized state is the first recurrent loop variable and the
    # hidden-sized state the second, matching the argument order of step().
    _, _, _, gate_array, state_array = control_flow_ops.while_loop(
        cond=frames_remaining,
        body=step,
        loop_vars=(tf.constant(0, dtype=tf.int32), feature_sized_state,
                   hidden_sized_state, gate_array, state_array))

    # stack() yields time-major tensors: one entry per written step.
    gate_outputs = gate_array.stack()
    state_outputs = state_array.stack()

    # After transposing to batch-major and flattening the first two axes,
    # row b * max_frames + t holds example b at step t.
    batch_size = tf.shape(model_input)[0]
    last_step_rows = tf.range(0, batch_size) * max_frames + (num_frames - 1)

    # The gathered gate outputs are built here but not consumed below.
    gate_outputs = tf.transpose(gate_outputs, [1, 0, 2])
    gate_outputs = tf.gather(
        tf.reshape(gate_outputs, [-1, hidden_dim]), last_step_rows)

    state_outputs = tf.transpose(state_outputs, [1, 0, 2])
    state_outputs = tf.gather(
        tf.reshape(state_outputs, [-1, hidden_dim]), last_step_rows)

    classifier_cls = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
    return classifier_cls().create_model(
        model_input=state_outputs,
        vocab_size=vocab_size,
        **unused_params)
def rnn_gate(self, model_input, lstm_size, num_frames, l2_penalty=1e-8, sub_scope="", **unused_params):
    """Run the LstmGlu2Model recurrent unit over the frames of each video.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' tensor of
        input features.
      lstm_size: Hidden size passed to the recurrent unit.
      num_frames: A vector of length 'batch' giving the number of frames of
        each video (before padding).
      l2_penalty: Forwarded to ``create_recurrent_unit``.
      sub_scope: Prefix for the 'lstm_forward' variable scope.
      **unused_params: Ignored.

    Returns:
      A ``(state_outputs, hidden_outputs)`` pair. ``state_outputs`` is the
      second component of the recurrent state at step ``num_frames - 1`` for
      each example. ``hidden_outputs`` is the first component at every step,
      reshaped to [-1, max_frames, lstm_size].
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    emb_dim = input_dims[2]
    hidden_dim = lstm_size

    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with tf.variable_scope(sub_scope+'lstm_forward'):
        recurrent_unit = LstmGlu2Model().create_recurrent_unit(
            emb_dim, hidden_dim, l2_penalty)

    hidden_zeros = tf.zeros([tf.shape(model_input)[0], hidden_dim])
    feature_zeros = tf.zeros([tf.shape(model_input)[0], emb_dim])
    hidden_sized_state = tf.stack([hidden_zeros, hidden_zeros])
    feature_sized_state = tf.stack([feature_zeros, feature_zeros])

    hidden_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)
    state_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)

    def frames_remaining(i, *_):
        return i < max_frames

    def step(i, prev_first, prev_second, hiddens, states):
        frame = model_input[:, i, :]
        _gate, new_first, new_second = recurrent_unit(
            frame, prev_first, prev_second)
        first_component, second_component = tf.unstack(new_second)
        hiddens = hiddens.write(i, first_component)
        states = states.write(i, second_component)
        return i + 1, new_first, new_second, hiddens, states

    # The feature-sized state is the first recurrent loop variable and the
    # hidden-sized state the second, matching the argument order of step().
    _, _, _, hidden_array, state_array = control_flow_ops.while_loop(
        cond=frames_remaining,
        body=step,
        loop_vars=(tf.constant(0, dtype=tf.int32), feature_sized_state,
                   hidden_sized_state, hidden_array, state_array))

    # stack() is time-major; transpose to put the batch axis first.
    state_outputs = state_array.stack()
    state_outputs = tf.transpose(state_outputs, [1, 0, 2])

    hidden_outputs = hidden_array.stack()
    hidden_outputs = tf.transpose(hidden_outputs, [1, 0, 2])
    hidden_outputs = tf.reshape(hidden_outputs, [-1, max_frames, lstm_size])

    # Once the first two axes are flattened, row b * max_frames + t holds
    # example b at step t; select step num_frames - 1 for each example.
    batch_size = tf.shape(model_input)[0]
    last_step_rows = tf.range(0, batch_size) * max_frames + (num_frames - 1)
    state_outputs = tf.gather(
        tf.reshape(state_outputs, [-1, hidden_dim]), last_step_rows)

    return state_outputs, hidden_outputs
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    """Step a three-component recurrent unit over the frames and classify.

    The unit built by ``self.create_recurrent_unit`` carries a state of three
    stacked tensors. At every step the sum of the second and third components
    is recorded. The value at step ``num_frames - 1`` of each example is
    passed to the classifier named by ``FLAGS.video_level_classifier_model``.

    Args:
      model_input: 3-D tensor; axis 1 is stepped over as time and axis 2 is
        the per-step feature size given to the recurrent unit.
      vocab_size: Forwarded unchanged to the video-level classifier.
      num_frames: Per-example frame counts used to pick the last valid step
        (assumed to be an integer vector of length batch -- TODO confirm).
      l2_penalty: Forwarded to ``self.create_recurrent_unit``.
      **unused_params: Forwarded to the video-level classifier.

    Returns:
      Whatever the selected classifier's ``create_model`` returns.
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    emb_dim = input_dims[2]
    hidden_dim = FLAGS.lstm_cells

    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with tf.variable_scope('lstm_forward'):
        recurrent_unit = self.create_recurrent_unit(
            emb_dim, hidden_dim, l2_penalty)

    # The initial state is three stacked zero tensors of [batch, hidden_dim].
    zeros = tf.zeros([tf.shape(model_input)[0], hidden_dim])
    initial_state = tf.stack([zeros, zeros, zeros])

    state_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)

    def frames_remaining(i, *_):
        return i < max_frames

    def step(i, prev_state, states):
        frame = model_input[:, i, :]
        _gate, new_state = recurrent_unit(frame, prev_state)
        _first, second, third = tf.unstack(new_state)
        states = states.write(i, second + third)
        return i + 1, new_state, states

    _, _, state_array = control_flow_ops.while_loop(
        cond=frames_remaining,
        body=step,
        loop_vars=(tf.constant(0, dtype=tf.int32), initial_state,
                   state_array))

    # stack() is time-major: one entry per written step.
    state_outputs = state_array.stack()

    # After transposing to batch-major and flattening the first two axes,
    # row b * max_frames + t holds example b at step t.
    batch_size = tf.shape(model_input)[0]
    last_step_rows = tf.range(0, batch_size) * max_frames + (num_frames - 1)
    state_outputs = tf.transpose(state_outputs, [1, 0, 2])
    state_outputs = tf.gather(
        tf.reshape(state_outputs, [-1, hidden_dim]), last_step_rows)

    classifier_cls = getattr(video_level_models,
                             FLAGS.video_level_classifier_model)
    return classifier_cls().create_model(
        model_input=state_outputs,
        vocab_size=vocab_size,
        **unused_params)
def rnn_gate(self, model_input, lstm_size, num_frames, l2_penalty=1e-8, sub_scope="", **unused_params):
    """Run the LstmGateModel recurrent unit and return the last valid state.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' tensor of
        input features.
      lstm_size: Hidden size passed to the recurrent unit.
      num_frames: A vector of length 'batch' giving the number of frames of
        each video (before padding).
      l2_penalty: Forwarded to ``create_recurrent_unit``.
      sub_scope: Prefix for the 'lstm_forward' variable scope.
      **unused_params: Ignored.

    Returns:
      For each example, the second component of the recurrent state at step
      ``num_frames - 1``, gathered from rows of width ``lstm_size``.
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    emb_dim = input_dims[2]
    hidden_dim = lstm_size

    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with tf.variable_scope(sub_scope+'lstm_forward'):
        recurrent_unit = LstmGateModel().create_recurrent_unit(
            emb_dim, hidden_dim, l2_penalty)

    # The initial state is two stacked zero tensors of [batch, hidden_dim].
    zeros = tf.zeros([tf.shape(model_input)[0], hidden_dim])
    initial_state = tf.stack([zeros, zeros])

    state_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)

    def frames_remaining(i, *_):
        return i < max_frames

    def step(i, prev_state, states):
        frame = model_input[:, i, :]
        _gate, new_state = recurrent_unit(frame, prev_state)
        _first, second_component = tf.unstack(new_state)
        states = states.write(i, second_component)
        return i + 1, new_state, states

    _, _, state_array = control_flow_ops.while_loop(
        cond=frames_remaining,
        body=step,
        loop_vars=(tf.constant(0, dtype=tf.int32), initial_state,
                   state_array),
        swap_memory=True)

    # stack() is time-major: one entry per written step.
    state_outputs = state_array.stack()

    # After transposing to batch-major and flattening the first two axes,
    # row b * max_frames + t holds example b at step t.
    batch_size = tf.shape(model_input)[0]
    last_step_rows = tf.range(0, batch_size) * max_frames + (num_frames - 1)
    state_outputs = tf.transpose(state_outputs, [1, 0, 2])
    flat_states = tf.reshape(state_outputs, [-1, hidden_dim])
    return tf.gather(flat_states, last_step_rows)
def rnn_gate(self, model_input, lstm_size, num_frames, l2_penalty=1e-8, sub_scope="", **unused_params):
    """Run the LstmGateModel recurrent unit and return every step's hidden state.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' tensor of
        input features.
      lstm_size: Hidden size passed to the recurrent unit.
      num_frames: Accepted but not used in this method.
      l2_penalty: Forwarded to ``create_recurrent_unit``.
      sub_scope: Prefix for the 'lstm_forward' variable scope.
      **unused_params: Ignored.

    Returns:
      The first component of the recurrent state at every step, transposed
      so that the batch axis comes before the time axis.
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    emb_dim = input_dims[2]
    hidden_dim = lstm_size

    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with tf.variable_scope(sub_scope+'lstm_forward'):
        recurrent_unit = LstmGateModel().create_recurrent_unit(
            emb_dim, hidden_dim, l2_penalty)

    # The initial state is two stacked zero tensors of [batch, hidden_dim].
    zeros = tf.zeros([tf.shape(model_input)[0], hidden_dim])
    initial_state = tf.stack([zeros, zeros])

    state_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)

    def frames_remaining(i, *_):
        return i < max_frames

    def step(i, prev_state, states):
        frame = model_input[:, i, :]
        _gate, new_state = recurrent_unit(frame, prev_state)
        first_component, _second = tf.unstack(new_state)
        states = states.write(i, first_component)
        return i + 1, new_state, states

    _, _, state_array = control_flow_ops.while_loop(
        cond=frames_remaining,
        body=step,
        loop_vars=(tf.constant(0, dtype=tf.int32), initial_state,
                   state_array),
        swap_memory=True)

    # stack() is time-major; swap the first two axes to get batch-major.
    time_major_states = state_array.stack()
    return tf.transpose(time_major_states, [1, 0, 2])
def rnn_gate(self, model_input, lstm_size, num_frames, l2_penalty=1e-8, sub_scope="", **unused_params):
    """Run the LstmGlu2Model recurrent unit and return every step's hidden state.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' tensor of
        input features.
      lstm_size: Hidden size passed to the recurrent unit.
      num_frames: Accepted but not used in this method.
      l2_penalty: Forwarded to ``create_recurrent_unit``.
      sub_scope: Prefix for the 'lstm_forward' variable scope.
      **unused_params: Ignored.

    Returns:
      The first component of the hidden-sized recurrent state at every step,
      transposed so that the batch axis comes before the time axis.
    """
    input_dims = model_input.get_shape().as_list()
    max_frames = input_dims[1]
    emb_dim = input_dims[2]
    hidden_dim = lstm_size

    # NOTE(review): the extent of this variable_scope block was reconstructed
    # from whitespace-flattened source; confirm which statements belong in it.
    with tf.variable_scope(sub_scope+'lstm_forward'):
        recurrent_unit = LstmGlu2Model().create_recurrent_unit(
            emb_dim, hidden_dim, l2_penalty)

    hidden_zeros = tf.zeros([tf.shape(model_input)[0], hidden_dim])
    hidden_sized_state = tf.stack([hidden_zeros, hidden_zeros])
    feature_zeros = tf.zeros([tf.shape(model_input)[0], emb_dim])
    feature_sized_state = tf.stack([feature_zeros, feature_zeros])

    state_array = tensor_array_ops.TensorArray(
        dtype=tf.float32, size=max_frames,
        dynamic_size=False, infer_shape=True)

    def frames_remaining(i, *_):
        return i < max_frames

    def step(i, prev_first, prev_second, states):
        frame = model_input[:, i, :]
        _gate, new_first, new_second = recurrent_unit(
            frame, prev_first, prev_second)
        first_component, _second = tf.unstack(new_second)
        states = states.write(i, first_component)
        return i + 1, new_first, new_second, states

    # The feature-sized state is the first recurrent loop variable and the
    # hidden-sized state the second, matching the argument order of step().
    _, _, _, state_array = control_flow_ops.while_loop(
        cond=frames_remaining,
        body=step,
        loop_vars=(tf.constant(0, dtype=tf.int32), feature_sized_state,
                   hidden_sized_state, state_array),
        swap_memory=True)

    # stack() is time-major; swap the first two axes to get batch-major.
    time_major_states = state_array.stack()
    return tf.transpose(time_major_states, [1, 0, 2])
def decode(self, enc_outputs, enc_final_state):
    """Decode step by step until every sequence has ended or the limit is hit.

    ``self.decoder.step`` is called inside a while loop. Each step's logits
    are written to a dynamically sized TensorArray. The loop continues while
    ``time < ANSWER_MAX`` and not every batch row already contains the end
    marker tested in ``has_end_word``.

    Args:
      enc_outputs: Encoder outputs; only the size of axis 1 is read, as the
        batch size (time-major layout, per the original comment).
      enc_final_state: Encoder final state, used as the initial decoder state.

    Returns:
      ``(final_outputs, final_state)``: the stacked per-step logits and the
      decoder state after the last executed step.
    """
    with tf.variable_scope(self.decoder.scope):

        def condition(time, all_outputs: tf.TensorArray, inputs, states):
            def check_outputs_ends():
                def has_end_word(t):
                    # NOTE(review): labels are compared against ANSWER_MAX,
                    # which is also the step limit below -- confirm this is
                    # really the end-of-sentence token id.
                    return tf.reduce_any(tf.equal(t, ANSWER_MAX))

                # tf.argmax replaces the deprecated tf.arg_max alias.
                output_label = tf.argmax(all_outputs.stack(), 2)
                output_label = tf.Print(output_label, [output_label],
                                        "Output Labels: ")
                # The outputs are time-major, i.e. time is the first
                # dimension. To check whether every generated answer has
                # ended, transpose to batch-major, because `map_fn` maps
                # over the first dimension only.
                batch_major_outputs = tf.transpose(output_label, (1, 0))
                all_outputs_ends = tf.reduce_all(
                    tf.map_fn(has_end_word, batch_major_outputs,
                              dtype=tf.bool))
                return all_outputs_ends

            # stack() on an empty TensorArray raises an error, so the end
            # check is only evaluated once at least one step was written.
            all_ends = tf.cond(tf.equal(all_outputs.size(), 0),
                               lambda: tf.constant(False, tf.bool),
                               check_outputs_ends)
            condition_result = tf.logical_and(tf.logical_not(all_ends),
                                              tf.less(time, ANSWER_MAX))
            return condition_result

        def body(time, all_outputs, inputs, state):
            dec_outputs, dec_state, output_logits, next_input = \
                self.decoder.step(inputs, state)
            all_outputs = all_outputs.write(time, output_logits)
            return time + 1, all_outputs, next_input, dec_state

        # clear_after_read=False because the loop condition stacks the
        # array on every iteration.
        output_ta = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=0,
            dynamic_size=True,
            element_shape=(None, config.DEC_VOCAB),
            clear_after_read=False)

        # With time-major input, the batch size is the second dimension.
        batch_size = tf.shape(enc_outputs)[1]
        zero_input = tf.ones(tf.expand_dims(batch_size, axis=0),
                             dtype=tf.int32) * ANSWER_START
        res = control_flow_ops.while_loop(
            condition,
            body,
            loop_vars=[0, output_ta, self.decoder.zero_input(zero_input),
                       enc_final_state],
        )
        final_outputs = res[1].stack()
        final_outputs = tf.Print(final_outputs, [final_outputs],
                                 "Final Output: ")
        final_state = res[3]
    return final_outputs, final_state