def _build(self):
    """Connects the module to the graph.

    Every leaf of the flattened initial state is passed through
    `_single_learnable_state`. When a mask was supplied, the matching mask
    leaf is forwarded as the `learnable` argument.

    Returns:
      The learnable state, which has the same type, structure and shape as
      the `initial_state` passed to the constructor.
    """
    initial_leaves = nest.flatten(self._initial_state)

    if self._mask is None:
        learnable_leaves = [
            _single_learnable_state(leaf, state_id=idx)
            for idx, leaf in enumerate(initial_leaves)
        ]
    else:
        mask_leaves = nest.flatten(self._mask)
        learnable_leaves = []
        # State and mask leaves are paired positionally.
        for idx, (leaf, leaf_mask) in enumerate(zip(initial_leaves, mask_leaves)):
            learnable_leaves.append(
                _single_learnable_state(leaf, state_id=idx, learnable=leaf_mask))

    return nest.pack_sequence_as(
        structure=self._initial_state, flat_sequence=learnable_leaves)
def testRegularizers(self, trainable, state_size):
    """Checks which regularization losses `initial_state` registers.

    With `trainable` false the REGULARIZATION_LOSSES collection must be
    empty; otherwise its first entries (one per flat state element) must be
    named after the L1 regularizer.
    """
    batch_size = 6

    # The attribute is assigned on the class itself, since properties of
    # abstract classes cannot be set.
    snt.RNNCore.state_size = state_size
    flat_state_size = nest.flatten(state_size)
    core = snt.RNNCore(name="dummy_core")

    # The same regularizer object is reused for every flat state element.
    l1_per_leaf = (
        [tf.contrib.layers.l1_regularizer(scale=0.5)] * len(flat_state_size))
    nested_regularizers = nest.pack_sequence_as(
        structure=state_size, flat_sequence=l1_per_leaf)

    core.initial_state(
        batch_size,
        dtype=tf.float32,
        trainable=trainable,
        trainable_regularizers=nested_regularizers)

    graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    if trainable:
        for idx in range(len(flat_state_size)):
            self.assertRegexpMatches(
                graph_regularizers[idx].name, ".*l1_regularizer.*")
    else:
        self.assertFalse(graph_regularizers)
def _create(self):
    """Maps the bridge inputs to a decoder initial state.

    Each bridge input is reshaped to `[batch_size, depth]`, all of them are
    concatenated along axis 1, projected by one fully connected layer to the
    summed decoder state size, and split back into the decoder's state
    structure.

    Returns:
      The projected tensors packed like `self.decoder_state_size`.
    """

    def _to_batch_by_depth(tensor):
        return tf.reshape(
            tensor, [self.batch_size, _total_tensor_depth(tensor)])

    # Concatenate the bridge inputs along the depth dimension.
    reshaped_inputs = nest.map_structure(_to_batch_by_depth, self._bridge_input)
    concatenated = tf.concat(nest.flatten([reshaped_inputs]), axis=1)

    split_sizes = nest.flatten(self.decoder_state_size)
    total_size = sum(split_sizes)

    # Pass the concatenated inputs through a fully connected layer.
    projected = tf.contrib.layers.fully_connected(
        concatenated,
        num_outputs=total_size,
        activation_fn=self._activation_fn,
        weights_initializer=tf.truncated_normal_initializer(
            stddev=self.parameter_init),
        biases_initializer=tf.zeros_initializer(),
        scope=None)

    # Cut the projection back into the per-state pieces.
    state_pieces = tf.split(projected, split_sizes, axis=1)
    return nest.pack_sequence_as(self.decoder_state_size, state_pieces)
def _zero_state(self, img, att, presence, state, transform_features,
                transform_state=False):
    """Derives an initial state by propagating extracted features once.

    Args:
      img: Passed to `self.extract_features`.
      att: Passed to `self.extract_features`.
      presence: Not referenced in this method.
      state: State handed to `self._propagate`; also the packing template
        for the returned state.
      transform_features: If true, the features go through an affine layer
        named 'init_feature_transform' before propagation.
      transform_state: If true, each flat state part goes through its own
        affine layer named 'init_state_transform_<i>'.

    Returns:
      A pair `(state, rnn_outputs)`.
    """
    with tf.variable_scope(self.__class__.__name__) as vs:
        # Only the second element of the extraction result is used.
        features = self.extract_features(img, att)[1]

        if transform_features:
            flat_features = tf.reshape(features, (-1, self.n_units))
            flat_features = AffineLayer(
                flat_features, self.n_units,
                name='init_feature_transform').output
            # Restore the shape the features had before flattening.
            features = tf.reshape(flat_features, tf.shape(features))

        rnn_outputs, propagated = self._propagate(features, state)
        state_parts = nest.flatten(propagated)

        if transform_state:
            state_parts = [
                AffineLayer(
                    part, self.n_units,
                    name='init_state_transform_{}'.format(idx)).output
                for idx, part in enumerate(state_parts)
            ]

        state = nest.pack_sequence_as(
            structure=state, flat_sequence=state_parts)

    # Keep the scope object on the instance.
    self.rnn_vs = vs
    return state, rnn_outputs
def _create(self):
    """Projects the bridge inputs onto the decoder's state structure.

    The bridge inputs are reshaped to `[batch_size, depth]`, concatenated on
    axis 1, sent through `fully_connected` with an output width equal to the
    summed decoder state size, then split and re-nested.

    Returns:
      The split projection packed like `self.decoder_state_size`.
    """

    def _to_batch_by_depth(tensor):
        return tf.reshape(
            tensor, [self.batch_size, _total_tensor_depth(tensor)])

    reshaped_inputs = nest.map_structure(_to_batch_by_depth, self._bridge_input)
    concatenated = tf.concat(nest.flatten([reshaped_inputs]), 1)

    split_sizes = nest.flatten(self.decoder_state_size)
    total_size = sum(split_sizes)

    projected = fully_connected(
        concatenated,
        total_size,
        self._mode,
        self._reuse,
        activation=self._activation_fn)

    state_pieces = tf.split(projected, split_sizes, axis=1)
    return nest.pack_sequence_as(self.decoder_state_size, state_pieces)
def trainable_initial_state(batch_size, state_size, initializer=None,
                            name="initial_state"):
    """Creates a trainable initial state tiled along the batch axis.

    One variable of shape `[1, size]` is created for each entry of the
    flattened `state_size` and tiled `batch_size` times along axis 0.

    Args:
      batch_size: Number of times each variable is tiled along axis 0.
      state_size: A size or nested structure of sizes; each flat entry
        becomes the second dimension of one variable.
      initializer: Optional zero-argument callable returning a variable
        initializer (for example `tf.zeros_initializer`). It is called once
        per state entry. When omitted, `tf.zeros_initializer` is used.
      name: Prefix for the variable names; entry `i` is named `<name>_<i>`
        and its tiled tensor `<name>_<i>_tiled`.

    Returns:
      The tiled tensors packed into the structure of `state_size`.
    """
    flat_state_size = nest.flatten(state_size)

    # Honour the caller's initializer for every state entry; fall back to
    # zeros only when none was supplied.
    init_factory = tf.zeros_initializer if initializer is None else initializer

    tiled_states = []
    for index, size in enumerate(flat_state_size):
        variable_name = "{}_{}".format(name, index)
        initial_state_variable = tf.get_variable(
            variable_name, shape=[1, size], initializer=init_factory())
        tiled_states.append(
            tf.tile(initial_state_variable, [batch_size, 1],
                    name=(variable_name + "_tiled")))

    return nest.pack_sequence_as(structure=state_size,
                                 flat_sequence=tiled_states)
def encode(self, inputs, input_length, _parses):
    """Encodes `inputs` with a stacked bidirectional RNN.

    Args:
      inputs: Input tensor for `tf.nn.bidirectional_dynamic_rnn`.
      input_length: Sequence lengths for `tf.nn.bidirectional_dynamic_rnn`.
      _parses: Unused.

    Returns:
      A pair `(outputs, state)`: the forward and backward outputs
      concatenated on axis 2, and a state with the forward state's structure
      whose elements are the forward/backward elements concatenated on
      axis 1.
    """

    def _stacked_cell():
        layers = [self._make_rnn_cell(i) for i in range(self._num_layers)]
        return tf.contrib.rnn.MultiRNNCell(layers)

    with tf.name_scope('BiLSTMEncoder'):
        fw_cell_enc = _stacked_cell()
        bw_cell_enc = _stacked_cell()

        outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
            fw_cell_enc, bw_cell_enc, inputs, input_length, dtype=tf.float32)
        fw_final, bw_final = final_states

        # Concatenate each element of the final state so that the result is
        # compatible with a unidirectional decoder.
        merged_parts = []
        for fw_part, bw_part in zip(nest.flatten(fw_final),
                                    nest.flatten(bw_final)):
            merged_parts.append(tf.concat((fw_part, bw_part), axis=1))
        merged_state = nest.pack_sequence_as(fw_final, merged_parts)

        return tf.concat(outputs, axis=2), merged_state
def add_decoder_op(self, enc_final_state, enc_hidden_states,
                   output_embed_matrix, training):
    """Builds the decoder cell stack and runs `Seq2SeqDecoder.decode`.

    Args:
      enc_final_state: Final encoder state; re-nested to the decoder cell's
        state structure and used to initialise the decoder.
      enc_hidden_states: Encoder outputs; the size of the last axis is taken
        as the encoder width and the tensor is used as attention memory.
      output_embed_matrix: Forwarded to `decoder.decode`.
      training: Forwarded to `decoder.decode`.

    Returns:
      The result of `decoder.decode`.
    """
    stacked_layers = [self.make_rnn_cell(layer, True)
                      for layer in range(self.config.rnn_layers)]
    cell_dec = tf.contrib.rnn.MultiRNNCell(stacked_layers)

    enc_width = int(enc_hidden_states.get_shape()[-1])
    dec_width = int(cell_dec.output_size)

    if enc_width == dec_width:
        # Flatten and repack the state into the decoder's structure.
        enc_final_state = nest.pack_sequence_as(
            cell_dec.state_size, nest.flatten(enc_final_state))
    else:
        # Mismatched sizes are rejected here; with assertions enabled the
        # projection below never runs.
        assert False, (enc_width, dec_width)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable(
                'kernel', (enc_width, dec_width), dtype=tf.float32)

            def _project(tensor):
                # A relu is applied to the projection.
                return tf.nn.relu(tf.matmul(tensor, kernel))

            enc_final_state = nest.map_structure(_project, enc_final_state)
            # NOTE(review): axis 1 of `kernel` has the decoder width while
            # axis 2 of the hidden states has the encoder width - confirm
            # the contraction axes before enabling this path.
            enc_hidden_states = tf.nn.relu(
                tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))

    if self.config.connect_output_decoder:
        cell_dec = ParentFeedingCellWrapper(cell_dec, enc_final_state)
    else:
        cell_dec = InputIgnoringCellWrapper(cell_dec, enc_final_state)

    if self.config.apply_attention:
        attention = LuongAttention(
            self.config.decoder_hidden_size,
            enc_hidden_states,
            self.input_length_placeholder,
            probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(
            cell_dec,
            attention,
            cell_input_fn=lambda inputs, _: inputs,
            attention_layer_size=self.config.decoder_hidden_size,
            initial_cell_state=enc_final_state)
        # The wrapper's own zero state becomes the decoder's initial state.
        enc_final_state = cell_dec.zero_state(self.batch_size,
                                              dtype=tf.float32)

    decoder = Seq2SeqDecoder(
        self.config,
        self.input_placeholder,
        self.input_length_placeholder,
        self.output_placeholder,
        self.output_length_placeholder,
        self.batch_number_placeholder)
    return decoder.decode(
        cell_dec,
        enc_final_state,
        self.config.grammar.output_size,
        output_embed_matrix,
        training)
def initial_cell_state_from_embedding(cell, z, batch_size, name=None):
    """Computes an initial RNN `cell` state from an embedding, `z`.

    `z` is projected by a tanh dense layer to the summed flat state size,
    split along axis 1 into the individual state sizes, and packed using the
    cell's zero state as the structural template.
    """
    state_widths = tf_nest.flatten(cell.state_size)

    # The zero state is used only as the template for packing.
    state_template = cell.zero_state(batch_size=batch_size, dtype=tf.float32)

    projected = tf.layers.dense(
        z,
        sum(state_widths),
        activation=tf.tanh,
        kernel_initializer=tf.random_normal_initializer(stddev=0.001),
        name=name)
    state_pieces = tf.split(projected, state_widths, axis=1)

    return tf_nest.pack_sequence_as(state_template, state_pieces)
def pack_iterable_as(structure, flat_iterable):
    """See `nest.pack_sequence_as`. Provided for named-arg compatibility.

    Args:
      structure: Forwarded as the `structure` argument.
      flat_iterable: Forwarded as the `flat_sequence` argument.

    Returns:
      The result of `nest.pack_sequence_as`.
    """
    packed = nest.pack_sequence_as(
        structure=structure, flat_sequence=flat_iterable)
    return packed
def testInitialStateTuple(self, trainable, use_custom_initial_value,
                          state_size):
    """Checks structure, static shapes and values of `initial_state`.

    Expected values are all zeros when not trainable, all twos when a custom
    constant initializer is supplied, and otherwise the first row repeated
    for every batch entry.
    """
    batch_size = 6

    # The attribute is assigned on the class itself, since properties of
    # abstract classes cannot be set.
    snt.RNNCore.state_size = state_size
    flat_state_size = nest.flatten(state_size)
    core = snt.RNNCore(name="dummy_core")

    trainable_initializers = None
    if use_custom_initial_value:
        constant_per_leaf = [tf.constant_initializer(2)] * len(flat_state_size)
        trainable_initializers = nest.pack_sequence_as(
            structure=state_size, flat_sequence=constant_per_leaf)

    initial_state = core.initial_state(
        batch_size,
        dtype=tf.float32,
        trainable=trainable,
        trainable_initializers=trainable_initializers)

    nest.assert_same_structure(initial_state, state_size)
    flat_initial_state = nest.flatten(initial_state)

    for state_tensor, size in zip(flat_initial_state, flat_state_size):
        self.assertEqual(state_tensor.get_shape(), [batch_size, size])

    with self.test_session() as sess:
        tf.global_variables_initializer().run()
        evaluated_states = sess.run(flat_initial_state)

        for actual, size in zip(evaluated_states, flat_state_size):
            expected = np.empty([batch_size, size])
            if not trainable:
                expected.fill(0)
            elif use_custom_initial_value:
                expected.fill(2)
            else:
                # No fixed value is known; only require that every row
                # equals the first one.
                expected = np.tile(actual[0], (batch_size, 1))
            self.assertAllClose(actual, expected)
def __call__(self, inputs, state, scope=None):
    """Runs the wrapped cell and applies zoneout to each part of its state.

    Args:
      inputs: Inputs forwarded to the wrapped cell.
      state: Previous state; iterated part by part alongside the zoneout
        probabilities and used as the packing template for the new state.
      scope: Optional scope forwarded to the wrapped cell.

    Returns:
      A pair `(output, new_state)` where `output` is the wrapped cell's
      output and `new_state` has the structure of `state`.

    Raises:
      TypeError: if exactly one of `state_size` and the zoneout
        probabilities is a tuple.
      ValueError: if both are tuples but of different lengths.
      NotImplementedError: if `state_size` is not a tuple; only subdivided
        states are supported.
    """
    state_is_tuple = isinstance(self.state_size, tuple)
    if state_is_tuple != isinstance(self._zoneout_prob, tuple):
        raise TypeError("Subdivided states need subdivided zoneouts.")
    if state_is_tuple and (len(tuple(self.state_size)) !=
                           len(tuple(self._zoneout_prob))):
        raise ValueError("State and zoneout need equally many parts.")
    if not state_is_tuple:
        # Fail loudly, and before any graph ops are built, instead of
        # returning the `NotImplemented` sentinel, which callers would only
        # trip over when unpacking the result.
        raise NotImplementedError(
            "Zoneout is only implemented for tuple states.")

    output, candidate_state = self._cell(inputs, state, scope)

    def train():
        # Dropout on the state delta keeps the old value where units are
        # dropped. The leading (1 - p) factor offsets the rescaling that
        # `tf.nn.dropout` applies to kept units.
        return tuple(
            (1 - prob) * tf.nn.dropout(
                new_part - old_part, (1 - prob), seed=self._seed) + old_part
            for new_part, old_part, prob in zip(
                candidate_state, state, self._zoneout_prob))

    def test():
        # Blend old and new state, weighting the old state by the zoneout
        # probability.
        return tuple(
            prob * old_part + (1 - prob) * new_part
            for new_part, old_part, prob in zip(
                candidate_state, state, self._zoneout_prob))

    zoned_out = tf.cond(self.is_training, train, test)
    new_state = nest.pack_sequence_as(structure=state,
                                      flat_sequence=zoned_out)
    return output, new_state
def nest_map(func, nested):
    """Applies `func` to `nested`, leaf by leaf when it is a sequence.

    Args:
      func: Callable applied to each leaf.
      nested: A single value, or a structure that `nest.is_sequence`
        recognises.

    Returns:
      `func(nested)` for a non-sequence; otherwise the mapped leaves packed
      back into the structure of `nested`.
    """
    if nest.is_sequence(nested):
        mapped_leaves = [func(leaf) for leaf in nest.flatten(nested)]
        return nest.pack_sequence_as(nested, mapped_leaves)
    return func(nested)
def __call__(self, inputs, state, scope=None):
    """Runs the wrapped cell once per beam slice.

    Every tensor in `inputs` and `state` is unstacked along axis 1 into
    `self.beam_size` slices. The cell is called on each slice, reusing
    variables for every slice after the first, and the per-slice results are
    stacked again along axis 1.

    Args:
      inputs: Tensor or nested structure of tensors.
      state: Tensor or nested structure of tensors.
      scope: Optional scope forwarded to the wrapped cell.

    Returns:
      A pair `(output, next_state)` structured like the cell's results for
      the last slice.
    """
    varscope = scope or tf.get_variable_scope()

    def _split_by_beam(structure):
        # Result: one tuple per beam slice, holding that slice of every
        # flat tensor.
        unstacked = [tf.unstack(tensor, num=self.beam_size, axis=1)
                     for tensor in nest.flatten(structure)]
        return list(zip(*unstacked))

    inputs_by_beam = _split_by_beam(inputs)
    state_by_beam = _split_by_beam(state)

    outputs_by_beam = []
    next_states_by_beam = []
    output_template = None
    next_state_template = None

    for beam_idx, (flat_in, flat_st) in enumerate(
            zip(inputs_by_beam, state_by_beam)):
        beam_inputs = nest.pack_sequence_as(inputs, flat_in)
        beam_state = nest.pack_sequence_as(state, flat_st)

        if beam_idx == 0:
            beam_output, beam_next_state = self.cell(
                beam_inputs, beam_state, scope=scope)
        else:
            # Later slices share the variables created for the first one.
            with tf.variable_scope(varscope, reuse=True):
                beam_output, beam_next_state = self.cell(
                    beam_inputs, beam_state,
                    scope=varscope if scope is not None else None)

        outputs_by_beam.append(nest.flatten(beam_output))
        next_states_by_beam.append(nest.flatten(beam_next_state))
        output_template = beam_output
        next_state_template = beam_next_state

    stacked_outputs = [tf.stack(group, axis=1)
                       for group in zip(*outputs_by_beam)]
    stacked_next_states = [tf.stack(group, axis=1)
                           for group in zip(*next_states_by_beam)]

    output = nest.pack_sequence_as(output_template, stacked_outputs)
    next_state = nest.pack_sequence_as(next_state_template,
                                       stacked_next_states)
    return output, next_state
def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      If `state_size` is an int or TensorShape, then the return value is a
      `N-D` tensor of shape `[batch_size x state_size]` filled with zeros.

      If `state_size` is a nested list or tuple, then the return value is
      a nested list or tuple (of the same structure) of `2-D` tensors with
      the shapes `[batch_size x s]` for each s in `state_size`.
    """

    def _zeros_for(size):
        # The dynamic shape carries the batch size; the static shape leaves
        # the leading dimension unknown.
        dynamic_shape = _state_size_with_prefix(size, prefix=[batch_size])
        filled = array_ops.zeros(array_ops.pack(dynamic_shape), dtype=dtype)
        filled.set_shape(_state_size_with_prefix(size, prefix=[None]))
        return filled

    state_size = self.state_size
    if not nest.is_sequence(state_size):
        return _zeros_for(state_size)

    zeros_flat = [_zeros_for(size) for size in nest.flatten(state_size)]
    return nest.pack_sequence_as(structure=state_size,
                                 flat_sequence=zeros_flat)
def output_size(self):
    """Returns the output size of the stack of cores.

    Without skip connections this is the last core's `output_size`. With
    skip connections, the leading dimension of each flat output size is
    summed over all cores, and the result is packed like the first core's
    `output_size`.
    """
    if not self._skip_connections:
        return self._cores[-1].output_size

    summed_sizes = []
    for sizes_across_cores in zip(*tuple(_get_flat_core_sizes(self._cores))):
        # The first core's size list is updated in place with the summed
        # leading dimension.
        merged_size = sizes_across_cores[0]
        merged_size[0] = sum(size[0] for size in sizes_across_cores)
        summed_sizes.append(tf.TensorShape(merged_size))

    return nest.pack_sequence_as(structure=self._cores[0].output_size,
                                 flat_sequence=summed_sizes)
def _reverse_seq(input_seq, lengths):
    """Reverse a list of Tensors up to specified lengths.

    Args:
      input_seq: Sequence of seq_len tensors of dimension
        (batch_size, n_features) or nested tuples of tensors.
      lengths: A `Tensor` of dimension batch_size, containing lengths for
        each sequence in the batch. If "None" is specified, simply reverses
        the list.

    Returns:
      time-reversed sequence
    """
    if lengths is None:
        return list(reversed(input_seq))

    flat_steps = tuple(nest.flatten(step) for step in input_seq)
    reversed_flat_steps = [[] for _ in input_seq]

    # Each `component` holds one flat position of the structure across all
    # time steps.
    for component in zip(*flat_steps):
        static_shape = tensor_shape.unknown_shape(
            ndims=component[0].get_shape().ndims)
        for tensor in component:
            # NOTE(review): the value returned by `merge_with` is not kept -
            # confirm whether the merged shape was meant to be stored.
            static_shape.merge_with(tensor.get_shape())
            tensor.set_shape(static_shape)

        # Join into (time, batch_size, depth).
        joined = array_ops.pack(component)

        # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes
        # int32. The cast is re-applied on every pass of this loop.
        lengths = math_ops.to_int64(lengths)

        # Reverse along dimension 0, then split again into a list.
        flipped = array_ops.reverse_sequence(joined, lengths, 0, 1)
        for step_tensor, bucket in zip(array_ops.unpack(flipped),
                                       reversed_flat_steps):
            step_tensor.set_shape(static_shape)
            bucket.append(step_tensor)

    return [
        nest.pack_sequence_as(structure=step, flat_sequence=bucket)
        for step, bucket in zip(input_seq, reversed_flat_steps)
    ]
def add_decoder_op(self, enc_final_state, enc_hidden_states,
                   output_embed_matrix, training):
    """Builds the beam-search decoder and returns its decoded outputs.

    Args:
      enc_final_state: Final encoder state; re-nested to the decoder cell's
        state structure and tiled `beam_width` times.
      enc_hidden_states: Encoder outputs; the size of the last axis is taken
        as the encoder width and the tiled tensor is used as attention
        memory.
      output_embed_matrix: Passed to `BeamSearchOptimizationDecoder`.
      training: Selects `training_beam_size` over `beam_size` and controls
        whether the gold sequence is given to the decoder.

    Returns:
      The first element of the `dynamic_decode` result.

    Raises:
      NotImplementedError: if `config.use_grammar_constraints` is set.
    """
    stacked_layers = [self.make_rnn_cell(layer, for_decoder=True)
                      for layer in range(self.config.rnn_layers)]
    cell_dec = tf.contrib.rnn.MultiRNNCell(stacked_layers)

    enc_width = int(enc_hidden_states.get_shape()[-1])
    dec_width = int(cell_dec.output_size)

    if enc_width == dec_width:
        # Flatten and repack the state into the decoder's structure.
        enc_final_state = nest.pack_sequence_as(
            cell_dec.state_size, nest.flatten(enc_final_state))
    else:
        # Mismatched sizes are rejected here; with assertions enabled the
        # projection below never runs.
        assert False, (enc_width, dec_width)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable(
                'kernel', (enc_width, dec_width), dtype=tf.float32)

            def _project(tensor):
                # A relu is applied to the projection.
                return tf.nn.relu(tf.matmul(tensor, kernel))

            enc_final_state = nest.map_structure(_project, enc_final_state)
            # NOTE(review): axis 1 of `kernel` has the decoder width while
            # axis 2 of the hidden states has the encoder width - confirm
            # the contraction axes before enabling this path.
            enc_hidden_states = tf.nn.relu(
                tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))

    if training:
        beam_width = self.config.training_beam_size
    else:
        beam_width = self.config.beam_size

    # No parent-feeding wrapper is applied to the cell in this variant.
    tile_batch = tf.contrib.seq2seq.tile_batch

    if self.config.apply_attention:
        attention = LuongAttention(
            dec_width,
            tile_batch(enc_hidden_states, beam_width),
            tile_batch(self.input_length_placeholder, beam_width),
            probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(
            cell_dec,
            attention,
            cell_input_fn=lambda inputs, _: inputs,
            attention_layer_size=dec_width,
            initial_cell_state=tile_batch(enc_final_state, beam_width))
        # The wrapper's own zero state becomes the decoder's initial state.
        enc_final_state = cell_dec.zero_state(self.batch_size * beam_width,
                                              dtype=tf.float32)
    else:
        enc_final_state = tile_batch(enc_final_state, beam_width)

    print('enc_final_state', enc_final_state)

    linear_layer = tf_core_layers.Dense(self.config.output_size)
    go_vector = (tf.ones((self.batch_size,), dtype=tf.int32) *
                 self.config.grammar.start)

    # The gold sequence is supplied only while training; its length is
    # passed with one added.
    decoder = BeamSearchOptimizationDecoder(
        training,
        cell_dec,
        output_embed_matrix,
        go_vector,
        self.config.grammar.end,
        enc_final_state,
        beam_width=beam_width,
        output_layer=linear_layer,
        gold_sequence=self.output_placeholder if training else None,
        gold_sequence_length=(
            (self.output_length_placeholder + 1) if training else None))

    if self.config.use_grammar_constraints:
        raise NotImplementedError("Grammar constraints are not implemented for the beam search yet")

    decode_result = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        output_time_major=True,
        maximum_iterations=self.config.max_length)
    final_outputs, _, _ = decode_result
    return final_outputs
def _build(self, inputs, prev_state):
    """Connects the DeepRNN module into the graph.

    If this is not the first time the module has been connected to the
    graph, the Tensors provided as input_ and state must have the same final
    dimension, in order for the existing variables to be the correct size
    for their corresponding multiplications. The batch size may differ for
    each connection.

    Args:
      inputs: a nested tuple of Tensors of arbitrary dimensionality, with at
        least an initial batch dimension.
      prev_state: a tuple of `prev_state`s that corresponds to the state of
        each one of the cores of the `DeepCore`.

    Returns:
      output: a nested tuple of Tensors of arbitrary dimensionality, with at
        least an initial batch dimension.
      next_state: a tuple of `next_state`s that corresponds to the updated
        state of each one of the cores of the `DeepCore`.

    Raises:
      ValueError: if connecting the module into the graph any time after the
        first time, and the inferred size of the inputs does not match
        previous invocations. This may happen if one connects a module any
        time after the first time that does not have the configuration of
        skip connections as the first time.
    """

    def _concat_leafwise(template, *structures):
        # Concatenate matching leaves of `structures` along axis 1 and pack
        # the results like `template`.
        flat = tuple(nest.flatten(item) for item in structures)
        merged = [tf.concat(group, 1) for group in zip(*flat)]
        return nest.pack_sequence_as(structure=template,
                                     flat_sequence=merged)

    layer_input = inputs
    next_states = []
    skip_outputs = []
    recurrent_idx = 0

    for layer_idx, core in enumerate(self._cores):
        if self._skip_connections and layer_idx > 0:
            # Every core after the first also sees the original inputs.
            layer_input = _concat_leafwise(inputs, inputs, layer_input)

        # Recurrent cores take and return a state; the others are called on
        # the input alone.
        if self._is_recurrent_list[layer_idx]:
            layer_input, core_state = core(layer_input,
                                           prev_state[recurrent_idx])
            next_states.append(core_state)
            recurrent_idx += 1
        else:
            layer_input = core(layer_input)

        if self._skip_connections:
            skip_outputs.append(layer_input)

    if self._skip_connections and self._concat_final_output_if_skip:
        output = _concat_leafwise(skip_outputs[0], *skip_outputs)
    else:
        output = layer_input

    return output, tuple(next_states)
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=None, scope=None,
                          beam_search=True, beam_size=10):
    """Embedding sequence-to-sequence model with optional beam search.

    The encoder inputs are run through `cell` wrapped in an
    `EmbeddingWrapper`; the final encoder state initialises
    `embedding_rnn_decoder`.

    Args:
      encoder_inputs: Inputs for the embedding encoder.
      decoder_inputs: Inputs for the decoder; their count also gives the
        number of decoder outputs.
      cell: RNN cell used for the encoder and, wrapped with an output
        projection when `output_projection` is None, for the decoder.
      num_encoder_symbols: Number of embedding classes on the encoder side.
      num_decoder_symbols: Number of symbols on the decoder side.
      embedding_size: Size of the embeddings.
      output_projection: Passed to the decoder; when None the decoder cell
        is wrapped in an `OutputProjectionWrapper`.
      feed_previous: Python bool or Tensor. For a Tensor both decoder
        variants are built and selected with `cond`.
      dtype: Optional dtype; defaults to the variable scope's dtype.
      scope: Optional variable scope; defaults to "embedding_rnn_seq2seq".
      beam_search: Passed to the decoder.
      beam_size: Passed to the decoder.

    Returns:
      For a bool `feed_previous`, the result of `embedding_rnn_decoder`.
      Otherwise a pair `(outputs, state)`, with `state` packed like the
      encoder state when that is a sequence.
    """
    with variable_scope.variable_scope(
            scope or "embedding_rnn_seq2seq") as scope:
        if dtype is None:
            dtype = scope.dtype
        else:
            scope.set_dtype(dtype)

        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(
                decoder_inputs,
                encoder_state,
                cell,
                num_decoder_symbols,
                embedding_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                scope=scope,
                beam_search=beam_search,
                beam_size=beam_size)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            # The feed-previous variant is built first without reuse; the
            # other variant reuses its variables.
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_rnn_decoder(
                    decoder_inputs,
                    encoder_state,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    beam_search=beam_search,
                    beam_size=beam_size)
                # Outputs and flat state travel through `cond` as one flat
                # list.
                if nest.is_sequence(state):
                    flat_state = nest.flatten(state)
                else:
                    flat_state = [state]
                return outputs + flat_state

        outputs_and_state = control_flow_ops.cond(
            feed_previous, lambda: decoder(True), lambda: decoder(False))

        # Outputs length same as decoder inputs.
        outputs_len = len(decoder_inputs)
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
def trainable_initial_state(batch_size, state_size, dtype, initializers=None):
    """Creates an initial state consisting of trainable variables.

    The trainable variables are created with the same shapes as the elements
    of `state_size` and are tiled to produce an initial state.

    Args:
      batch_size: An int, or scalar int32 Tensor representing the batch
        size.
      state_size: A `TensorShape` or nested tuple of `TensorShape`s to use
        for the shape of the trainable variables.
      dtype: The data type used to create the variables and thus initial
        state.
      initializers: An optional container of the same structure as
        `state_size` containing initializers for the variables.

    Returns:
      A `Tensor` or nested tuple of `Tensor`s with the same size and
      structure as `state_size`, where each `Tensor` is a tiled trainable
      `Variable`.

    Raises:
      ValueError: if the user passes initializers that are not functions.
    """
    flat_state_size = nest.flatten(state_size)
    leaf_count = len(flat_state_size)

    if initializers:
        nest.assert_same_structure(initializers, state_size)
        flat_initializer = nest.flatten(initializers)
        if any(not callable(init) for init in flat_initializer):
            raise ValueError(
                "Not all the passed initializers are callable objects.")
    else:
        flat_initializer = tuple(tf.zeros_initializer
                                 for _ in flat_state_size)

    # Variable names: a flat `namedtuple` contributes its field names; any
    # other structure falls back to a running index. Nested `namedtuple`s
    # are not handled specially.
    try:
        names = ["init_{}".format(state_size._fields[i])
                 for i in range(leaf_count)]
    except (AttributeError, IndexError):
        names = ["init_state_{}".format(i) for i in range(leaf_count)]

    flat_initial_state = []
    for var_name, size, init in zip(names, flat_state_size, flat_initializer):
        # A leading dimension of 1 is created and then tiled to the batch
        # size.
        variable_shape = [1] + tensor_shape.as_shape(size).as_list()
        variable = tf.get_variable(
            var_name, shape=variable_shape, dtype=dtype, initializer=init)

        rank = variable.get_shape().ndims
        multiples = [batch_size] + [1] * (rank - 1)
        flat_initial_state.append(
            tf.tile(variable, multiples, name=(var_name + "_tiled")))

    return nest.pack_sequence_as(structure=state_size,
                                 flat_sequence=flat_initial_state)
def _build(self, inputs, prev_state):
    """Connects the DeepRNN module into the graph.

    If this is not the first time the module has been connected to the
    graph, the Tensors provided as input_ and state must have the same final
    dimension, in order for the existing variables to be the correct size
    for their corresponding multiplications. The batch size may differ for
    each connection.

    Args:
      inputs: a nested tuple of Tensors of arbitrary dimensionality, with at
        least an initial batch dimension.
      prev_state: a tuple of `prev_state`s that corresponds to the state of
        each one of the cores of the `DeepCore`.

    Returns:
      output: a nested tuple of Tensors of arbitrary dimensionality, with at
        least an initial batch dimension.
      next_state: a tuple of `next_state`s that corresponds to the updated
        state of each one of the cores of the `DeepCore`.

    Raises:
      ValueError: if connecting the module into the graph any time after the
        first time, and the inferred size of the inputs does not match
        previous invocations. This may happen if one connects a module any
        time after the first time that does not have the configuration of
        skip connections as the first time.
    """

    def _concat_leafwise(template, *structures):
        # Concatenate matching leaves of `structures` and pack the results
        # like `template`.
        # NOTE(review): `tf.concat` is called with the dimension first, as
        # in the original; confirm this matches the TensorFlow version in
        # use.
        flat = tuple(nest.flatten(item) for item in structures)
        merged = [tf.concat(1, group) for group in zip(*flat)]
        return nest.pack_sequence_as(structure=template,
                                     flat_sequence=merged)

    layer_input = inputs
    next_states = []
    skip_outputs = []
    recurrent_idx = 0

    for layer_idx, core in enumerate(self._cores):
        if self._skip_connections and layer_idx > 0:
            # Every core after the first also sees the original inputs.
            layer_input = _concat_leafwise(inputs, inputs, layer_input)

        # Recurrent cores take and return a state; the others are called on
        # the input alone.
        if self._is_recurrent_list[layer_idx]:
            layer_input, core_state = core(layer_input,
                                           prev_state[recurrent_idx])
            next_states.append(core_state)
            recurrent_idx += 1
        else:
            layer_input = core(layer_input)

        if self._skip_connections:
            skip_outputs.append(layer_input)

    if self._skip_connections:
        output = _concat_leafwise(skip_outputs[0], *skip_outputs)
    else:
        output = layer_input

    return output, tuple(next_states)
def dict_to_state_tuple(input_dict, cell):
    """Reconstructs nested `state` from a dict containing state `Tensor`s.

    Args:
      input_dict: A dict of `Tensor`s.
      cell: An instance of `RNNCell`.

    Returns:
      If `input_dict` does not contain keys 'STATE_PREFIX_i' for
      `0 <= i < n` where `n` is the number of nested entries in
      `cell.state_size`, this function returns `None`. Otherwise, returns a
      `Tensor` if `cell.state_size` is an `int` or a nested tuple of
      `Tensor`s if `cell.state_size` is a nested tuple.

    Raises:
      ValueError: State is partially specified. The `input_dict` must
        contain values for all state components or none at all.
    """
    flat_state_sizes = nest.flatten(cell.state_size)
    state_tensors = []
    with ops.name_scope('dict_to_state_tuple'):
        for i, state_size in enumerate(flat_state_sizes):
            state_name = _get_state_name(i)
            state_tensor = input_dict.get(state_name)
            if state_tensor is None:
                continue
            # Each supplied tensor must have rank 2 and a second dimension
            # equal to the corresponding state size; the checks run before
            # the tensor is used.
            rank_check = check_ops.assert_rank(
                state_tensor, 2, name='check_state_{}_rank'.format(i))
            shape_check = check_ops.assert_equal(
                array_ops.shape(state_tensor)[1],
                state_size,
                name='check_state_{}_shape'.format(i))
            with ops.control_dependencies([rank_check, shape_check]):
                state_tensor = array_ops.identity(state_tensor,
                                                  name=state_name)
            state_tensors.append(state_tensor)

    if not state_tensors:
        return None
    if len(state_tensors) == len(flat_state_sizes):
        # The zero state serves only as the structural template for packing.
        dummy_state = cell.zero_state(batch_size=1, dtype=dtypes.bool)
        return nest.pack_sequence_as(dummy_state, state_tensors)
    # The two sentences of the message are separated by a space.
    raise ValueError(
        'RNN state was partially specified. '
        'Expected zero or {} state Tensors; got {}'.
        format(len(flat_state_sizes), len(state_tensors)))