我们从 Python 开源项目中，提取了以下 50 个代码示例，用于说明如何使用 tensorflow.python.ops.array_ops.concat()。
def _transpose_batch_time(x):
    """Swap the first two axes (batch and time) of a tensor.

    Static shape information is carried over to the result where it is known.

    Args:
      x: A tensor of rank 2 or higher.

    Returns:
      `x` with its first two dimensions exchanged.

    Raises:
      ValueError: if the static rank of `x` is known and is lower than 2.
    """
    static_shape = x.get_shape()
    known_rank = static_shape.ndims
    if known_rank is not None and known_rank < 2:
        raise ValueError(
            "Expected input tensor %s to have rank at least 2, but saw shape: %s" %
            (x, static_shape))

    # The permutation [1, 0, 2, ..., rank - 1] is built from the dynamic rank.
    trailing_axes = math_ops.range(2, array_ops.rank(x))
    permutation = array_ops.concat(([1, 0], trailing_axes), axis=0)
    transposed = array_ops.transpose(x, permutation)

    # Re-attach the static shape with its first two entries swapped.
    swapped_head = tensor_shape.TensorShape(
        [static_shape[1].value, static_shape[0].value])
    transposed.set_shape(swapped_head.concatenate(static_shape[2:]))
    return transposed
def testHStack(self):
    """Concatenate two complex64 4x4 placeholders along axis 0 and check the rows."""

    def _random_complex():
        # Real part is drawn first, then the imaginary part.
        return (np.random.rand(4, 4) +
                1j * np.random.rand(4, 4)).astype(np.complex64)

    with self.test_session(force_gpu=True):
        first = array_ops.placeholder(dtypes.complex64, shape=[4, 4])
        second = array_ops.placeholder(dtypes.complex64, shape=[4, 4])
        joined = array_ops.concat([first, second], 0)
        feeds = {first: _random_complex(), second: _random_complex()}
        value = joined.eval(feed_dict=feeds)

    self.assertEqual(value.shape, joined.get_shape())
    # The first four rows come from `first`, the remaining rows from `second`.
    self.assertAllEqual(value[:4, :], feeds[first])
    self.assertAllEqual(value[4:, :], feeds[second])
def testVStack(self):
    """Concatenate two complex64 4x4 placeholders along axis 1 and check the columns."""

    def _random_complex():
        # Real part is drawn first, then the imaginary part.
        return (np.random.rand(4, 4) +
                1j * np.random.rand(4, 4)).astype(np.complex64)

    with self.test_session(force_gpu=True):
        first = array_ops.placeholder(dtypes.complex64, shape=[4, 4])
        second = array_ops.placeholder(dtypes.complex64, shape=[4, 4])
        joined = array_ops.concat([first, second], 1)
        feeds = {first: _random_complex(), second: _random_complex()}
        value = joined.eval(feed_dict=feeds)

    self.assertEqual(value.shape, joined.get_shape())
    # The first four columns come from `first`, the remaining ones from `second`.
    self.assertAllEqual(value[:, :4], feeds[first])
    self.assertAllEqual(value[:, 4:], feeds[second])
def testGradientWithUnknownInputDim(self):
    """Check concat gradients when the inputs have no static shape.

    Two shapeless complex64 placeholders are concatenated along axis 2; the
    gradients with respect to both inputs, re-concatenated along the same
    axis, must reproduce the upstream gradient that was fed in.
    """

    def _random_complex(*dims):
        return (np.random.rand(*dims) +
                1j * np.random.rand(*dims)).astype(np.complex64)

    with self.test_session(use_gpu=True):
        x = array_ops.placeholder(dtypes.complex64)
        y = array_ops.placeholder(dtypes.complex64)
        joined = array_ops.concat([x, y], 2)

        output_shape = [10, 2, 9]
        upstream = _random_complex(*output_shape)
        upstream_tensor = constant_op.constant(
            list(upstream.flatten()), shape=output_shape)

        input_grads = gradients_impl.gradients([joined], [x, y],
                                               [upstream_tensor])
        rejoined_grad = array_ops.concat(input_grads, 2)

        # The fed values split the last axis of size 9 into 3 + 6.
        feeds = {
            x: _random_complex(10, 2, 3),
            y: _random_complex(10, 2, 6),
        }
        value = rejoined_grad.eval(feed_dict=feeds)

    self.assertAllEqual(value, upstream)
def testShapeWithUnknownConcatDim(self):
    """Check static shape inference when the concat axis is a placeholder."""
    unknown_a = array_ops.placeholder(dtypes.complex64)
    known_a = constant_op.constant(np.complex64(10.0+0j), shape=[4, 4, 4, 4])
    unknown_b = array_ops.placeholder(dtypes.complex64)
    known_b = constant_op.constant(np.complex64(20.0+0j), shape=[4, 4, 4, 4])
    axis = array_ops.placeholder(dtypes.int32)

    # Rank-4 constants among the inputs fix the rank of the result.
    mixed = array_ops.concat([unknown_a, known_a, unknown_b, known_b], axis)
    self.assertEqual(4, mixed.get_shape().ndims)

    # All dimensions unknown.
    all_unknown = array_ops.concat([unknown_a, unknown_b], axis)
    self.assertEqual(None, all_unknown.get_shape())

    # Rank doesn't match.
    rank3 = constant_op.constant(np.complex64(30.0+0j), shape=[4, 4, 4])
    with self.assertRaises(ValueError):
        array_ops.concat([unknown_a, known_a, unknown_b, rank3], axis)
def concatenate(tensors, axis=-1):
    """Join a list of tensors along one axis.

    A negative `axis` is resolved against the rank of the first tensor; when
    that rank is falsy (unknown or zero) axis 0 is used instead. If every
    input is sparse the result comes from `sparse_ops.sparse_concat`;
    otherwise all inputs are passed through `to_dense` and concatenated
    densely.

    Arguments:
        tensors: list of tensors to concatenate.
        axis: concatenation axis.

    Returns:
        A tensor.
    """
    if axis < 0:
        rank = ndim(tensors[0])
        axis = axis % rank if rank else 0

    if not py_all([is_sparse(x) for x in tensors]):
        return array_ops.concat([to_dense(x) for x in tensors], axis)
    return sparse_ops.sparse_concat(axis, tensors)
def build(self, input_shape):
    """Validate the input shapes; nothing else is built here.

    Arguments:
        input_shape: list of input shapes, one per input tensor.

    Raises:
        ValueError: if `input_shape` is not a list, or if the shapes differ
            anywhere other than on `self.axis`.
    """
    # Used purely for shape validation.
    if not isinstance(input_shape, list):
        raise ValueError('`Concatenate` layer should be called '
                         'on a list of inputs')
    if all([shape is None for shape in input_shape]):
        # Nothing to validate; note that `self.built` is left untouched here.
        return

    shapes_as_lists = [
        tensor_shape.TensorShape(shape).as_list() for shape in input_shape
    ]
    # Drop the concat axis from every shape; what remains must be identical.
    distinct_shapes = set()
    for dims in shapes_as_lists:
        del dims[self.axis]
        distinct_shapes.add(tuple(dims))
    if len(distinct_shapes) > 1:
        raise ValueError('`Concatenate` layer requires '
                         'inputs with matching shapes '
                         'except for the concat axis. '
                         'Got inputs shapes: %s' % input_shape)
    self.built = True
def __init__(self, layer, merge_mode='concat', weights=None, **kwargs):
    """Wrap `layer` with a forward copy and a direction-reversed twin.

    Arguments:
        layer: the layer to wrap; its config must contain `go_backwards`.
        merge_mode: one of 'sum', 'mul', 'ave', 'concat' or None.
        weights: optional list of initial weights; the first half goes to
            the forward layer and the second half to the backward layer.
        **kwargs: forwarded to the parent constructor.

    Raises:
        ValueError: if `merge_mode` is not one of the accepted values.
    """
    super(Bidirectional, self).__init__(layer, **kwargs)
    allowed_modes = ['sum', 'mul', 'ave', 'concat', None]
    if merge_mode not in allowed_modes:
        raise ValueError('Invalid merge mode. '
                         'Merge mode should be one of '
                         '{"sum", "mul", "ave", "concat", None}')

    self.forward_layer = copy.copy(layer)
    # The backward layer is rebuilt from the config with the direction flipped.
    backward_config = layer.get_config()
    backward_config['go_backwards'] = not backward_config['go_backwards']
    self.backward_layer = layer.__class__.from_config(backward_config)

    self.forward_layer.name = 'forward_' + self.forward_layer.name
    self.backward_layer.name = 'backward_' + self.backward_layer.name
    self.merge_mode = merge_mode

    if weights:
        half = len(weights) // 2
        self.forward_layer.initial_weights = weights[:half]
        self.backward_layer.initial_weights = weights[half:]

    self.stateful = layer.stateful
    self.return_sequences = layer.return_sequences
    self.supports_masking = True
def inference_graph(self, data):
    """Stack fully connected layers on `data` and concatenate all activations.

    `self.params.num_layers` layers of width `self.params.layer_size` are
    chained, each fed by the previous one. The activations of every layer are
    concatenated along axis 1 (old-style `concat(axis, values)` argument
    order) on the device chosen by `self.device_assigner`.
    """
    with ops.device(self.device_assigner.get_device(self.layer_num)):
        # Compute activations for the neural network.
        latest = layers.fully_connected(data, self.params.layer_size)
        nn_activations = [latest]
        for _ in range(1, self.params.num_layers):
            latest = layers.fully_connected(latest, self.params.layer_size)
            nn_activations.append(latest)

        return array_ops.concat(
            1, nn_activations, name="flattened_nn_activations")
def vector_shape(self, name="vector_shape"):
    """Shape of (batch) vectors that this (batch) matrix will multiply.

    For an operator representing the batch matrix `A` with
    `A.shape = [N1,...,Nn, k, k]`, the result is `[N1,...,Nn, k]`: the batch
    shape followed by the vector space dimension.

    Args:
      name: A name scope to use for ops added by this method.

    Returns:
      `int32` `Tensor`
    """
    # Derived classes get this "for free" once .shape() is implemented.
    with ops.name_scope(self.name):
        with ops.name_scope(name, values=self.inputs):
            batch_dims = self.batch_shape()
            vector_dim = [self.vector_space_dimension()]
            return array_ops.concat(0, (batch_dims, vector_dim))
def _get_identity_operator(self, v):
    """Get an `OperatorPDIdentity` to play the role of `D` in `VDV^T`.

    The identity's shape is the batch shape of `v` (all but its last two
    dimensions) followed by `[r, r]`, where `r` is the size of the last
    dimension of `v`. The static shape is used when fully defined; otherwise
    the shape is assembled from dynamic ops.
    """
    with ops.name_scope("get_identity_operator", values=[v]):
        static_shape = v.get_shape()
        if not static_shape.is_fully_defined():
            dynamic_shape = array_ops.shape(v)
            rank = array_ops.rank(v)
            batch_dims = array_ops.slice(dynamic_shape, [0], [rank - 2])
            r = array_ops.gather(dynamic_shape, rank - 1)  # Last dim of v
            id_shape = array_ops.concat(0, (batch_dims, [r, r]))
        else:
            dims = static_shape.as_list()
            r = dims[-1]
            id_shape = dims[:-2] + [r, r]
        return operator_pd_identity.OperatorPDIdentity(
            id_shape, v.dtype, verify_pd=self._verify_pd)
def _sample_n(self, n, seed=None):
    """Draw `n` samples using a polar method on two uniform variates.

    Reference: http://dl.acm.org/citation.cfm?id=179631
    Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
    Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
    Let X = R*cos(theta), and let Y = R*sin(theta).
    Then X ~ t_df and Y ~ t_df. The variates X and Y are not independent.

    Only X is used here; the result is scaled by `self.sigma` and shifted by
    `self.mu`.
    """
    # Leading axis of size 2 holds the G and H draws; it is unpacked below.
    uniform_shape = array_ops.concat(0, ([2, n], self.batch_shape()))
    uniform = random_ops.random_uniform(
        shape=uniform_shape, dtype=self.dtype, seed=seed)
    draws_g, draws_h = array_ops.unpack(uniform, num=2)

    angle = (2. * math.pi) * draws_h
    radius = math_ops.sqrt(
        self.df * (math_ops.pow(draws_g, -2 / self.df) - 1))
    standard_samples = radius * math_ops.cos(angle)
    return standard_samples * self.sigma + self.mu
def _mode(self):
    """Return `(alpha - 1) / (alpha_sum - k)`, with `k` the first event dim.

    When `self.allow_nan_stats` is true, entries where `alpha <= 1` are
    replaced by NaN. Otherwise the result carries an assertion that every
    component of `alpha` exceeds 1.
    """
    numerator = self.alpha - 1.
    denominator = (array_ops.expand_dims(self.alpha_sum, dim=-1) -
                   math_ops.cast(self.event_shape()[0], self.dtype))
    mode = numerator / denominator

    if not self.allow_nan_stats:
        alpha_above_one = check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype),
            self.alpha,
            message="mode not defined for components of alpha <= 1")
        return control_flow_ops.with_dependencies([alpha_above_one], mode)

    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    full_shape = array_ops.concat(
        0, (self.batch_shape(), self.event_shape()))
    return math_ops.select(
        math_ops.greater(self.alpha, 1.),
        mode,
        array_ops.fill(full_shape, nan, name="nan"))
def _sample_n(self, n, seed=None):
    """Draw `n` samples by colouring standard normal noise with `self._cov`.

    White noise of shape `vector_shape + [n]` is multiplied by the square
    root of the covariance operator, the sample axis is moved to the front,
    and `self.mu` is added.
    """
    # Recall _assert_valid_mu ensures mu and self._cov have same batch shape.
    noise_shape = array_ops.concat(0, [self._cov.vector_shape(), [n]])
    white_noise = random_ops.random_normal(
        shape=noise_shape, mean=0, stddev=1, dtype=self.dtype, seed=seed)
    correlated = self._cov.sqrt_matmul(white_noise)

    # Move the last dimension to the front
    last_axis = array_ops.pack([array_ops.rank(correlated) - 1])
    leading_axes = math_ops.range(0, array_ops.rank(correlated) - 1)
    perm = array_ops.concat(0, (last_axis, leading_axes))

    # TODO(ebrevdo): Once we get a proper tensor contraction op,
    # perform the inner product using that instead of batch_matmul
    # and this slow transpose can go away!
    correlated = array_ops.transpose(correlated, perm)
    return correlated + self.mu
def _get_concat_variable(name, shape, dtype, num_shards):
    """Get a sharded variable concatenated into one tensor.

    The concatenated tensor is cached in the `CONCATENATED_VARIABLES` graph
    collection and looked up there by its full tensor name, so repeated calls
    for the same variable reuse one concat op.

    Args:
      name: base name of the sharded variable.
      shape: shape forwarded to `_get_sharded_variable`.
      dtype: dtype forwarded to `_get_sharded_variable`.
      num_shards: number of shards forwarded to `_get_sharded_variable`.

    Returns:
      The single shard when there is only one, otherwise the shards
      concatenated along axis 0.
    """
    sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
    if len(sharded_variable) == 1:
        return sharded_variable[0]

    concat_name = name + "/concat"
    # At the root variable scope the scope name is empty. Unconditionally
    # prefixing "/" would give a key such as "/w/concat:0" that no tensor can
    # ever carry, so the cache lookup below would always miss.
    scope_name = vs.get_variable_scope().name
    scope_prefix = scope_name + "/" if scope_name else ""
    concat_full_name = scope_prefix + concat_name + ":0"
    for value in ops.get_collection(ops.GraphKeys.CONCATENATED_VARIABLES):
        if value.name == concat_full_name:
            return value

    # Old-style `concat(axis, values)` argument order.
    concat_variable = array_ops.concat(0, sharded_variable, name=concat_name)
    ops.add_to_collection(ops.GraphKeys.CONCATENATED_VARIABLES,
                          concat_variable)
    return concat_variable
def _logits_to_predictions(self, logits):
    """Returns a dict of predictions.

    With a single logit dimension, a sigmoid output is added and the logits
    are widened to two columns by prepending zeros before the softmax and
    argmax are taken.

    Args:
      logits: logits `Tensor` after applying possible centered bias.

    Returns:
      Dict of prediction `Tensor` keyed by `PredictionKey`.
    """
    keys = prediction_key.PredictionKey
    predictions = {keys.LOGITS: logits}

    if self.logits_dimension == 1:
        predictions[keys.LOGISTIC] = math_ops.sigmoid(logits)
        zeros_column = array_ops.zeros_like(logits)
        logits = array_ops.concat(1, [zeros_column, logits])

    predictions[keys.PROBABILITIES] = nn.softmax(logits)
    predictions[keys.CLASSES] = math_ops.argmax(logits, 1)
    return predictions
def _dense_inner_flatten(inputs, new_rank):
    """Helper function for `inner_flatten`.

    Keeps the first `new_rank - 1` dimensions of `inputs` and folds all
    remaining dimensions into one. When `new_rank` is a Python integer and
    the static shape is known, the static shape of the result is set as well.
    """
    rank_check = check_ops.assert_rank_at_least(
        inputs, new_rank, message='inputs has rank less than new_rank')
    with ops.control_dependencies([rank_check]):
        leading_dims = array_ops.slice(
            array_ops.shape(inputs), [0], [new_rank - 1])
        target_shape = array_ops.concat(0, (leading_dims, [-1]))
        reshaped = array_ops.reshape(inputs, target_shape)

    # if `new_rank` is an integer, try to calculate new shape.
    if not isinstance(new_rank, six.integer_types):
        return reshaped
    static_shape = inputs.get_shape()
    if static_shape is None or static_shape.dims is None:
        return reshaped

    dims = static_shape.as_list()
    kept_dims = dims[:new_rank - 1]
    folded_dims = dims[new_rank - 1:]
    if any(dim is None for dim in folded_dims):
        # An unknown inner dimension makes the folded size unknown too.
        folded_size = None
    else:
        folded_size = 1
        for dim in folded_dims:
            folded_size *= dim
    reshaped.set_shape(kept_dims + [folded_size])
    return reshaped
def _forward(self, x):
    """Append a zero to the last dimension of `x` and apply softmax.

    When `self._static_event_ndims` is 0 a trailing dimension is added first.
    """
    # Pad the last dim with a zeros vector. We need this because it lets us
    # infer the scale in the inverse function.
    if self._static_event_ndims == 0:
        y = array_ops.expand_dims(x, dim=-1)
    else:
        y = x

    static_rank = y.get_shape().ndims
    ndims = static_rank if static_rank is not None else array_ops.rank(y)
    # No padding on any axis except one trailing element on the last axis.
    paddings = array_ops.concat(0, (
        array_ops.zeros((ndims - 1, 2), dtype=dtypes.int32),
        [[0, 1]]))
    y = array_ops.pad(y, paddings=paddings)

    # Set shape hints.
    if x.get_shape().ndims is not None:
        dims = x.get_shape().as_list()
        if self._static_event_ndims == 0:
            dims += [2]
        elif dims[-1] is not None:
            dims[-1] += 1
        padded_shape = tensor_shape.TensorShape(dims)
        y.get_shape().assert_is_compatible_with(padded_shape)
        y.set_shape(padded_shape)

    # Since we only support event_ndims in [0, 1] and we do padding, we always
    # reduce over the last dimension, i.e., dim=-1 (which is the default).
    return nn_ops.softmax(y)
def _sample_n(self, n, seed=None):
    """Draw `n` samples by colouring standard normal noise with `self._cov`.

    White noise of shape `vector_shape + [n]` is multiplied by the square
    root of the covariance operator, the sample axis is moved to the front,
    and `self.mu` is added.
    """
    # Recall _assert_valid_mu ensures mu and self._cov have same batch shape.
    noise_shape = array_ops.concat(0, [self._cov.vector_shape(), [n]])
    white_noise = random_ops.random_normal(
        shape=noise_shape, mean=0., stddev=1., dtype=self.dtype, seed=seed)
    correlated = self._cov.sqrt_matmul(white_noise)

    # Move the last dimension to the front
    last_axis = array_ops.pack([array_ops.rank(correlated) - 1])
    leading_axes = math_ops.range(0, array_ops.rank(correlated) - 1)
    perm = array_ops.concat(0, (last_axis, leading_axes))

    # TODO(ebrevdo): Once we get a proper tensor contraction op,
    # perform the inner product using that instead of batch_matmul
    # and this slow transpose can go away!
    correlated = array_ops.transpose(correlated, perm)
    return correlated + self.mu
def _get_concat_variable(name, shape, dtype, num_shards):
    """Get a sharded variable concatenated into one tensor.

    The concatenated tensor is cached in the `CONCATENATED_VARIABLES` graph
    collection and looked up there by its full tensor name, so repeated calls
    for the same variable reuse one concat op.

    Args:
      name: base name of the sharded variable.
      shape: shape forwarded to `_get_sharded_variable`.
      dtype: dtype forwarded to `_get_sharded_variable`.
      num_shards: number of shards forwarded to `_get_sharded_variable`.

    Returns:
      The single shard when there is only one, otherwise the shards
      concatenated along axis 0.
    """
    sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
    if len(sharded_variable) == 1:
        return sharded_variable[0]

    concat_name = name + "/concat"
    # At the root variable scope the scope name is empty. Unconditionally
    # prefixing "/" would give a key such as "/w/concat:0" that no tensor can
    # ever carry, so the cache lookup below would always miss.
    scope_name = vs.get_variable_scope().name
    scope_prefix = scope_name + "/" if scope_name else ""
    concat_full_name = scope_prefix + concat_name + ":0"
    for value in ops.get_collection(ops.GraphKeys.CONCATENATED_VARIABLES):
        if value.name == concat_full_name:
            return value

    concat_variable = array_ops.concat(sharded_variable, 0, name=concat_name)
    ops.add_to_collection(ops.GraphKeys.CONCATENATED_VARIABLES,
                          concat_variable)
    return concat_variable
def __call__(self, inputs, state, mask, scope=None):
    """Long short-term memory cell (LSTM) with a per-example update mask.

    `state` holds the cell and hidden states concatenated along axis 1.
    Where `mask` is 1 the freshly computed states are used; where it is 0
    the previous states are carried over unchanged.

    Returns:
      A pair `(new_h, new_state)` with `new_state` the concatenation of the
      new cell and hidden states along axis 1.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        prev_c, prev_h = array_ops.split(1, 2, state)
        gates = linear([inputs, prev_h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        in_gate, new_input, forget_gate, out_gate = array_ops.split(1, 4, gates)

        candidate_c = (prev_c * sigmoid(forget_gate + self._forget_bias) +
                       sigmoid(in_gate) * tanh(new_input))
        # Add an axis so the mask can multiply the state tensors.
        mask = array_ops.expand_dims(mask, 1)
        new_c = mask * candidate_c + (1. - mask) * prev_c

        candidate_h = tanh(new_c) * sigmoid(out_gate)
        new_h = mask * candidate_h + (1. - mask) * prev_h

        return new_h, array_ops.concat(1, [new_c, new_h])
def __call__(self, inputs, state, scope=None):
    """Convolutional Long short-term memory cell (ConvLSTM).

    `state` is either a `(c, h)` pair (when `self._state_is_tuple`) or a
    single tensor with `c` and `h` concatenated along axis 3; the new state
    is returned in the same form.

    Returns:
      A pair `(new_h, new_state)`.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "ConvLSTMCell"
        as_tuple = self._state_is_tuple
        if as_tuple:
            prev_c, prev_h = state
        else:
            # batch_size * height * width * channel
            prev_c, prev_h = array_ops.split(3, 2, state)

        gates = _conv([inputs, prev_h], 4 * self._num_units, self._k_size,
                      True, initializer=self._initializer)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        in_gate, new_input, forget_gate, out_gate = array_ops.split(3, 4, gates)

        new_c = (prev_c * sigmoid(forget_gate + self._forget_bias) +
                 sigmoid(in_gate) * self._activation(new_input))
        new_h = self._activation(new_c) * sigmoid(out_gate)

        if as_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(3, [new_c, new_h])
        return new_h, new_state
def __call__(self, inputs, state, scope=None):
    """Run one step of SRU.

    Returns:
      A pair `(h, c)`: the highway-combined output and the new cell value.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "SRUCell"
        with tf.variable_scope("x_hat"):
            x_hat = linear([inputs], self._num_units, False)

        with tf.variable_scope("gates"):
            gates = tf.sigmoid(linear([inputs], 2 * self._num_units, True))
            forget_gate, reset_gate = tf.split(gates, 2, axis=1)

        with tf.variable_scope("candidates"):
            c = self._activation(
                forget_gate * state + (1 - forget_gate) * x_hat)
            # Variational dropout as suggested in the paper is disabled here.

        # Highway connection. This implementation differs slightly from the
        # paper (https://arxiv.org/abs/1709.02755): the highway network uses
        # x_hat instead of the cell inputs. See equation (7) of the paper.
        h = reset_gate * c + (1 - reset_gate) * x_hat
        return h, c
def call(self, inputs, state):
    """Long short-term memory cell with attention (LSTMA).

    `state` is a triple `(cell_state, attns, attn_states)`. The inputs are
    combined with the previous attention vector, run through the wrapped
    cell, and the attention window is updated with the new output.

    Returns:
      A pair `(output, new_state)` where `new_state` is again a triple
      `(cell_state, attns, attn_states)`.
    """
    cell_state, attns, attn_states = state
    attn_states = array_ops.reshape(
        attn_states, [-1, self._attn_length, self._attn_size])

    input_size = self._input_size
    if input_size is None:
        # Fall back to the static size of axis 1 of the inputs.
        input_size = inputs.get_shape().as_list()[1]
    cell_inputs = _linear([inputs, attns], input_size, True)

    lstm_output, new_cell_state = self._cell(cell_inputs, cell_state)
    flat_cell_state = array_ops.concat(nest.flatten(new_cell_state), 1)
    new_attns, new_attn_states = self._attention(flat_cell_state, attn_states)

    with tf.variable_scope("attn_output_projection"):
        output = _linear([lstm_output, new_attns], self._attn_size, True)

    # Append the new output along axis 1, then flatten the attention window.
    new_attn_states = array_ops.concat(
        [new_attn_states, array_ops.expand_dims(output, 1)], 1)
    new_attn_states = array_ops.reshape(
        new_attn_states, [-1, self._attn_length * self._attn_size])

    return output, (new_cell_state, new_attns, new_attn_states)
def multiPLSTM(cells, inputs, lens, n_input, initial_states):
    """Build a multilayer PLSTM.

    The time feature (index `n_input` on the last axis of `inputs`) is
    re-attached to the input of every layer. Uses the old-style
    `tf.concat(axis, values)` argument order.

    :param cells: list of RNN cells, one per layer
    :param inputs: tensor indexed on three axes; the last axis holds
        `n_input` features followed by the time feature
    :param lens: lengths of the sequences in the batch, passed to
        `tf.nn.dynamic_rnn` as `sequence_length`
    :param n_input: integer, number of features in the input (without time
        feature)
    :param initial_states: list of initial states, one per cell
    :return: output of the last layer
    """
    assert (len(initial_states) == len(cells))
    times = tf.slice(inputs, [0, 0, n_input], [-1, -1, 1])
    layer_input = tf.slice(inputs, [0, 0, 0], [-1, -1, n_input])

    for k, (cell, initial_state) in enumerate(zip(cells, initial_states)):
        layer_input = tf.concat(2, [layer_input, times])
        with tf.variable_scope("{}".format(k)):
            # The final state of each layer is not used.
            layer_input, _ = tf.nn.dynamic_rnn(
                cell, layer_input, dtype=tf.float32,
                sequence_length=lens, initial_state=initial_state)
    return layer_input
def multiPLSTM(cells, inputs, lens, n_input, initial_states):
    """Build a multilayer PLSTM.

    The time feature (index `n_input` on the last axis of `inputs`) is
    re-attached to the input of every layer.

    Side effect: `initial_states[k]` is overwritten in place with the final
    state of layer `k`.

    :param cells: list of RNN cells, one per layer
    :param inputs: tensor indexed on three axes; the last axis holds
        `n_input` features followed by the time feature
    :param lens: lengths of the sequences in the batch, passed to
        `tf.nn.dynamic_rnn` as `sequence_length`
    :param n_input: integer, number of features in the input (without time
        feature)
    :param initial_states: list of initial states, one per cell; mutated
    :return: output of the last layer
    """
    assert (len(initial_states) == len(cells))
    times = tf.slice(inputs, [0, 0, n_input], [-1, -1, 1])
    layer_input = tf.slice(inputs, [0, 0, 0], [-1, -1, n_input])

    for k, (cell, initial_state) in enumerate(zip(cells, initial_states)):
        layer_input = tf.concat(axis=2, values=[layer_input, times])
        with tf.variable_scope("{}".format(k)):
            layer_input, initial_states[k] = tf.nn.dynamic_rnn(
                cell, layer_input, dtype=tf.float32,
                sequence_length=lens, initial_state=initial_state)
    return layer_input
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM).

    `state` is either a `(c, h)` pair (when `self._state_is_tuple`) or a
    single tensor with `c` and `h` concatenated along axis 1; the new state
    is returned in the same form.

    Returns:
      A pair `(new_h, new_state)`.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        as_tuple = self._state_is_tuple
        # Parameters of gates are concatenated into one multiply for efficiency.
        if as_tuple:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = array_ops.split(1, 2, state)
        gates = _linear([inputs, prev_h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        in_gate, new_input, forget_gate, out_gate = array_ops.split(1, 4, gates)

        new_c = (prev_c * sigmoid(forget_gate + self._forget_bias) +
                 sigmoid(in_gate) * self._activation(new_input))
        new_h = self._activation(new_c) * sigmoid(out_gate)

        if as_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(1, [new_c, new_h])
        return new_h, new_state
def transpose_batch_time(x):
    """Transpose the batch and time dimensions of a Tensor.

    Retains as much of the static shape information as possible.

    Args:
      x: A tensor of rank 2 or higher.

    Returns:
      x transposed along the first two dimensions.

    Raises:
      ValueError: if `x` is rank 1 or lower.
    """
    x_static_shape = x.get_shape()
    if x_static_shape.ndims is not None and x_static_shape.ndims < 2:
        raise ValueError(
            "Expected input tensor %s to have rank at least 2, but saw shape: %s" %
            (x, x_static_shape))

    # The permutation [1, 0, 2, ..., rank - 1] is built from the dynamic rank.
    x_rank = array_ops.rank(x)
    x_t = array_ops.transpose(
        x, array_ops.concat(([1, 0], math_ops.range(2, x_rank)), axis=0))

    # `tf.TensorShape` is the public alias; the `tf.tensor_shape` module path
    # used previously is not part of the public `tf` namespace.
    x_t.set_shape(
        tf.TensorShape([
            x_static_shape[1].value, x_static_shape[0].value
        ]).concatenate(x_static_shape[2:]))
    return x_t
def __call__(self, inputs, state, scope=None):
    """Run this multi-layer cell on inputs, starting from state.

    Each cell in `self._cells` is fed the output of the previous one. With
    tuple state, `state[i]` belongs to cell `i`; otherwise each cell's state
    is sliced out of `state` along axis 1 and the new states are concatenated
    back along axis 1.

    Returns:
      A pair `(output_of_last_cell, new_states)`.

    Raises:
      ValueError: if tuple state is expected but `state` is not a sequence.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "MultiRNNCell"
        offset = 0
        layer_input = inputs
        collected_states = []
        for i, cell in enumerate(self._cells):
            with vs.variable_scope("Cell%d" % i):
                if not self._state_is_tuple:
                    layer_state = array_ops.slice(
                        state, [0, offset], [-1, cell.state_size])
                    offset += cell.state_size
                else:
                    if not nest.is_sequence(state):
                        raise ValueError(
                            "Expected state to be a tuple of length %d, but received: %s"
                            % (len(self.state_size), state))
                    layer_state = state[i]
                layer_input, layer_new_state = cell(layer_input, layer_state)
                collected_states.append(layer_new_state)

    if self._state_is_tuple:
        new_states = tuple(collected_states)
    else:
        new_states = array_ops.concat(1, collected_states)
    return layer_input, new_states
def __call__(self, inputs, state, k_size=3, scope=None):
    """Convolutional Long short-term memory cell (ConvLSTM).

    `state` is either a `(c, h)` pair (when `self._state_is_tuple`) or a
    single tensor with `c` and `h` concatenated along axis 3; the new state
    is returned in the same form. `k_size` is forwarded to `_conv`.

    Returns:
      A pair `(new_h, new_state)`.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "ConvLSTMCell"
        as_tuple = self._state_is_tuple
        if as_tuple:
            prev_c, prev_h = state
        else:
            # batch_size * height * width * channel
            prev_c, prev_h = array_ops.split(3, 2, state)

        gates = _conv([inputs, prev_h], 4 * self._num_units, k_size, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        in_gate, new_input, forget_gate, out_gate = array_ops.split(3, 4, gates)

        new_c = (prev_c * sigmoid(forget_gate + self._forget_bias) +
                 sigmoid(in_gate) * self._activation(new_input))
        new_h = self._activation(new_c) * sigmoid(out_gate)

        if as_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(3, [new_c, new_h])
        return new_h, new_state
def _dense_inner_flatten(inputs, new_rank):
    """Helper function for `inner_flatten`.

    Keeps the first `new_rank - 1` dimensions of `inputs` and folds all
    remaining dimensions into one. When `new_rank` is a Python integer and
    the static shape is known, the static shape of the result is set as well.
    """
    rank_check = check_ops.assert_rank_at_least(
        inputs, new_rank, message='inputs has rank less than new_rank')
    with ops.control_dependencies([rank_check]):
        leading_dims = array_ops.strided_slice(
            array_ops.shape(inputs), [0], [new_rank - 1])
        target_shape = array_ops.concat((leading_dims, [-1]), 0)
        reshaped = array_ops.reshape(inputs, target_shape)

    # if `new_rank` is an integer, try to calculate new shape.
    if not isinstance(new_rank, six.integer_types):
        return reshaped
    static_shape = inputs.get_shape()
    if static_shape is None or static_shape.dims is None:
        return reshaped

    dims = static_shape.as_list()
    kept_dims = dims[:new_rank - 1]
    folded_dims = dims[new_rank - 1:]
    if any(dim is None for dim in folded_dims):
        # An unknown inner dimension makes the folded size unknown too.
        folded_size = None
    else:
        folded_size = 1
        for dim in folded_dims:
            folded_size *= dim
    reshaped.set_shape(kept_dims + [folded_size])
    return reshaped
def _create_zero_outputs(size, dtype, batch_size):
    """Create a zero outputs Tensor structure.

    For every `(size, dtype)` pair in the matching nested structures, a
    zeros tensor of shape `[batch_size] + size` is produced.
    """

    def _as_shape_tensor(s):
        # Tensors pass through; anything else becomes an int32 shape constant.
        if isinstance(s, ops.Tensor):
            return s
        return constant_op.constant(
            tensor_shape.TensorShape(s).as_list(),
            dtype=dtypes.int32,
            name="zero_suffix_shape")

    def _zeros_for(s, d):
        full_shape = array_ops.concat(
            ([batch_size], _as_shape_tensor(s)), axis=0)
        return array_ops.zeros(full_shape, dtype=d)

    return nest.map_structure(_zeros_for, size, dtype)
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with layer normalization.

    `state` is a `(c, h)` pair. Each of the four gate pre-activations and the
    new cell state (before the output activation) are passed through `ln`.

    Returns:
      A pair `(new_h, new_state)` with `new_state` an `LSTMStateTuple`.
    """
    with tf.variable_scope(scope or type(self).__name__):
        prev_c, prev_h = state

        # change bias argument to False since LN will add bias via shift
        gates = tf.nn.rnn_cell._linear(
            [inputs, prev_h], 4 * self._num_units, False)

        # add layer normalization to each gate, in the order i, j, f, o
        gate_scopes = ('i/', 'j/', 'f/', 'o/')
        i, j, f, o = [
            ln(gate, scope=gate_scope)
            for gate, gate_scope in zip(tf.split(1, 4, gates), gate_scopes)
        ]

        new_c = (prev_c * tf.nn.sigmoid(f + self._forget_bias) +
                 tf.nn.sigmoid(i) * self._activation(j))

        # add layer_normalization in calculation of new hidden state
        normalized_c = ln(new_c, scope='new_h/')
        new_h = self._activation(normalized_c) * tf.nn.sigmoid(o)

        return new_h, tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)
def init_placeholders(self):
    """Create the input placeholders and, in train mode, the decoder tensors.

    Always sets `encoder_inputs`, `encoder_inputs_length` and `batch_size`.
    When `self.mode == 'train'` it also sets the decoder placeholders plus
    `decoder_inputs_train`, `decoder_inputs_length_train` and
    `decoder_targets_train`.
    """
    # encoder_inputs: [batch_size, max_time_steps]
    self.encoder_inputs = tf.placeholder(
        dtype=tf.int32, shape=(None, None), name='encoder_inputs')
    # encoder_inputs_length: [batch_size]
    self.encoder_inputs_length = tf.placeholder(
        dtype=tf.int32, shape=(None,), name='encoder_inputs_length')

    # get dynamic batch_size
    self.batch_size = tf.shape(self.encoder_inputs)[0]

    if self.mode != 'train':
        return

    # decoder_inputs: [batch_size, max_time_steps]
    self.decoder_inputs = tf.placeholder(
        dtype=tf.int32, shape=(None, None), name='decoder_inputs')
    # decoder_inputs_length: [batch_size]
    self.decoder_inputs_length = tf.placeholder(
        dtype=tf.int32, shape=(None,), name='decoder_inputs_length')

    token_column_shape = [self.batch_size, 1]
    start_column = tf.ones(
        shape=token_column_shape, dtype=tf.int32) * data_utils.start_token
    end_column = tf.ones(
        shape=token_column_shape, dtype=tf.int32) * data_utils.end_token

    # decoder_inputs_train: [batch_size , max_time_steps + 1]
    # insert _GO symbol in front of each decoder input
    self.decoder_inputs_train = tf.concat(
        [start_column, self.decoder_inputs], axis=1)

    # decoder_inputs_length_train: [batch_size]
    self.decoder_inputs_length_train = self.decoder_inputs_length + 1

    # decoder_targets_train: [batch_size, max_time_steps + 1]
    # insert EOS symbol at the end of each decoder input
    self.decoder_targets_train = tf.concat(
        [self.decoder_inputs, end_column], axis=1)