我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.python.ops.math_ops.add_n()。
def reduce_sum_n(tensors, name=None): """Reduce tensors to a scalar sum. This reduces each tensor in `tensors` to a scalar via `tf.reduce_sum`, then adds them via `tf.add_n`. Args: tensors: List of tensors, all of the same numeric type. name: Tensor name, and scope for all other ops. Returns: Total loss tensor, or None if no losses have been configured. Raises: ValueError: if `losses` is missing or empty. """ if not tensors: raise ValueError('No tensors provided.') tensors = [math_ops.reduce_sum(t, name='%s/sum' % t.op.name) for t in tensors] if len(tensors) == 1: return tensors[0] with ops.name_scope(name, 'reduce_sum_n', tensors) as scope: return math_ops.add_n(tensors, name=scope)
def get_total_loss(add_regularization_losses=True, name="total_loss"): """Returns a tensor whose value represents the total loss. Notice that the function adds the given losses to the regularization losses. Args: add_regularization_losses: A boolean indicating whether or not to use the regularization losses in the sum. name: The name of the returned tensor. Returns: A `Tensor` whose value represents the total loss. Raises: ValueError: if `losses` is not iterable. """ losses = get_losses() if add_regularization_losses: losses += get_regularization_losses() return math_ops.add_n(losses, name=name)
def approximate_duality_gap(self): """Add operations to compute the approximate duality gap. Returns: An Operation that computes the approximate duality gap over all examples. """ with name_scope('sdca/approximate_duality_gap'): _, values_list = self._hashtable.export_sharded() shard_sums = [] for values in values_list: with ops.device(values.device): shard_sums.append( math_ops.reduce_sum(math_ops.cast(values, dtypes.float64), 0)) summed_values = math_ops.add_n(shard_sums) primal_loss = summed_values[1] dual_loss = summed_values[2] example_weights = summed_values[3] # Note: we return NaN if there are no weights or all weights are 0, e.g. # if no examples have been processed return (primal_loss + dual_loss + self._l1_loss() + (2.0 * self._l2_loss(self._symmetric_l2_regularization())) ) / example_weights
def sum_regularizer(regularizer_list, scope=None): """Returns a function that applies the sum of multiple regularizers. Args: regularizer_list: A list of regularizers to apply. scope: An optional scope name Returns: A function with signature `sum_reg(weights)` that applies the sum of all the input regularizers. """ regularizer_list = [reg for reg in regularizer_list if reg is not None] if not regularizer_list: return None def sum_reg(weights): """Applies the sum of all the input regularizers.""" with ops.name_scope(scope, 'sum_regularizer', [weights]) as name: regularizer_tensors = [reg(weights) for reg in regularizer_list] return math_ops.add_n(regularizer_tensors, name=name) return sum_reg
def _mean(self): with ops.control_dependencies(self._assertions): distribution_means = [d.mean() for d in self.components] cat_probs = self._cat_probs(log_probs=False) # This was checked to not be None at construction time. static_event_rank = self.get_event_shape().ndims # Expand the rank of x up to static_event_rank times so that # broadcasting works correctly. def expand(x): expanded_x = x for _ in range(static_event_rank): expanded_x = array_ops.expand_dims(expanded_x, -1) return expanded_x cat_probs = [expand(c_p) for c_p in cat_probs] partial_means = [ c_p * m for (c_p, m) in zip(cat_probs, distribution_means) ] # These should all be the same shape by virtue of matching # batch_shape and event_shape. return math_ops.add_n(partial_means)
def sequence_loss_by_example(logits, targets, weights, average_across_timesteps=True, softmax_loss_function=None, name=None): if len(targets) != len(logits) or len(weights) != len(logits): raise ValueError("Lengths of logits, weights, and targets must be the same " "%d, %d, %d." % (len(logits), len(weights), len(targets))) with tf.name_scope(name, "sequence_loss_by_example",logits + targets + weights): log_perp_list = [] for logit, target, weight in zip(logits, targets, weights): if softmax_loss_function is None: # TODO(irving,ebrevdo): This reshape is needed because # sequence_loss_by_example is called with scalars sometimes, which # violates our general scalar strictness policy. target = array_ops.reshape(target, [-1]) crossent = nn_ops.sparse_softmax_cross_entropy_with_logits( logit, target) else: crossent = softmax_loss_function(logit, target) log_perp_list.append(crossent * weight) log_perps = math_ops.add_n(log_perp_list) if average_across_timesteps: total_size = math_ops.add_n(weights) total_size += 1e-12 # Just to avoid division by 0 for all-0 weights. log_perps /= total_size return log_perps
def sum_regularizer(regularizer_list): """Returns a function that applies the sum of multiple regularizers. Args: regularizer_list: A list of regularizers to apply. Returns: A function with signature `sum_reg(weights, name=None)` that applies the sum of all the input regularizers. """ regularizer_list = [reg for reg in regularizer_list if reg is not None] if not regularizer_list: return None def sum_reg(weights, name=None): """Applies the sum of all the input regularizers.""" with ops.op_scope([weights], name, 'sum_regularizer') as scope: regularizer_tensors = [reg(weights) for reg in regularizer_list] return math_ops.add_n(regularizer_tensors, name=scope) return sum_reg
def reduce_sum_n(tensors, name=None): """Reduce tensors to a scalar sum. This reduces each tensor in `tensors` to a scalar via `tf.reduce_sum`, then adds them via `tf.add_n`. Args: tensors: List of tensors, all of the same numeric type. name: Tensor name, and scope for all other ops. Returns: Total loss tensor, or None if no losses have been configured. Raises: ValueError: if `losses` is missing or empty. """ if not tensors: raise ValueError('No tensors provided.') with ops.name_scope(name, 'reduce_sum_n', tensors) as name_scope: tensors = [ math_ops.reduce_sum(t, name='%s/sum' % t.op.name) for t in tensors] if len(tensors) == 1: return tensors[0] return math_ops.add_n(tensors, name=name_scope)
def _init_clusters_random(self): """Does random initialization of clusters. Returns: Tensor of randomly initialized clusters. """ num_data = math_ops.add_n([array_ops.shape(inp)[0] for inp in self._inputs]) # Note that for mini-batch k-means, we should ensure that the batch size of # data used during initialization is sufficiently large to avoid duplicated # clusters. with ops.control_dependencies( [check_ops.assert_less_equal(self._num_clusters, num_data)]): indices = random_ops.random_uniform( array_ops.reshape(self._num_clusters, [-1]), minval=0, maxval=math_ops.cast(num_data, dtypes.int64), seed=self._random_seed, dtype=dtypes.int64) clusters_init = embedding_lookup( self._inputs, indices, partition_strategy='div') return clusters_init
def _prepare_gramian(self, factors, gramian): """Helper function to create ops to prepare/calculate gramian. Args: factors: Variable or list of Variable representing (sharded) factors. Used to compute the updated corresponding gramian value. gramian: Variable storing the gramian calculated from the factors. Returns: A op that updates the gramian with the calcuated value from the factors. """ partial_gramians = [] for f in factors: with ops.colocate_with(f): partial_gramians.append(math_ops.matmul(f, f, transpose_a=True)) with ops.colocate_with(gramian): prep_gramian = state_ops.assign(gramian, math_ops.add_n(partial_gramians)).op return prep_gramian
def sequence_loss_by_example(logits, targets, weights, average_across_timesteps=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits (per example). Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, default: "sequence_loss_by_example". Returns: 1D batch-sized float Tensor: The log-perplexity for each sequence. Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ if len(targets) != len(logits) or len(weights) != len(logits): raise ValueError("Lengths of logits, weights, and targets must be the same " "%d, %d, %d." % (len(logits), len(weights), len(targets))) with ops.name_scope( name, "sequence_loss_by_example",logits + targets + weights): log_perp_list = [] for logit, target, weight in zip(logits, targets, weights): if softmax_loss_function is None: target = array_ops.reshape(target, [-1]) crossent = nn_ops.sparse_softmax_cross_entropy_with_logits( logit, target) else: crossent = softmax_loss_function(logit, target) log_perp_list.append(crossent * weight) log_perps = math_ops.add_n(log_perp_list) if average_across_timesteps: total_size = math_ops.add_n(weights) total_size += 1e-12 # Just to avoid division by 0 for all-0 weights. log_perps /= total_size return log_perps
def _add_n_or_sum(terms): # add_n works for Tensors of the same dtype and shape shape = terms[0].get_shape() dtype = terms[0].dtype if all(term.get_shape().is_fully_defined() and term.get_shape().is_compatible_with(shape) and term.dtype == dtype for term in terms): return math_ops.add_n(terms) else: return sum(terms)
def size(self, name=None): with ops.name_scope(name, 'sharded_mutable_hash_table_size'): sizes = [ self._table_shards[i].size() for i in range(self._num_shards) ] return math_ops.add_n(sizes)
def _l1_loss(self): """Computes the (un-normalized) l1 loss of the model.""" with name_scope('sdca/l1_loss'): sums = [] for name in ['sparse_features_weights', 'dense_features_weights']: for weights in self._convert_n_to_tensor(self._variables[name]): with ops.device(weights.device): sums.append( math_ops.reduce_sum( math_ops.abs(math_ops.cast(weights, dtypes.float64)))) sum = math_ops.add_n(sums) # SDCA L1 regularization cost is: l1 * sum(|weights|) return self._options['symmetric_l1_regularization'] * sum
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): """Returns predictions and loss for sequence of predictions. Args: decoding: List of Tensors with predictions. labels: List of Tensors with labels. sampling_decoding: Optional, List of Tensor with predictions to be used in sampling. E.g. they shouldn't have dependncy on outputs. If not provided, decoding is used. name: Operation name. Returns: Predictions and losses tensors. """ with ops.name_scope(name, "sequence_classifier", [decoding, labels]): predictions, xent_list = [], [] for i, pred in enumerate(decoding): xent_list.append(nn.softmax_cross_entropy_with_logits( pred, labels[i], name="sequence_loss/xent_raw{0}".format(i))) if sampling_decoding: predictions.append(nn.softmax(sampling_decoding[i])) else: predictions.append(nn.softmax(pred)) xent = math_ops.add_n(xent_list, name="sequence_loss/xent") loss = math_ops.reduce_sum(xent, name="sequence_loss") return array_ops_.pack(predictions, axis=1), loss
def apply_regularization(regularizer, weights_list=None): """Returns the summed penalty by applying `regularizer` to the `weights_list`. Adding a regularization penalty over the layer weights and embedding weights can help prevent overfitting the training data. Regularization over layer biases is less common/useful, but assuming proper data preprocessing/mean subtraction, it usually shouldn't hurt much either. Args: regularizer: A function that takes a single `Tensor` argument and returns a scalar `Tensor` output. weights_list: List of weights `Tensors` or `Variables` to apply `regularizer` over. Defaults to the `GraphKeys.WEIGHTS` collection if `None`. Returns: A scalar representing the overall regularization penalty. Raises: ValueError: If `regularizer` does not return a scalar output, or if we find no weights. """ if not weights_list: weights_list = ops.get_collection(ops.GraphKeys.WEIGHTS) if not weights_list: raise ValueError('No weights to regularize.') with ops.name_scope('get_regularization_penalty', values=weights_list) as scope: penalties = [regularizer(w) for w in weights_list] for p in penalties: if p.get_shape().ndims != 0: raise ValueError('regularizer must return a scalar Tensor instead of a ' 'Tensor with rank %d.' % p.get_shape().ndims) summed_penalty = math_ops.add_n(penalties, name=scope) ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, summed_penalty) return summed_penalty
def approximate_duality_gap(self): """Add operations to compute the approximate duality gap. Returns: An Operation that computes the approximate duality gap over all examples. """ with name_scope('sdca/approximate_duality_gap'): _, values_list = self._hashtable.export_sharded() shard_sums = [] for values in values_list: with ops.device(values.device): # For large tables to_double() below allocates a large temporary # tensor that is freed once the sum operation completes. To reduce # peak memory usage in cases where we have multiple large tables on a # single device, we serialize these operations. # Note that we need double precision to get accurate results. with ops.control_dependencies(shard_sums): shard_sums.append( math_ops.reduce_sum(math_ops.to_double(values), 0)) summed_values = math_ops.add_n(shard_sums) primal_loss = summed_values[1] dual_loss = summed_values[2] example_weights = summed_values[3] # Note: we return NaN if there are no weights or all weights are 0, e.g. # if no examples have been processed return (primal_loss + dual_loss + self._l1_loss() + (2.0 * self._l2_loss(self._symmetric_l2_regularization())) ) / example_weights
def _scaled_dot_product(scale, xs, ys, name=None): """Calculate a scaled, vector inner product between lists of Tensors.""" with ops.name_scope(name, 'scaled_dot_product', [scale, xs, ys]) as scope: # Some of the parameters in our Butcher tableau include zeros. Using # _possibly_nonzero lets us avoid wasted computation. return math_ops.add_n([(scale * x) * y for x, y in zip(xs, ys) if _possibly_nonzero(x) or _possibly_nonzero(y)], name=scope)
def _dot_product(xs, ys, name=None): """Calculate the vector inner product between two lists of Tensors.""" with ops.name_scope(name, 'dot_product', [xs, ys]) as scope: return math_ops.add_n([x * y for x, y in zip(xs, ys)], name=scope)
def sequence_loss_by_example(logits, targets, weights, average_across_timesteps=True, softmax_loss_function=None, name=None): if len(targets) != len(logits) or len(weights) != len(logits): raise ValueError("Lengths of logits, weights, and targets must be the same " "%d, %d, %d." % (len(logits), len(weights), len(targets))) with ops.name_scope(name, "sequence_loss_by_example", logits + targets + weights): log_perp_list = [] for logit, target, weight in zip(logits, targets, weights): if softmax_loss_function is None: # TODO(irving,ebrevdo): This reshape is needed because # sequence_loss_by_example is called with scalars sometimes, which # violates our general scalar strictness policy. target = array_ops.reshape(target, [-1]) crossent = nn_ops.sparse_softmax_cross_entropy_with_logits( logit, target) else: crossent = softmax_loss_function(logit, target) log_perp_list.append(crossent * weight) log_perps = math_ops.add_n(log_perp_list) if average_across_timesteps: total_size = math_ops.add_n(weights) total_size += 1e-12 # Just to avoid division by 0 for all-0 weights. log_perps /= total_size return log_perps
def apply_regularization(regularizer, weights_list=None): """Returns the summed penalty by applying `regularizer` to the `weights_list`. Adding a regularization penalty over the layer weights and embedding weights can help prevent overfitting the training data. Regularization over layer biases is less common/useful, but assuming proper data preprocessing/mean subtraction, it usually shouldn't hurt much either. Args: regularizer: A function that takes a single `Tensor` argument and returns a scalar `Tensor` output. weights_list: List of weights `Tensors` or `Variables` to apply `regularizer` over. Defaults to the `GraphKeys.WEIGHTS` collection if `None`. Returns: A scalar representing the overall regularization penalty. Raises: ValueError: If `regularizer` does not return a scalar output. """ if not weights_list: weights_list = ops.get_collection(ops.GraphKeys.WEIGHTS) with ops.op_scope(weights_list, 'get_regularization_penalty') as scope: penalties = [regularizer(w) for w in weights_list] for p in penalties: if p.get_shape().ndims != 0: raise ValueError('regularizer must return a scalar Tensor instead of a ' 'Tensor with rank %d.' % p.get_shape().ndims) summed_penalty = math_ops.add_n(penalties, name=scope) ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, summed_penalty) return summed_penalty
def _testSurrogateLoss(self, session, losses, expected_addl_terms, xs): surrogate_loss = sg.surrogate_loss(losses) expected_surrogate_loss = math_ops.add_n(losses + expected_addl_terms) self.assertAllClose(*session.run([surrogate_loss, expected_surrogate_loss])) # Test backprop expected_grads = gradients_impl.gradients(ys=expected_surrogate_loss, xs=xs) surrogate_grads = gradients_impl.gradients(ys=surrogate_loss, xs=xs) self.assertEqual(len(expected_grads), len(surrogate_grads)) grad_values = session.run(expected_grads + surrogate_grads) n_grad = len(expected_grads) self.assertAllClose(grad_values[:n_grad], grad_values[n_grad:])
def _l2_loss(self, l2): """Computes the (un-normalized) l2 loss of the model.""" with name_scope('sdca/l2_loss'): sums = [] for name in ['sparse_features_weights', 'dense_features_weights']: for weights in self._convert_n_to_tensor(self._variables[name]): with ops.device(weights.device): sums.append( math_ops.reduce_sum( math_ops.square(math_ops.cast(weights, dtypes.float64)))) sum = math_ops.add_n(sums) # SDCA L2 regularization cost is: l2 * sum(weights^2) / 2 return l2 * sum / 2.0
def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): """Returns predictions and loss for sequence of predictions. Args: decoding: List of Tensors with predictions. labels: List of Tensors with labels. sampling_decoding: Optional, List of Tensor with predictions to be used in sampling. E.g. they shouldn't have dependncy on outputs. If not provided, decoding is used. name: Operation name. Returns: Predictions and losses tensors. """ with ops.name_scope(name, "sequence_classifier", [decoding, labels]): predictions, xent_list = [], [] for i, pred in enumerate(decoding): xent_list.append(nn.softmax_cross_entropy_with_logits( labels=labels[i], logits=pred, name="sequence_loss/xent_raw{0}".format(i))) if sampling_decoding: predictions.append(nn.softmax(sampling_decoding[i])) else: predictions.append(nn.softmax(pred)) xent = math_ops.add_n(xent_list, name="sequence_loss/xent") loss = math_ops.reduce_sum(xent, name="sequence_loss") return array_ops.stack(predictions, axis=1), loss
def apply_regularization(regularizer, weights_list=None): """Returns the summed penalty by applying `regularizer` to the `weights_list`. Adding a regularization penalty over the layer weights and embedding weights can help prevent overfitting the training data. Regularization over layer biases is less common/useful, but assuming proper data preprocessing/mean subtraction, it usually shouldn't hurt much either. Args: regularizer: A function that takes a single `Tensor` argument and returns a scalar `Tensor` output. weights_list: List of weights `Tensors` or `Variables` to apply `regularizer` over. Defaults to the `GraphKeys.WEIGHTS` collection if `None`. Returns: A scalar representing the overall regularization penalty. Raises: ValueError: If `regularizer` does not return a scalar output, or if we find no weights. """ if not weights_list: weights_list = ops.get_collection(ops.GraphKeys.WEIGHTS) if not weights_list: raise ValueError('No weights to regularize.') with ops.name_scope('get_regularization_penalty', values=weights_list) as scope: penalties = [regularizer(w) for w in weights_list] penalties = [ p if p is not None else constant_op.constant(0.0) for p in penalties ] for p in penalties: if p.get_shape().ndims != 0: raise ValueError('regularizer must return a scalar Tensor instead of a ' 'Tensor with rank %d.' % p.get_shape().ndims) summed_penalty = math_ops.add_n(penalties, name=scope) ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, summed_penalty) return summed_penalty
def test_distributive_property(self): """Verifies the distributive property of matrix multiplication.""" with self.test_session(): params = constant_op.constant([.1, .2, .3]) sp_values_a = sparse_tensor_lib.SparseTensor( values=["a"], indices=[[0, 0]], dense_shape=[3, 1]) sp_values_b = sparse_tensor_lib.SparseTensor( values=["b"], indices=[[2, 0]], dense_shape=[3, 1]) sp_values_c = sparse_tensor_lib.SparseTensor( values=["c"], indices=[[2, 0]], dense_shape=[3, 1]) sp_values = sparse_tensor_lib.SparseTensor( values=["a", "b", "c"], indices=[[0, 0], [2, 0], [2, 1]], dense_shape=[3, 2]) result_a = embedding_ops._sampled_scattered_embedding_lookup_sparse( params, sp_values_a, dimension=4, hash_key=self._hash_key) result_b = embedding_ops._sampled_scattered_embedding_lookup_sparse( params, sp_values_b, dimension=4, hash_key=self._hash_key) result_c = embedding_ops._sampled_scattered_embedding_lookup_sparse( params, sp_values_c, dimension=4, hash_key=self._hash_key) result = embedding_ops._sampled_scattered_embedding_lookup_sparse( params, sp_values, dimension=4, hash_key=self._hash_key) result_abc = math_ops.add_n([result_a, result_b, result_c]) self.assertAllClose(result.eval(), result_abc.eval())
def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers): """Creates an op for training for full batch case. Args: inputs: list of input Tensors. cluster_idx_list: A vector (or list of vectors). Each element in the vector corresponds to an input row in 'inp' and specifies the cluster id corresponding to the input. cluster_centers: Tensor Ref of cluster centers. Returns: An op for doing an update of mini-batch k-means. """ cluster_sums = [] cluster_counts = [] epsilon = constant_op.constant(1e-6, dtype=inputs[0].dtype) for inp, cluster_idx in zip(inputs, cluster_idx_list): with ops.colocate_with(inp): cluster_sums.append( math_ops.unsorted_segment_sum(inp, cluster_idx, self._num_clusters)) cluster_counts.append( math_ops.unsorted_segment_sum( array_ops.reshape( array_ops.ones( array_ops.reshape(array_ops.shape(inp)[0], [-1])), [-1, 1]), cluster_idx, self._num_clusters)) with ops.colocate_with(cluster_centers): new_clusters_centers = math_ops.add_n(cluster_sums) / (math_ops.cast( math_ops.add_n(cluster_counts), cluster_sums[0].dtype) + epsilon) if self._clusters_l2_normalized(): new_clusters_centers = nn_impl.l2_normalize(new_clusters_centers, dim=1) return state_ops.assign(cluster_centers, new_clusters_centers)
def sequence_loss_by_example(logits, targets, weights, average_across_timesteps=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits (per example). Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, default: "sequence_loss_by_example". Returns: 1D batch-sized float Tensor: The log-perplexity for each sequence. Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ if len(targets) != len(logits) or len(weights) != len(logits): raise ValueError("Lengths of logits, weights, and targets must be the same " "%d, %d, %d." % (len(logits), len(weights), len(targets))) with ops.name_scope(name, "sequence_loss_by_example", logits + targets + weights): log_perp_list = [] for logit, target, weight in zip(logits, targets, weights): if softmax_loss_function is None: # TODO(irving,ebrevdo): This reshape is needed because # sequence_loss_by_example is called with scalars sometimes, which # violates our general scalar strictness policy. target = array_ops.reshape(target, [-1]) crossent = nn_ops.sparse_softmax_cross_entropy_with_logits( labels=target, logits=logit) else: crossent = softmax_loss_function(target, logit) log_perp_list.append(crossent * weight) log_perps = math_ops.add_n(log_perp_list) if average_across_timesteps: total_size = math_ops.add_n(weights) total_size += 1e-12 # Just to avoid division by 0 for all-0 weights. log_perps /= total_size return log_perps
def sequence_loss_by_example(logits, targets, weights, average_across_timesteps=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits (per example). Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, default: "sequence_loss_by_example". Returns: 1D batch-sized float Tensor: The log-perplexity for each sequence. Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ if len(targets) != len(logits) or len(weights) != len(logits): raise ValueError("Lengths of logits, weights, and targets must be the same " "%d, %d, %d." % (len(logits), len(weights), len(targets))) with ops.op_scope(logits + targets + weights, name, "sequence_loss_by_example"): log_perp_list = [] for logit, target, weight in zip(logits, targets, weights): if softmax_loss_function is None: # TODO(irving,ebrevdo): This reshape is needed because # sequence_loss_by_example is called with scalars sometimes, which # violates our general scalar strictness policy. target = array_ops.reshape(target, [-1]) crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=target) else: crossent = softmax_loss_function(logit, target) log_perp_list.append(crossent * weight) log_perps = math_ops.add_n(log_perp_list) if average_across_timesteps: total_size = math_ops.add_n(weights) total_size += 1e-12 # Just to avoid division by 0 for all-0 weights. log_perps /= total_size return log_perps