We extracted the following 50 code examples from open-source Python projects to illustrate how to use tensorflow.python.ops.math_ops.reduce_sum().
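For orientation, here is a minimal sketch of what math_ops.reduce_sum() does, assuming a TensorFlow 1.x environment (the op is also exposed publicly as tf.reduce_sum); the examples below all call it while building larger graphs.

# Minimal sketch of reduce_sum semantics (assumes TensorFlow 1.x).
import tensorflow as tf
from tensorflow.python.ops import math_ops

x = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])

total = math_ops.reduce_sum(x)                                      # scalar: 21.0
per_row = math_ops.reduce_sum(x, reduction_indices=[1])             # [6., 15.]
per_col = math_ops.reduce_sum(x, reduction_indices=[0],
                              keep_dims=True)                       # [[5., 7., 9.]]

with tf.Session() as sess:
  print(sess.run([total, per_row, per_col]))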
def get_classification_loss(logits, targets, softmax_loss_function=None):
  bucket_outputs = logits
  if softmax_loss_function is None:
    assert len(bucket_outputs) == len(targets) == 1
    # We need to make target an int64-tensor and set its shape.
    bucket_target = array_ops.reshape(math_ops.to_int64(targets[0]), [-1])
    crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
        logits=bucket_outputs[0], labels=bucket_target)
  else:
    assert len(bucket_outputs) == len(targets) == 1
    crossent = softmax_loss_function(bucket_outputs[0], targets[0])

  batch_size = array_ops.shape(targets[0])[0]
  loss = tf.reduce_sum(crossent) / math_ops.cast(batch_size, dtypes.float32)
  return loss
def inference_graph(self, input_data, data_spec=None):
  """Constructs a TF graph for evaluating a random forest.

  Args:
    input_data: A tensor or SparseTensor or placeholder for input data.
    data_spec: A list of tf.dtype values specifying the original types of
      each column.

  Returns:
    The last op in the random forest inference graph.
  """
  data_spec = [constants.DATA_FLOAT] if data_spec is None else data_spec
  probabilities = []
  for i in range(self.params.num_trees):
    with ops.device(self.device_assigner.get_device(i)):
      tree_data = input_data
      if self.params.bagged_features:
        tree_data = self._bag_features(i, input_data)
      probabilities.append(self.trees[i].inference_graph(tree_data, data_spec))
  with ops.device(self.device_assigner.get_device(0)):
    all_predict = array_ops.pack(probabilities)
    return math_ops.div(
        math_ops.reduce_sum(all_predict, 0), self.params.num_trees,
        name='probabilities')
def _gini(self, class_counts):
  """Calculate the Gini impurity.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = 1 - sum_i ( c(i) / c )^2

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)
  return 1.0 - sum_squares / (sums * sums)
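As a quick sanity check on the formula above (not part of the original project, values are made up), the same smoothed per-row Gini computation can be reproduced with NumPy; a balanced node should score higher (more impure) than a nearly pure one:

import numpy as np

# Hypothetical per-node counts; column 0 is the total, columns 1+ are per-class.
class_counts = np.array([[10., 6., 4.],
                         [10., 9., 1.]])
smoothed = 1.0 + class_counts[:, 1:]        # mirrors array_ops.slice(..., [0, 1], [-1, -1])
sums = smoothed.sum(axis=1)
sum_squares = (smoothed ** 2).sum(axis=1)
gini = 1.0 - sum_squares / (sums * sums)
print(gini)                                  # approx. [0.486, 0.278]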
def _weighted_gini(self, class_counts):
  """Our split score is the Gini impurity times the number of examples.

  If c(i) denotes the i-th class count and c = sum_i c(i) then
    score = c * (1 - sum_i ( c(i) / c )^2 )
          = c - sum_i c(i)^2 / c

  Args:
    class_counts: A 2-D tensor of per-class counts, usually a slice or
      gather from variables.node_sums.

  Returns:
    A 1-D tensor of the Gini impurities for each row in the input.
  """
  smoothed = 1.0 + array_ops.slice(class_counts, [0, 1], [-1, -1])
  sums = math_ops.reduce_sum(smoothed, 1)
  sum_squares = math_ops.reduce_sum(math_ops.square(smoothed), 1)
  return sums - sum_squares / sums
def _variance(self, sums, squares):
  """Calculate the variance for each row of the input tensors.

  Variance is V = E[x^2] - (E[x])^2.

  Args:
    sums: A tensor containing output sums, usually a slice from
      variables.node_sums.  Should contain the number of examples seen
      in index 0 so we can calculate expected value.
    squares: Same as sums, but sums of squares.

  Returns:
    A 1-D tensor of the variances for each row in the input.
  """
  total_count = array_ops.slice(sums, [0, 0], [-1, 1])
  e_x = sums / total_count
  e_x2 = squares / total_count
  return math_ops.reduce_sum(e_x2 - math_ops.square(e_x), 1)
def sum(x, axis=None, keepdims=False):
  """Sum of the values in a tensor, alongside the specified axis.

  Arguments:
      x: A tensor or variable.
      axis: An integer, the axis to sum over.
      keepdims: A boolean, whether to keep the dimensions or not.
          If `keepdims` is `False`, the rank of the tensor is reduced
          by 1. If `keepdims` is `True`,
          the reduced dimension is retained with length 1.

  Returns:
      A tensor with sum of `x`.
  """
  axis = _normalize_axis(axis, ndim(x))
  return math_ops.reduce_sum(x, reduction_indices=axis, keep_dims=keepdims)
def reduce_sum_n(tensors, name=None):
  """Reduce tensors to a scalar sum.

  This reduces each tensor in `tensors` to a scalar via `tf.reduce_sum`, then
  adds them via `tf.add_n`.

  Args:
    tensors: List of tensors, all of the same numeric type.
    name: Tensor name, and scope for all other ops.

  Returns:
    Total loss tensor, or None if no losses have been configured.

  Raises:
    ValueError: if `losses` is missing or empty.
  """
  if not tensors:
    raise ValueError('No tensors provided.')
  tensors = [math_ops.reduce_sum(t, name='%s/sum' % t.op.name) for t in tensors]
  if len(tensors) == 1:
    return tensors[0]
  with ops.name_scope(name, 'reduce_sum_n', tensors) as scope:
    return math_ops.add_n(tensors, name=scope)
def _scale_losses(losses, weight):
  """Computes the scaled loss.

  Args:
    losses: A `Tensor` of size [batch_size, d1, ... dN].
    weight: A `Tensor` of size [1], [batch_size] or [batch_size, d1, ... dN].
      The `losses` are reduced (tf.reduce_sum) until its dimension matches
      that of `weight` at which point the reduced `losses` are element-wise
      multiplied by `weight` and a final reduce_sum is computed on the result.
      Conceptually, this operation is equivalent to broadcasting (tiling)
      `weight` to be the same size as `losses`, performing an element-wise
      multiplication, and summing the result.

  Returns:
    A scalar tf.float32 `Tensor` whose value represents the sum of the scaled
      `losses`.
  """
  # First, compute the sum of the losses over all elements:
  start_index = max(0, weight.get_shape().ndims)
  reduction_indices = list(range(start_index, losses.get_shape().ndims))
  reduced_losses = math_ops.reduce_sum(losses,
                                       reduction_indices=reduction_indices)
  reduced_losses = math_ops.mul(reduced_losses, weight)
  return math_ops.reduce_sum(reduced_losses)
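To make the reduction order concrete, here is a hedged NumPy restatement of the same idea (values and names are illustrative, not from the library): losses are first summed over the trailing dimensions not covered by the weight's rank, then weighted element-wise, then summed to a scalar.

import numpy as np

losses = np.arange(6., dtype=np.float32).reshape(2, 3)   # [batch_size=2, d1=3]
weight = np.array([1.0, 0.5], dtype=np.float32)          # one weight per batch element

# Reduce `losses` down to the rank of `weight` (here: sum over axis 1), ...
reduced = losses.sum(axis=tuple(range(weight.ndim, losses.ndim)))
# ... then weight element-wise and sum to a scalar, as _scale_losses does.
scaled_total = (reduced * weight).sum()
print(scaled_total)                                      # (0+1+2)*1.0 + (3+4+5)*0.5 = 9.0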
def approximate_duality_gap(self):
  """Add operations to compute the approximate duality gap.

  Returns:
    An Operation that computes the approximate duality gap over all
    examples.
  """
  with name_scope('sdca/approximate_duality_gap'):
    _, values_list = self._hashtable.export_sharded()
    shard_sums = []
    for values in values_list:
      with ops.device(values.device):
        shard_sums.append(
            math_ops.reduce_sum(math_ops.cast(values, dtypes.float64), 0))
    summed_values = math_ops.add_n(shard_sums)

    primal_loss = summed_values[1]
    dual_loss = summed_values[2]
    example_weights = summed_values[3]
    # Note: we return NaN if there are no weights or all weights are 0, e.g.
    # if no examples have been processed.
    return (primal_loss + dual_loss + self._l1_loss() +
            (2.0 * self._l2_loss(self._symmetric_l2_regularization()))
           ) / example_weights
def _broadcast_weights(weights, values):
  """Broadcast `weights` to the same shape as `values`.

  This returns a version of `weights` following the same broadcast rules as
  `mul(weights, values)`. When computing a weighted average, use this function
  to broadcast `weights` before summing them; e.g.,
  `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.

  Args:
    weights: `Tensor` whose shape is broadcastable to `values`.
    values: `Tensor` of any shape.

  Returns:
    `weights` broadcast to `values` shape.
  """
  weights_shape = weights.get_shape()
  values_shape = values.get_shape()
  if (weights_shape.is_fully_defined() and
      values_shape.is_fully_defined() and
      weights_shape.is_compatible_with(values_shape)):
    return weights
  return math_ops.mul(
      weights, array_ops.ones_like(values), name='broadcast_weights')
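The weighted-average recipe from the docstring, written out as a small TensorFlow 1.x-style sketch (hedged; w and v are made-up tensors, and multiplying by ones_like stands in for _broadcast_weights):

# Hedged sketch of the weighted-average pattern from the docstring (TF 1.x style).
import tensorflow as tf

v = tf.constant([[1., 2.], [3., 4.]])
w = tf.constant([[1.], [3.]])           # broadcastable to the shape of `v`

weighted_mean = (tf.reduce_sum(w * v) /
                 tf.reduce_sum(w * tf.ones_like(v)))   # denominator: broadcast weights

with tf.Session() as sess:
  print(sess.run(weighted_mean))        # (1 + 2 + 9 + 12) / (1 + 1 + 3 + 3) = 3.0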
def _iqfov_via_sqrt_solve(self, x):
  """Get the inverse quadratic form on vectors via a sqrt_solve."""
  # x^T A^{-1} x = || S^{-1} x ||^2,
  # where S is a square root of A (A = S S^T).
  # Steps:
  # 1. Convert x to a matrix, flipping all extra dimensions in `x` to the
  #    final dimension of x_matrix.
  x_matrix = flip_vector_to_matrix(
      x, self.batch_shape(), self.get_batch_shape())
  # 2. Get soln_matrix = S^{-1} x_matrix
  soln_matrix = self.sqrt_solve(x_matrix)
  # 3. Reshape back to a vector.
  soln = flip_matrix_to_vector(
      soln_matrix, extract_batch_shape(x, 1), x.get_shape()[:-1])
  # 4. L2 (batch) vector norm squared.
  result = math_ops.reduce_sum(
      math_ops.square(soln), reduction_indices=[-1])
  result.set_shape(x.get_shape()[:-1])
  return result
def _sqrt_log_det_core(self, diag_chol_c):
  """Finish computation of Sqrt[Log[Det]]."""
  # Complete computation of ._log_det and ._batch_log_det, after the initial
  # Cholesky factor has been taken with the appropriate batch/non-batch method.

  # det(M + VDV^T) = det(D^{-1} + V^T M^{-1} V) * det(D) * det(M)
  #                = det(C) * det(D) * det(M)
  # Multiply by 2 here because this is the log-det of the Cholesky factor of C.
  log_det_c = 2 * math_ops.reduce_sum(
      math_ops.log(diag_chol_c), reduction_indices=[-1])
  # Add together to get Log[det(M + VDV^T)], the Log-det of the updated square
  # root.
  log_det_updated_sqrt = (
      log_det_c + self._diag_operator.log_det() + self._operator.log_det())
  return log_det_updated_sqrt
def _attention(self, query, attn_states): conv2d = nn_ops.conv2d reduce_sum = math_ops.reduce_sum softmax = nn_ops.softmax tanh = math_ops.tanh with vs.variable_scope("Attention"): k = vs.get_variable("AttnW", [1, 1, self._attn_size, self._attn_vec_size]) v = vs.get_variable("AttnV", [self._attn_vec_size]) hidden = array_ops.reshape(attn_states, [-1, self._attn_length, 1, self._attn_size]) hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") y = _linear(query, self._attn_vec_size, True) y = array_ops.reshape(y, [-1, 1, 1, self._attn_vec_size]) s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) a = softmax(s) d = reduce_sum( array_ops.reshape(a, [-1, self._attn_length, 1, 1]) * hidden, [1, 2]) new_attns = array_ops.reshape(d, [-1, self._attn_size]) new_attn_states = array_ops.slice(attn_states, [0, 1, 0], [-1, -1, -1]) return new_attns, new_attn_states
def _scale_losses(losses, weights):
  """Computes the scaled loss.

  Args:
    losses: A `Tensor` of size [batch_size, d1, ... dN].
    weights: A `Tensor` of size [1], [batch_size] or [batch_size, d1, ... dN].
      The `losses` are reduced (tf.reduce_sum) until its dimension matches
      that of `weights` at which point the reduced `losses` are element-wise
      multiplied by `weights` and a final reduce_sum is computed on the result.
      Conceptually, this operation is equivalent to broadcasting (tiling)
      `weights` to be the same size as `losses`, performing an element-wise
      multiplication, and summing the result.

  Returns:
    A scalar tf.float32 `Tensor` whose value represents the sum of the scaled
      `losses`.
  """
  # First, compute the sum of the losses over all elements:
  start_index = max(0, weights.get_shape().ndims)
  reduction_indices = list(range(start_index, losses.get_shape().ndims))
  reduced_losses = math_ops.reduce_sum(losses,
                                       reduction_indices=reduction_indices)
  reduced_losses = math_ops.mul(reduced_losses, weights)
  return math_ops.reduce_sum(reduced_losses)
def regularized_loss(self, examples):
  """Add operations to compute the loss with regularization loss included.

  Args:
    examples: Examples to compute loss on.

  Returns:
    An Operation that computes mean (regularized) loss for given set of
    examples.

  Raises:
    ValueError: if examples are not well defined.
  """
  self._assertSpecified(['example_labels', 'example_weights',
                         'sparse_features', 'dense_features'], examples)
  self._assertList(['sparse_features', 'dense_features'], examples)
  with name_scope('sdca/regularized_loss'):
    weights = convert_to_tensor(examples['example_weights'])
    return ((
        self._l1_loss() +
        # Note that here we are using the raw regularization
        # (as specified by the user) and *not*
        # self._symmetric_l2_regularization().
        self._l2_loss(self._options['symmetric_l2_regularization'])) /
            math_ops.reduce_sum(math_ops.cast(weights, dtypes.float64)) +
            self.unregularized_loss(examples))
def _iqfov_via_solve(self, x):
  """Get the inverse quadratic form on vectors via a solve."""
  # x^T A^{-1} x
  # 1. Convert x to a matrix, flipping all extra dimensions in `x` to the
  #    final dimension of x_matrix.
  x_matrix = flip_vector_to_matrix(
      x, self.batch_shape(), self.get_batch_shape())
  # 2. Get x_whitened_matrix = A^{-1} x_matrix
  soln_matrix = self.solve(x_matrix)
  # 3. Reshape back to a vector.
  soln = flip_matrix_to_vector(
      soln_matrix, extract_batch_shape(x, 1), x.get_shape()[:-1])
  # 4. Compute the dot product: x^T soln
  result = math_ops.reduce_sum(x * soln, reduction_indices=[-1])
  result.set_shape(x.get_shape()[:-1])
  return result
def _kl_categorical_categorical(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a and b Categorical.

  Args:
    a: instance of a Categorical distribution object.
    b: instance of a Categorical distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_categorical_categorical".

  Returns:
    Batchwise KL(a || b)
  """
  with ops.name_scope(
      name, "kl_categorical_categorical", [a.logits, b.logits]):
    # sum(p * ln(p / q))
    return math_ops.reduce_sum(
        nn_ops.softmax(a.logits) * (nn_ops.log_softmax(a.logits)
                                    - nn_ops.log_softmax(b.logits)),
        reduction_indices=[-1])
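A quick numeric check of the sum(p * ln(p / q)) formula, done in plain NumPy (purely illustrative; the logits are chosen arbitrarily and the softmax helper is not from the library):

import numpy as np

def softmax(z):
  e = np.exp(z - z.max(axis=-1, keepdims=True))
  return e / e.sum(axis=-1, keepdims=True)

a_logits = np.array([[0.0, 1.0, 2.0]])
b_logits = np.array([[2.0, 1.0, 0.0]])
p, q = softmax(a_logits), softmax(b_logits)

kl = np.sum(p * (np.log(p) - np.log(q)), axis=-1)   # same reduction as the TF code
print(kl)                                           # non-negative, zero only when p == q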
def _inverse_log_det_jacobian(self, y):
  # WLOG, consider the vector case:
  #   x = log(y[:-1]) - log(y[-1])
  # where,
  #   y[-1] = 1 - sum(y[:-1]).
  # We have:
  #   det{ dX/dY } = det{ diag(1 ./ y[:-1]) + 1 / y[-1] }
  #                = det{ inv{ diag(y[:-1]) - y[:-1]' y[:-1] } }   (1)
  #                = 1 / det{ diag(y[:-1]) - y[:-1]' y[:-1] }
  #                = 1 / { (1 + y[:-1]' inv(diag(y[:-1])) y[:-1]) *
  #                        det(diag(y[:-1])) }                     (2)
  #                = 1 / { y[-1] prod(y[:-1]) }
  #                = 1 / prod(y)
  # (1) - https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
  #       or by noting that det{ dX/dY } = 1 / det{ dY/dX } from Bijector
  #       docstring "Tip".
  # (2) - https://en.wikipedia.org/wiki/Matrix_determinant_lemma
  return -math_ops.reduce_sum(math_ops.log(y), reduction_indices=-1)
def _attention(self, query, attn_states): conv2d = nn_ops.conv2d reduce_sum = math_ops.reduce_sum softmax = nn_ops.softmax tanh = math_ops.tanh with tf.variable_scope("attention"): k = tf.get_variable( "attn_w", [1, 1, self._attn_size, self._attn_vec_size]) v = tf.get_variable("attn_v", [self._attn_vec_size]) hidden = array_ops.reshape(attn_states, [-1, self._attn_length, 1, self._attn_size]) hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME") y = _linear(query, self._attn_vec_size, True) y = array_ops.reshape(y, [-1, 1, 1, self._attn_vec_size]) s = reduce_sum(v * tanh(hidden_features + y), [2, 3]) a = softmax(s) d = reduce_sum( array_ops.reshape(a, [-1, self._attn_length, 1, 1]) * hidden, [1, 2]) new_attns = array_ops.reshape(d, [-1, self._attn_size]) new_attn_states = array_ops.slice(attn_states, [0, 1, 0], [-1, -1, -1]) return new_attns, new_attn_states
def sequence_loss_by_mle(logits, targets, vocab_size, sequence_length,
                         batch_size, output_projection=None):
  # print("logits: ", np.shape(logits[0]))
  # logits: [seq_len, batch_size, emb_dim]
  # targets: [seq_len, batch_size]  =====transpose=====>  [batch_size, seq_len]
  # labels = tf.to_int32(tf.transpose(targets))
  # targets: [seq_len, batch_size]  ====reshape[-1]====>  [seq_len * batch_size]
  labels = tf.to_int32(tf.reshape(targets, [-1]))

  if output_projection is not None:
    # logits = nn_ops.xw_plus_b(logits, output_projection[0], output_projection[1])
    logits = [tf.matmul(logit, output_projection[0]) + output_projection[1]
              for logit in logits]

  reshape_logits = tf.reshape(logits, [-1, vocab_size])  # [seq_len * batch_size, vocab_size]

  prediction = tf.clip_by_value(reshape_logits, 1e-20, 1.0)

  pretrain_loss = -tf.reduce_sum(
      # [seq_len * batch_size, vocab_size]
      tf.one_hot(labels, vocab_size, 1.0, 0.0) * tf.log(prediction)
  ) / (sequence_length * batch_size)
  return pretrain_loss
def __call__(self, inputs, state, scope=None):
  """Gated recurrent unit (GRU) with nunits cells."""
  with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
    with vs.variable_scope("Gates"):  # Reset gate and update gate.
      # We start with bias of 1.0 to not reset and not update.
      r, u, g = array_ops.split(1, 3, _linear([inputs, state],
                                              3 * self._num_units, True, 1.0))
      r, u, g = sigmoid(r), sigmoid(u), sigmoid(g)

    with vs.variable_scope("Candidate"):
      c = self._activation(_linear([inputs, r * state],
                                   self._num_units, True))

    new_h = u * state + (1 - u) * c

    eps = 1e-13
    temp = math_ops.div(
        math_ops.reduce_sum(math_ops.mul(new_h, state), 1),
        math_ops.reduce_sum(math_ops.mul(state, state), 1) + eps)

    m = array_ops.transpose(g)
    t1 = math_ops.mul(m, temp)
    t1 = array_ops.transpose(t1)

    distract_h = new_h - state * t1

    return distract_h, distract_h
def kullback_leibler_divergence(weights=1.0, name='KullbackLeiberDivergence',
                                scope=None, collect=False):
  """Adds a Kullback-Leibler divergence loss to the training procedure.

  Args:
    name: name of the op.
    scope: The scope for the operations performed in computing the loss.
    collect: add to losses collection.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If `predictions` shape doesn't match `labels` shape,
      or `weights` is `None`.
  """
  def inner_loss(y_true, y_pred):
    y_true = clip(y_true, EPSILON, 1)
    y_pred = clip(y_pred, EPSILON, 1)
    losses = tf.reduce_sum(input_tensor=y_true * tf.log(x=y_true / y_pred),
                           axis=-1)
    return losses

  return built_loss(inner_loss, weights, name, scope, collect)
def reduce_sum_n(tensors, name=None):
  """Reduce tensors to a scalar sum.

  This reduces each tensor in `tensors` to a scalar via `tf.reduce_sum`, then
  adds them via `tf.add_n`.

  Args:
    tensors: List of tensors, all of the same numeric type.
    name: Tensor name, and scope for all other ops.

  Returns:
    Total loss tensor, or None if no losses have been configured.

  Raises:
    ValueError: if `losses` is missing or empty.
  """
  if not tensors:
    raise ValueError('No tensors provided.')
  with ops.name_scope(name, 'reduce_sum_n', tensors) as name_scope:
    tensors = [
        math_ops.reduce_sum(t, name='%s/sum' % t.op.name) for t in tensors]
    if len(tensors) == 1:
      return tensors[0]
    return math_ops.add_n(tensors, name=name_scope)
def testDefaultsSampleKLWithoutAnalyticKLOrEntropy(self):
  x = constant_op.constant([[-6., 3., 6.]])

  prior = distributions.Bernoulli(0.5)
  variational = st.StochasticTensor(
      NormalNoEntropy(loc=inference_net(x, 1), scale=1.))
  vi.register_prior(variational, prior)
  px = distributions.Normal(loc=generative_net(variational, 3), scale=1.)
  log_likelihood = math_ops.reduce_sum(px.log_prob(x), 1)

  # No analytic KL available between prior and variational distributions.
  with self.assertRaisesRegexp(NotImplementedError, "No KL"):
    distributions.kl(variational.distribution, prior)

  elbo = vi.elbo(
      variational_with_prior={variational: prior},
      log_likelihood=log_likelihood)
  expected_elbo = log_likelihood + prior.log_prob(
      variational) - variational.distribution.log_prob(variational)

  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    self.assertAllEqual(*sess.run([expected_elbo, elbo]))
def _scale_losses(losses, weights):
  """Computes the scaled loss.

  Args:
    losses: A `Tensor` of size [batch_size, d1, ... dN].
    weights: A `Tensor` of size [1], [batch_size] or [batch_size, d1, ... dN].
      The `losses` are reduced (tf.reduce_sum) until its dimension matches
      that of `weights` at which point the reduced `losses` are element-wise
      multiplied by `weights` and a final reduce_sum is computed on the result.
      Conceptually, this operation is equivalent to broadcasting (tiling)
      `weights` to be the same size as `losses`, performing an element-wise
      multiplication, and summing the result.

  Returns:
    A scalar tf.float32 `Tensor` whose value represents the sum of the scaled
      `losses`.
  """
  # First, compute the sum of the losses over all elements:
  start_index = max(0, weights.get_shape().ndims)
  reduction_indices = list(range(start_index, losses.get_shape().ndims))
  reduced_losses = math_ops.reduce_sum(losses,
                                       reduction_indices=reduction_indices)
  reduced_losses = math_ops.multiply(reduced_losses, weights)
  return math_ops.reduce_sum(reduced_losses)
def testUnitNormWithRandomMatrix(self):
  height, width = 2, 3
  for dim in range(3):
    random_seed.set_random_seed(0)
    image = random_ops.random_uniform((height, width, 3))
    output = _layers.unit_norm(image, dim=dim, epsilon=1e-6)
    norms = math_ops.sqrt(
        math_ops.reduce_sum(
            math_ops.square(output), reduction_indices=dim))

    shape = [height, width, 3]
    del shape[dim]
    expected = np.ones(shape)

    with self.test_session():
      actual = norms.eval()
      self.assertAllClose(expected, actual, 1e-4, 1e-4)
def testKnownRankUnknownDimsSucceeds(self):
  height, width = 2, 3
  for dim in range(3):
    placeholder_value = np.ones((height, width, 3))
    shape = [height, width, 3]
    del shape[dim]
    expected = np.ones(shape)

    image = array_ops.placeholder(dtypes.float32, (None, None, 3))
    output = _layers.unit_norm(image, dim=dim, epsilon=1e-6)
    norms = math_ops.sqrt(
        math_ops.reduce_sum(
            math_ops.square(output), reduction_indices=dim))

    with self.test_session():
      actual = norms.eval({image: placeholder_value})
      self.assertAllClose(expected, actual, 1e-4, 1e-4)


# TODO(b/28426988): Add separate tests for non-legacy versions.
def sequence_loss(logits, targets, weights, average_across_timesteps=True, average_across_batch=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits, batch-collapsed. Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. average_across_batch: If set, divide the returned cost by the batch size. softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, defaults to "sequence_loss". Returns: A scalar float Tensor: The average log-perplexity per symbol (weighted). Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ with ops.name_scope( name, "sequence_loss",logits + targets + weights): cost = math_ops.reduce_sum(sequence_loss_by_example( logits, targets, weights, average_across_timesteps=average_across_timesteps, softmax_loss_function=softmax_loss_function)) if average_across_batch: batch_size = array_ops.shape(targets[0])[0] return cost / math_ops.cast(batch_size, dtypes.float32) else: return cost
def sequence_loss(logits, targets, weights, average_across_timesteps=True, average_across_batch=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits, batch-collapsed. Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. average_across_batch: If set, divide the returned cost by the batch size. softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, defaults to "sequence_loss". Returns: A scalar float Tensor: The average log-perplexity per symbol (weighted). Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ with ops.name_scope(name, "sequence_loss", logits + targets + weights): cost = math_ops.reduce_sum( sequence_loss_by_example( logits, targets, weights, average_across_timesteps=average_across_timesteps, softmax_loss_function=softmax_loss_function)) if average_across_batch: batch_size = array_ops.shape(targets[0])[0] return cost / math_ops.cast(batch_size, cost.dtype) else: return cost
def sequence_loss_by_batch(logits, targets, weights,
                           average_across_timesteps=True,
                           softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits, batch-collapsed
  (averaged).

  Args:
    logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List of 1D batch-sized int32 Tensors of the same length as logits.
    weights: List of 1D batch-sized float-Tensors of the same length as logits.
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch size.
    softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: The average log-perplexity per symbol (weighted).

  Raises:
    ValueError: If len(logits) is different from len(targets) or len(weights).
  """
  with ops.op_scope(logits + targets + weights, name,
                    "sequence_loss_by_batch"):
    cost = math_ops.reduce_sum(sequence_loss_by_example(
        logits, targets, weights,
        average_across_timesteps=average_across_timesteps,
        softmax_loss_function=softmax_loss_function))
    batch_size = array_ops.shape(targets[0])[0]
    return cost / math_ops.cast(batch_size, dtypes.float32)
def sequence_loss(targets, logits, weights,
                  average_across_timesteps=True, average_across_batch=True,
                  softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

  Args:
    logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List of 1D batch-sized int32 Tensors of the same length as logits.
    weights: List of 1D batch-sized float-Tensors of the same length as logits.
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch size.
    softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: The average log-perplexity per symbol (weighted).

  Raises:
    ValueError: If len(logits) is different from len(targets) or len(weights).
  """
  with ops.name_scope(name, "sequence_loss", logits + targets + weights):
    cost = math_ops.reduce_sum(
        sequence_loss_by_example(
            targets, logits, weights,
            average_across_timesteps=average_across_timesteps,
            softmax_loss_function=softmax_loss_function))
    if average_across_batch:
      batch_size = array_ops.shape(targets[0])[0]
      return cost / math_ops.cast(batch_size, cost.dtype)
    else:
      return cost
def sequence_loss(logits, targets, weights, name):
  """TODO(nh2tran): docstring.

  Weighted cross-entropy loss for a sequence of logits, batch-collapsed.

  Args:
    logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols].
    targets: List of 1D batch-sized int32 Tensors of the same length as logits.
    weights: List of 1D batch-sized float-Tensors of the same length as logits.
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch size.
    softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: Optional name for this operation, defaults to "sequence_loss".

  Returns:
    A scalar float Tensor: The average log-perplexity per symbol (weighted).

  Raises:
    ValueError: If len(logits) is different from len(targets) or len(weights).
  """
  #~ with tf.name_scope(name=name,
  #~                    values=logits + targets + weights):
  with ops.op_scope(logits + targets + weights, name):
    cost = math_ops.reduce_sum(sequence_loss_per_sample(logits,
                                                        targets,
                                                        weights))
    batch_size = array_ops.shape(targets[0])[0]
    return cost / math_ops.cast(batch_size, dtypes.float32)
def sequence_loss(logits, targets, weights, average_across_timesteps=True, average_across_batch=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits, batch-collapsed. Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. average_across_batch: If set, divide the returned cost by the batch size. softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, defaults to "sequence_loss". Returns: A scalar float Tensor: The average log-perplexity per symbol (weighted). Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ with ops.op_scope(logits + targets + weights, name, "sequence_loss"): cost = math_ops.reduce_sum(sequence_loss_by_example( logits, targets, weights, average_across_timesteps=average_across_timesteps, softmax_loss_function=softmax_loss_function)) if average_across_batch: batch_size = array_ops.shape(targets[0])[0] return cost / math_ops.cast(batch_size, dtypes.float32) else: return cost
def sequence_loss(logits, targets, weights, average_across_timesteps=True, average_across_batch=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits, batch-collapsed. Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. average_across_batch: If set, divide the returned cost by the batch size. softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, defaults to "sequence_loss". Returns: A scalar float Tensor: The average log-perplexity per symbol (weighted). Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ with ops.name_scope(name, "sequence_loss", logits + targets + weights): cost = math_ops.reduce_sum(sequence_loss_by_example( logits, targets, weights, average_across_timesteps=average_across_timesteps, softmax_loss_function=softmax_loss_function)) if average_across_batch: batch_size = array_ops.shape(targets[0])[0] return cost / math_ops.cast(batch_size, cost.dtype) else: return cost
def categorical_crossentropy(output, target, from_logits=False):
  """Categorical crossentropy between an output tensor and a target tensor.

  Arguments:
      output: A tensor resulting from a softmax
          (unless `from_logits` is True, in which
          case `output` is expected to be the logits).
      target: A tensor of the same shape as `output`.
      from_logits: Boolean, whether `output` is the
          result of a softmax, or is a tensor of logits.

  Returns:
      Output tensor.
  """
  # Note: nn.softmax_cross_entropy_with_logits
  # expects logits, Keras expects probabilities.
  if not from_logits:
    # scale preds so that the class probas of each sample sum to 1
    output /= math_ops.reduce_sum(
        output, reduction_indices=len(output.get_shape()) - 1, keep_dims=True)
    # manual computation of crossentropy
    epsilon = _to_tensor(_EPSILON, output.dtype.base_dtype)
    output = clip_ops.clip_by_value(output, epsilon, 1. - epsilon)
    return -math_ops.reduce_sum(
        target * math_ops.log(output),
        reduction_indices=len(output.get_shape()) - 1)
  else:
    return nn.softmax_cross_entropy_with_logits(labels=target, logits=output)
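The from_logits=False branch above first renormalizes output so each sample's class probabilities sum to 1, clips to avoid log(0), and only then sums -target * log(output) over the class axis. A hedged NumPy restatement of that branch, with made-up inputs and eps standing in for _EPSILON:

import numpy as np

eps = 1e-7                                              # stands in for _EPSILON
output = np.array([[0.2, 0.3, 0.5]])
target = np.array([[0.0, 0.0, 1.0]])

output = output / output.sum(axis=-1, keepdims=True)    # mirrors the reduce_sum normalization
output = np.clip(output, eps, 1.0 - eps)
loss = -np.sum(target * np.log(output), axis=-1)
print(loss)                                             # -log(0.5) ~= 0.693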
def sequence_loss(logits, targets, weights, average_across_timesteps=True, average_across_batch=True, softmax_loss_function=None, name=None): """Weighted cross-entropy loss for a sequence of logits, batch-collapsed. Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. average_across_batch: If set, divide the returned cost by the batch size. softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, defaults to "sequence_loss". Returns: A scalar float Tensor: The average log-perplexity per symbol (weighted). Raises: ValueError: If len(logits) is different from len(targets) or len(weights). """ with ops.name_scope(name, "sequence_loss", logits + targets + weights): cost = math_ops.reduce_sum(sequence_loss_by_example(logits, targets, weights, average_across_timesteps=average_across_timesteps, softmax_loss_function=softmax_loss_function)) if average_across_batch: batch_size = array_ops.shape(targets[0])[0] return cost / math_ops.cast(batch_size, dtypes.float32) else: return cost
def crf_unary_score(tag_indices, sequence_lengths, inputs):
  """Computes the unary scores of tag sequences.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials.

  Returns:
    unary_scores: A [batch_size] vector of unary scores.
  """
  batch_size = array_ops.shape(inputs)[0]
  max_seq_len = array_ops.shape(inputs)[1]
  num_tags = array_ops.shape(inputs)[2]

  flattened_inputs = array_ops.reshape(inputs, [-1])

  offsets = array_ops.expand_dims(
      math_ops.range(batch_size) * max_seq_len * num_tags, 1)
  offsets += array_ops.expand_dims(math_ops.range(max_seq_len) * num_tags, 0)
  flattened_tag_indices = array_ops.reshape(offsets + tag_indices, [-1])

  unary_scores = array_ops.reshape(
      array_ops.gather(flattened_inputs, flattened_tag_indices),
      [batch_size, max_seq_len])

  masks = _lengths_to_masks(sequence_lengths, array_ops.shape(tag_indices)[1])

  unary_scores = math_ops.reduce_sum(unary_scores * masks, 1)
  return unary_scores
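To see what the index-flattening and masking achieve, here is a hedged NumPy illustration of the same arithmetic (all values are made up and _lengths_to_masks is replaced by an explicit comparison): each example gathers its chosen unary potential at every time step, then reduce_sum adds up only the steps inside the true sequence length.

import numpy as np

batch_size, max_seq_len, num_tags = 2, 3, 4
inputs = np.arange(batch_size * max_seq_len * num_tags, dtype=np.float32).reshape(
    batch_size, max_seq_len, num_tags)
tag_indices = np.array([[0, 1, 2], [3, 2, 1]])
sequence_lengths = np.array([3, 2])

# Flattened offsets: example offset + time-step offset, as in crf_unary_score.
offsets = (np.arange(batch_size)[:, None] * max_seq_len * num_tags +
           np.arange(max_seq_len)[None, :] * num_tags)
unary = inputs.reshape(-1)[(offsets + tag_indices).reshape(-1)].reshape(
    batch_size, max_seq_len)

# Mask out padded time steps, then sum per example (the reduce_sum over axis 1).
masks = (np.arange(max_seq_len)[None, :] < sequence_lengths[:, None]).astype(np.float32)
print((unary * masks).sum(axis=1))   # [15., 33.] for these illustrative values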
def crf_binary_score(tag_indices, sequence_lengths, transition_params):
  """Computes the binary scores of tag sequences.

  Args:
    tag_indices: A [batch_size, max_seq_len] matrix of tag indices.
    sequence_lengths: A [batch_size] vector of true sequence lengths.
    transition_params: A [num_tags, num_tags] matrix of binary potentials.

  Returns:
    binary_scores: A [batch_size] vector of binary scores.
  """
  # Get shape information.
  num_tags = transition_params.get_shape()[0]
  num_transitions = array_ops.shape(tag_indices)[1] - 1

  # Truncate by one on each side of the sequence to get the start and end
  # indices of each transition.
  start_tag_indices = array_ops.slice(tag_indices, [0, 0],
                                      [-1, num_transitions])
  end_tag_indices = array_ops.slice(tag_indices, [0, 1], [-1, num_transitions])

  # Encode the indices in a flattened representation.
  flattened_transition_indices = start_tag_indices * num_tags + end_tag_indices
  flattened_transition_params = array_ops.reshape(transition_params, [-1])

  # Get the binary scores based on the flattened representation.
  binary_scores = array_ops.gather(flattened_transition_params,
                                   flattened_transition_indices)

  masks = _lengths_to_masks(sequence_lengths, array_ops.shape(tag_indices)[1])
  truncated_masks = array_ops.slice(masks, [0, 1], [-1, -1])
  binary_scores = math_ops.reduce_sum(binary_scores * truncated_masks, 1)
  return binary_scores
def _safe_mean(losses, num_present):
  """Computes a safe mean of the losses.

  Args:
    losses: A tensor whose elements contain individual loss measurements.
    num_present: The number of measurable losses in the tensor.

  Returns:
    A scalar representing the mean of the losses. If `num_present` is zero,
      then zero is returned.
  """
  total_loss = math_ops.reduce_sum(losses)
  return _safe_div(total_loss, num_present)
def cosine_distance(predictions, targets, dim, weight=1.0, scope=None):
  """Adds a cosine-distance loss to the training procedure.

  Note that the function assumes that the predictions and targets are already
  unit-normalized.

  Args:
    predictions: An arbitrary matrix.
    targets: A `Tensor` whose shape matches 'predictions'
    dim: The dimension along which the cosine distance is computed.
    weight: Coefficients for the loss: a scalar, a tensor of shape
      [batch_size] or a tensor whose shape matches `predictions`.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If predictions.shape doesn't match targets.shape, if the ignore
                mask is provided and its shape doesn't match targets.shape or if
                the ignore mask is not boolean valued.
  """
  with ops.name_scope(scope, "cosine_distance_loss",
                      [predictions, targets]) as scope:
    predictions.get_shape().assert_is_compatible_with(targets.get_shape())
    if weight is None:
      raise ValueError("`weight` cannot be None")

    predictions = math_ops.to_float(predictions)
    targets = math_ops.to_float(targets)

    radial_diffs = math_ops.mul(predictions, targets)
    losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,])
    return compute_weighted_loss(losses, weight)
def _l1_loss(self):
  """Computes the (un-normalized) l1 loss of the model."""
  with name_scope('sdca/l1_loss'):
    sums = []
    for name in ['sparse_features_weights', 'dense_features_weights']:
      for weights in self._convert_n_to_tensor(self._variables[name]):
        with ops.device(weights.device):
          sums.append(
              math_ops.reduce_sum(
                  math_ops.abs(math_ops.cast(weights, dtypes.float64))))
    sum = math_ops.add_n(sums)
    # SDCA L1 regularization cost is: l1 * sum(|weights|)
    return self._options['symmetric_l1_regularization'] * sum