我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用tensorflow.python.ops.array_ops.squeeze()。
def _get_eval_ops(self, features, targets, metrics): features, spec = data_ops.ParseDataTensorOrDict(features) labels = data_ops.ParseLabelTensorOrDict(targets) graph_builder = self.graph_builder_class( self.params, device_assigner=self.device_assigner, training=False, **self.construction_args) probabilities = graph_builder.inference_graph(features, data_spec=spec) # One-hot the labels. if not self.params.regression: labels = math_ops.to_int64(array_ops.one_hot(math_ops.to_int64( array_ops.squeeze(labels)), self.params.num_classes, 1, 0)) if metrics is None: metrics = {self.accuracy_metric: eval_metrics.get_metric(self.accuracy_metric)} result = {} for name, metric in six.iteritems(metrics): result[name] = metric(probabilities, labels) return result
def average_impurity(self): """Constructs a TF graph for evaluating the average leaf impurity of a tree. If in regression mode, this is the leaf variance. If in classification mode, this is the gini impurity. Returns: The last op in the graph. """ children = array_ops.squeeze(array_ops.slice( self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1]) is_leaf = math_ops.equal(constants.LEAF_NODE, children) leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf), squeeze_dims=[1])) counts = array_ops.gather(self.variables.node_sums, leaves) gini = self._weighted_gini(counts) # Guard against step 1, when there often are no leaves yet. def impurity(): return gini # Since average impurity can be used for loss, when there's no data just # return a big number so that loss always decreases. def big(): return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000. return control_flow_ops.cond(math_ops.greater( array_ops.shape(leaves)[0], 0), impurity, big)
def predict(self, x, **kwargs): """Returns predictions for the given test data. Arguments: x: array-like, shape `(n_samples, n_features)` Test samples where n_samples in the number of samples and n_features is the number of features. **kwargs: dictionary arguments Legal arguments are the arguments of `Sequential.predict`. Returns: preds: array-like, shape `(n_samples,)` Predictions. """ kwargs = self.filter_sk_params(Sequential.predict, kwargs) return np.squeeze(self.model.predict(x, **kwargs))
def _apply_transform(self, input_tensors, **kwargs): """Applies the transformation to the `transform_input`. Args: input_tensors: a list of Tensors representing the input to the Transform. **kwargs: Additional keyword arguments, unused here. Returns: A namedtuple of Tensors representing the transformed output. """ input_tensor = input_tensors[0] mask = input_tensors[1] if mask.get_shape().ndims > 1: mask = array_ops.squeeze(mask) if isinstance(input_tensor, ops.SparseTensor): mask_fn = sparse_boolean_mask else: mask_fn = array_ops.boolean_mask # pylint: disable=not-callable return self.return_type(mask_fn(input_tensor, mask))
def _logits_to_predictions(self, logits): """Returns a dict of predictions. Args: logits: logits `Tensor` after applying possible centered bias. Returns: Dict of prediction `Tensor` keyed by `PredictionKey`. """ predictions = {} if self.logits_dimension == 1: predictions[prediction_key.PredictionKey.SCORES] = array_ops.squeeze( logits, squeeze_dims=[1]) else: predictions[prediction_key.PredictionKey.SCORES] = logits return predictions
def _apply_transform(self, input_tensors, **kwargs): """Applies the transformation to the `transform_input`. Args: input_tensors: a list of Tensors representing the input to the Transform. **kwargs: Additional keyword arguments, unused here. Returns: A namedtuple of Tensors representing the transformed output. """ input_tensor = input_tensors[0] mask = input_tensors[1] if mask.get_shape().ndims > 1: mask = array_ops.squeeze(mask) if isinstance(input_tensor, sparse_tensor_py.SparseTensor): mask_fn = sparse_boolean_mask else: mask_fn = array_ops.boolean_mask # pylint: disable=not-callable return self.return_type(mask_fn(input_tensor, mask))
def loss(self, data, labels): """The loss to minimize while training.""" if self.is_regression: diff = self.training_inference_graph(data) - math_ops.to_float(labels) mean_squared_error = math_ops.reduce_mean(diff * diff) root_mean_squared_error = math_ops.sqrt(mean_squared_error, name="loss") loss = root_mean_squared_error else: loss = math_ops.reduce_mean( nn_ops.sparse_softmax_cross_entropy_with_logits( self.training_inference_graph(data), array_ops.squeeze(math_ops.to_int32(labels))), name="loss") if self.regularizer: loss += layers.apply_regularization(self.regularizer, variables.trainable_variables()) return loss
def _forward_log_det_jacobian(self, x): if self._static_event_ndims == 0: return x - 2. * nn_ops.softplus(x) else: # This code is similar to nn_ops.log_softmax but different because we have # an implicit zero column to handle. I.e., instead of: # reduce_sum(logits - reduce_sum(exp(logits), dim)) # we must do: # log_normalization = 1 + reduce_sum(exp(logits)) # -log_normalization + reduce_sum(logits - log_normalization) log_normalization = nn_ops.softplus( math_ops.reduce_logsumexp(x, reduction_indices=-1, keep_dims=True)) fldj = (-log_normalization + math_ops.reduce_sum(x - log_normalization, reduction_indices=-1, keep_dims=True)) return array_ops.squeeze(fldj, squeeze_dims=-1)
def testCrfSequenceScore(self): inputs = np.array( [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32) tag_indices = np.array([1, 2, 1, 0], dtype=np.int32) transition_params = np.array( [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32) sequence_lengths = np.array(3, dtype=np.int32) with self.test_session() as sess: sequence_score = crf.crf_sequence_score( inputs=array_ops.expand_dims(inputs, 0), tag_indices=array_ops.expand_dims(tag_indices, 0), sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), transition_params=constant_op.constant(transition_params)) sequence_score = array_ops.squeeze(sequence_score, [0]) tf_sequence_score = sess.run(sequence_score) expected_unary_score = sum(inputs[i][tag_indices[i]] for i in range(sequence_lengths)) expected_binary_score = sum( transition_params[tag_indices[i], tag_indices[i + 1]] for i in range(sequence_lengths - 1)) expected_sequence_score = expected_unary_score + expected_binary_score self.assertAllClose(tf_sequence_score, expected_sequence_score)
def testCrfBinaryScore(self): tag_indices = np.array([1, 2, 1, 0], dtype=np.int32) transition_params = np.array( [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32) sequence_lengths = np.array(3, dtype=np.int32) with self.test_session() as sess: binary_score = crf.crf_binary_score( tag_indices=array_ops.expand_dims(tag_indices, 0), sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), transition_params=constant_op.constant(transition_params)) binary_score = array_ops.squeeze(binary_score, [0]) tf_binary_score = sess.run(binary_score) expected_binary_score = sum( transition_params[tag_indices[i], tag_indices[i + 1]] for i in range(sequence_lengths - 1)) self.assertAllClose(tf_binary_score, expected_binary_score)
def _define_partial_maximization_operation(self, shard_id, shard): """Computes the partial statistics of the means and covariances. Args: shard_id: current shard id. shard: current data shard, 1 X num_examples X dimensions. """ # Soft assignment of each data point to each of the two clusters. self._points_in_k[shard_id] = math_ops.reduce_sum( self._w[shard_id], 0, keep_dims=True) # Partial means. w_mul_x = array_ops.expand_dims( math_ops.matmul( self._w[shard_id], array_ops.squeeze(shard, [0]), transpose_a=True), 1) self._w_mul_x.append(w_mul_x) # Partial covariances. x = array_ops.concat([shard for _ in range(self._num_classes)], 0) x_trans = array_ops.transpose(x, perm=[0, 2, 1]) x_mul_w = array_ops.concat([ array_ops.expand_dims(x_trans[k, :, :] * self._w[shard_id][:, k], 0) for k in range(self._num_classes) ], 0) self._w_mul_x2.append(math_ops.matmul(x_mul_w, x))
def tree_initialization(self): def _init_tree(): return state_ops.scatter_update(self.variables.tree, [0], [[-1, -1]]).op def _nothing(): return control_flow_ops.no_op() return control_flow_ops.cond( math_ops.equal(array_ops.squeeze(array_ops.slice( self.variables.tree, [0, 0], [1, 1])), -2), _init_tree, _nothing)
def get_stats(self, session): num_nodes = self.variables.end_of_tree.eval(session=session) - 1 num_leaves = array_ops.where( math_ops.equal(array_ops.squeeze(array_ops.slice( self.variables.tree, [0, 0], [-1, 1])), constants.LEAF_NODE) ).eval(session=session).shape[0] return TreeStats(num_nodes, num_leaves)
def squeeze(x, axis): """Removes a 1-dimension from the tensor at index "axis". Arguments: x: A tensor or variable. axis: Axis to drop. Returns: A tensor with the same data as `x` but reduced dimensions. """ return array_ops.squeeze(x, [axis])
def ctc_batch_cost(y_true, y_pred, input_length, label_length): """Runs CTC loss algorithm on each batch element. Arguments: y_true: tensor `(samples, max_string_length)` containing the truth labels. y_pred: tensor `(samples, time_steps, num_categories)` containing the prediction, or output of the softmax. input_length: tensor `(samples, 1)` containing the sequence length for each batch item in `y_pred`. label_length: tensor `(samples, 1)` containing the sequence length for each batch item in `y_true`. Returns: Tensor with shape (samples,1) containing the CTC loss of each element. """ label_length = math_ops.to_int32(array_ops.squeeze(label_length)) input_length = math_ops.to_int32(array_ops.squeeze(input_length)) sparse_labels = math_ops.to_int32( ctc_label_dense_to_sparse(y_true, label_length)) y_pred = math_ops.log(array_ops.transpose(y_pred, perm=[1, 0, 2]) + 1e-8) return array_ops.expand_dims( ctc.ctc_loss( inputs=y_pred, labels=sparse_labels, sequence_length=input_length), 1)
def crf_log_norm(inputs, sequence_lengths, transition_params): """Computes the normalization for a CRF. Args: inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials to use as input to the CRF layer. sequence_lengths: A [batch_size] vector of true sequence lengths. transition_params: A [num_tags, num_tags] transition matrix. Returns: log_norm: A [batch_size] vector of normalizers for a CRF. """ # Split up the first and rest of the inputs in preparation for the forward # algorithm. first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1]) first_input = array_ops.squeeze(first_input, [1]) rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1]) # Compute the alpha values in the forward algorithm in order to get the # partition function. forward_cell = CrfForwardRnnCell(transition_params) _, alphas = rnn.dynamic_rnn( cell=forward_cell, inputs=rest_of_input, sequence_length=sequence_lengths - 1, initial_state=first_input, dtype=dtypes.float32) log_norm = math_ops.reduce_logsumexp(alphas, [1]) return log_norm
def sparse_softmax_cross_entropy(logits, labels, weight=1.0, scope=None): """Cross-entropy loss using tf.nn.sparse_softmax_cross_entropy_with_logits. `weight` acts as a coefficient for the loss. If a scalar is provided, then the loss is simply scaled by the given value. If `weight` is a tensor of size [`batch_size`], then the loss weights apply to each corresponding sample. Args: logits: [batch_size, num_classes] logits outputs of the network . labels: [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, num_classes)`. weight: Coefficients for the loss. The tensor must be a scalar or a tensor of shape [batch_size] or [batch_size, 1]. scope: the scope for the operations performed in computing the loss. Returns: A scalar `Tensor` representing the loss value. Raises: ValueError: If the shapes of logits, labels, and weight are incompatible, or if `weight` is None. """ with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss", [logits, labels]): labels = array_ops.reshape(labels, shape=[array_ops.shape(labels)[0]]) weight = array_ops.squeeze(weight) losses = nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name="xentropy") return compute_weighted_loss(losses, weight)
def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None): """Returns prediction and loss for mean squared error regression.""" with ops.name_scope(name, "mean_squared_error_regressor", [tensor_in, labels]): predictions = nn.xw_plus_b(tensor_in, weights, biases) if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2: predictions = array_ops_.squeeze(predictions, squeeze_dims=[1]) return predictions, loss_ops.sum_of_squares(predictions, labels)
def _activations_to_predictions(self, unused_features, activations): with ops.name_scope('activations_to_predictions'): activations_shape = array_ops.shape(activations) flattened_activations = array_ops.reshape(activations, [-1, activations_shape[2]]) predictions = self._target_column.activations_to_predictions( flattened_activations, proba=False) reshaped_predictions = array_ops.reshape( predictions, [activations_shape[0], activations_shape[1], -1]) return array_ops.squeeze(reshaped_predictions, [2])
def _get_eval_ops(self, features, targets, metrics): features, _, spec = data_ops.ParseDataTensorOrDict(features) labels = data_ops.ParseLabelTensorOrDict(targets) _assert_float32(features) _assert_float32(labels) graph_builder = self.graph_builder_class( self.params, device_assigner=self.device_assigner, training=False, **self.construction_args) probabilities = graph_builder.inference_graph(features, data_spec=spec) # One-hot the labels. if not self.params.regression: labels = math_ops.to_int64(array_ops.one_hot(math_ops.to_int64( array_ops.squeeze(labels)), self.params.num_classes, 1, 0)) if metrics is None: metrics = {self.accuracy_metric: eval_metrics.get_metric(self.accuracy_metric)} result = {} for name, metric in six.iteritems(metrics): result[name] = metric(probabilities, labels) return result
def sparse_boolean_mask(sparse_tensor, mask, name="sparse_boolean_mask"): """Boolean mask for `SparseTensor`s. Args: sparse_tensor: a `SparseTensor`. mask: a 1D boolean dense`Tensor` whose length is equal to the 0th dimension of `sparse_tensor`. name: optional name for this operation. Returns: A `SparseTensor` that contains row `k` of `sparse_tensor` iff `mask[k]` is `True`. """ # TODO(jamieas): consider mask dimension > 1 for symmetry with `boolean_mask`. with ops.name_scope(name, values=[sparse_tensor, mask]): mask = ops.convert_to_tensor(mask) mask_rows = array_ops.where(mask) first_indices = array_ops.squeeze(array_ops.slice(sparse_tensor.indices, [0, 0], [-1, 1])) # Identify indices corresponding to the rows identified by mask_rows. sparse_entry_matches = functional_ops.map_fn( lambda x: math_ops.equal(first_indices, x), mask_rows, dtype=dtypes.bool) # Combine the rows of index_matches to form a mask for the sparse indices # and values. to_retain = array_ops.reshape( functional_ops.foldl(math_ops.logical_or, sparse_entry_matches), [-1]) return sparse_ops.sparse_retain(sparse_tensor, to_retain)
def logits_to_predictions(self, logits, proba=False): if self.num_label_columns == 1: return array_ops.squeeze(logits, squeeze_dims=[1]) return logits
def _streaming_at_threshold(streaming_metrics_fn, threshold): def _streaming_metrics(predictions, targets, weights=None): precision_tensor, update_op = streaming_metrics_fn( predictions, labels=targets, thresholds=[threshold], weights=_float_weights_or_none(weights)) return array_ops.squeeze(precision_tensor), update_op return _streaming_metrics
def inference_graph(self, data): with ops.device(self.device_assigner.get_device(self.layer_num)): # Compute activations for the neural network. nn_activations = layers.fully_connected(data, 1) # There is always one activation per instance by definition, so squeeze # away the extra dimension. return array_ops.squeeze(nn_activations, squeeze_dims=[1])
def sparse_softmax_cross_entropy( logits, labels, weights=_WEIGHT_SENTINEL, scope=None, weight=_WEIGHT_SENTINEL): """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`. `weight` acts as a coefficient for the loss. If a scalar is provided, then the loss is simply scaled by the given value. If `weight` is a tensor of size [`batch_size`], then the loss weights apply to each corresponding sample. Args: logits: [batch_size, num_classes] logits outputs of the network . labels: [batch_size, 1] or [batch_size] target labels of dtype `int32` or `int64` in the range `[0, num_classes)`. weights: Coefficients for the loss. The tensor must be a scalar or a tensor of shape [batch_size] or [batch_size, 1]. scope: the scope for the operations performed in computing the loss. weight: Deprecated alias for `weights`. Returns: A scalar `Tensor` representing the loss value. Raises: ValueError: If the shapes of logits, labels, and weight are incompatible, or if `weight` is None. """ weights = _weights(weights, weight) with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss", [logits, labels, weights]): labels = array_ops.reshape(labels, shape=[array_ops.shape(labels)[0]]) weights = array_ops.squeeze(weights) losses = nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name="xentropy") return compute_weighted_loss(losses, weights)
def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None): """Returns prediction and loss for mean squared error regression.""" with ops.name_scope(name, 'mean_squared_error_regressor', [tensor_in, labels]): predictions = nn.xw_plus_b(tensor_in, weights, biases) if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2: predictions = array_ops_.squeeze(predictions, squeeze_dims=[1]) return predictions, loss_ops.mean_squared_error(predictions, labels)
def _softmax_cross_entropy_loss(logits, labels): # Check that we got integer for classification. if not labels.dtype.is_integer: raise ValueError("Labels dtype should be integer " "Instead got %s." % labels.dtype) # sparse_softmax_cross_entropy_with_logits requires [batch_size] labels. if len(labels.get_shape()) == 2: labels = array_ops.squeeze(labels, squeeze_dims=[1]) loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, labels) return loss_vec
def _softmax_cross_entropy_loss(logits, target): # Check that we got integer for classification. if not target.dtype.is_integer: raise ValueError("Target's dtype should be integer " "Instead got %s." % target.dtype) # sparse_softmax_cross_entropy_with_logits requires [batch_size] target. if len(target.get_shape()) == 2: target = array_ops.squeeze(target, squeeze_dims=[1]) loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target) return loss_vec
def _streaming_at_threshold(streaming_metrics_fn, threshold): def _streaming_metrics(predictions, labels, weights=None): precision_tensor, update_op = streaming_metrics_fn( predictions, labels=labels, thresholds=[threshold], weights=_float_weights_or_none(weights)) return array_ops.squeeze(precision_tensor), update_op return _streaming_metrics
def _squeeze_and_onehot(targets, depth): targets = array_ops.squeeze(targets, squeeze_dims=[1]) return array_ops.one_hot(math_ops.to_int32(targets), depth)
def crf_log_norm(inputs, sequence_lengths, transition_params): """Computes the normalization for a CRF. Args: inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials to use as input to the CRF layer. sequence_lengths: A [batch_size] vector of true sequence lengths. transition_params: A [num_tags, num_tags] transition matrix. Returns: log_norm: A [batch_size] vector of normalizers for a CRF. """ # Split up the first and rest of the inputs in preparation for the forward # algorithm. first_input = array_ops.slice(inputs, [0, 0, 0], [-1, 1, -1]) first_input = array_ops.squeeze(first_input, [1]) rest_of_input = array_ops.slice(inputs, [0, 1, 0], [-1, -1, -1]) # Compute the alpha values in the forward algorithm in order to get the # partition function. forward_cell = CrfForwardRnnCell(transition_params) ''' tf.nn.rnn creates an unrolled graph for a fixed RNN length. That means, if you call tf.nn.rnn with inputs having 200 time steps you are creating a static graph with 200 RNN steps. First, graph creation is slow. Second, you’re unable to pass in longer sequences (> 200) than you’ve originally specified.tf.nn.dynamic_rnn solves this. It uses a tf.While loop to dynamically construct the graph when it is executed. That means graph creation is faster and you can feed batches of variable size. ''' _, alphas = rnn.dynamic_rnn( cell=forward_cell, inputs=rest_of_input, sequence_length=sequence_lengths - 1, initial_state=first_input, dtype=dtypes.float32) ''' ''' log_norm = math_ops.reduce_logsumexp(alphas, [1]) return log_norm
def _resize_image(image, height, width): image = array_ops.expand_dims(image, 0) image = image_ops.resize_bilinear(image, [height, width]) return array_ops.squeeze(image, [0])
def testCrfUnaryScore(self): inputs = np.array( [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32) tag_indices = np.array([1, 2, 1, 0], dtype=np.int32) sequence_lengths = np.array(3, dtype=np.int32) with self.test_session() as sess: unary_score = crf.crf_unary_score( tag_indices=array_ops.expand_dims(tag_indices, 0), sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), inputs=array_ops.expand_dims(inputs, 0)) unary_score = array_ops.squeeze(unary_score, [0]) tf_unary_score = sess.run(unary_score) expected_unary_score = sum(inputs[i][tag_indices[i]] for i in range(sequence_lengths)) self.assertAllClose(tf_unary_score, expected_unary_score)
def testViterbiDecode(self): inputs = np.array( [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32) transition_params = np.array( [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32) sequence_lengths = np.array(3, dtype=np.int32) num_words = inputs.shape[0] num_tags = inputs.shape[1] with self.test_session() as sess: all_sequence_scores = [] all_sequences = [] # Compare the dynamic program with brute force computation. for tag_indices in itertools.product( range(num_tags), repeat=sequence_lengths): tag_indices = list(tag_indices) tag_indices.extend([0] * (num_words - sequence_lengths)) all_sequences.append(tag_indices) sequence_score = crf.crf_sequence_score( inputs=array_ops.expand_dims(inputs, 0), tag_indices=array_ops.expand_dims(tag_indices, 0), sequence_lengths=array_ops.expand_dims(sequence_lengths, 0), transition_params=constant_op.constant(transition_params)) sequence_score = array_ops.squeeze(sequence_score, [0]) all_sequence_scores.append(sequence_score) tf_all_sequence_scores = sess.run(all_sequence_scores) expected_max_sequence_index = np.argmax(tf_all_sequence_scores) expected_max_sequence = all_sequences[expected_max_sequence_index] expected_max_score = tf_all_sequence_scores[expected_max_sequence_index] actual_max_sequence, actual_max_score = crf.viterbi_decode( inputs[:sequence_lengths], transition_params) self.assertAllClose(actual_max_score, expected_max_score) self.assertEqual(actual_max_sequence, expected_max_sequence[:sequence_lengths])
def compute_weighted_loss(losses, weights=1.0, scope=None): """Computes the weighted loss. Args: losses: A tensor of size [batch_size, d1, ... dN]. weights: A tensor of size [1] or [batch_size, d1, ... dK] where K < N. scope: the scope for the operations performed in computing the loss. Returns: A scalar `Tensor` that returns the weighted loss. Raises: ValueError: If `weights` is `None` or the shape is not compatible with `losses`, or if the number of dimensions (rank) of either `losses` or `weights` is missing. """ with ops.name_scope(scope, "weighted_loss", [losses, weights]): losses = ops.convert_to_tensor(losses) input_dtype = losses.dtype losses = math_ops.to_float(losses) weights = math_ops.to_float(ops.convert_to_tensor(weights)) if losses.get_shape().ndims is None: raise ValueError("losses.get_shape().ndims cannot be None") weights_shape = weights.get_shape() if weights_shape.ndims is None: raise ValueError("weights.get_shape().ndims cannot be None") if weights_shape.ndims > 1 and weights_shape.dims[-1].is_compatible_with(1): weights = array_ops.squeeze(weights, [-1]) total_loss = _scale_losses(losses, weights) num_present = _num_present(losses, weights) mean_loss = _safe_mean(total_loss, num_present) # convert the result back to the input type mean_loss = math_ops.cast(mean_loss, input_dtype) add_loss(mean_loss) return mean_loss
def sparse_softmax_cross_entropy(logits, labels, weights=1.0, scope=None): """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`. `weights` acts as a coefficient for the loss. If a scalar is provided, then the loss is simply scaled by the given value. If `weights` is a tensor of size [`batch_size`], then the loss weights apply to each corresponding sample. Args: logits: [batch_size, num_classes] logits outputs of the network . labels: [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, num_classes)`. weights: Coefficients for the loss. The tensor must be a scalar or a tensor of shape [batch_size] or [batch_size, 1]. scope: the scope for the operations performed in computing the loss. Returns: A scalar `Tensor` representing the mean loss value. Raises: ValueError: If the shapes of `logits`, `labels`, and `weights` are incompatible, or if `weights` is None. """ with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss", [logits, labels, weights]) as scope: labels = array_ops.reshape(labels, shape=[array_ops.shape(labels)[0]]) weights = array_ops.squeeze(weights) losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits, name="xentropy") return compute_weighted_loss(losses, weights, scope=scope)
def _prepare_inputs_for_rnn(sequence_features, context_features, num_unroll): """Prepares features batched by the SQSS for input to a state-saving RNN. Args: sequence_features: A dict of sequence feature name to `Tensor`, with tensors of shape `[batch_size, num_unroll, ...]` and type float32. context_features: A dict of context feature name to `Tensor`, with tensors of shape `[batch_size, 1, ...]` and type float32. num_unroll: Python integer, how many time steps to unroll at a time. The input sequences of length `k` are then split into `k / num_unroll` many segments. Returns: features_by_time: A list of length `num_unroll` with `Tensor` entries of shape `[batch_size, len(sequence_features) + len(context_features)]` of type float32. Features are stored in lexicographic order by their corresponding feature dict keys, first in the `sequence_features` and then in the `context_features` dicts. Context features are copied into each time step. """ def _tile(feature): return array_ops.squeeze( array_ops.tile(array_ops.expand_dims(feature, 1), [1, num_unroll, 1]), axis=2) sequence_features = [sequence_features[k] for k in sorted(sequence_features)] if not context_features: return array_ops.unstack(array_ops.stack(sequence_features, 2), axis=1) context_features = [ _tile(context_features[k]) for k in sorted(context_features) ] return array_ops.unstack( array_ops.stack(sequence_features + context_features, 2), axis=1)
def _softmax_cross_entropy_loss(logits, labels): with ops.name_scope( None, "softmax_cross_entropy_loss", (logits, labels,)) as name: labels = ops.convert_to_tensor(labels) # Check that we got integer for classification. if not labels.dtype.is_integer: raise ValueError("Labels dtype should be integer " "Instead got %s." % labels.dtype) # TODO(ptucker): This will break for dynamic shapes. # sparse_softmax_cross_entropy_with_logits requires [batch_size] labels. if len(labels.get_shape()) == 2: labels = array_ops.squeeze(labels, squeeze_dims=(1,)) return nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=logits, name=name)