The following are 50 code examples extracted from open-source Python projects, illustrating how to use tensorflow.python.ops.math_ops.greater().
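Before the project examples, here is a minimal, self-contained sketch of the op itself (not taken from any of the projects below; it assumes TensorFlow 1.x graph mode with a Session):

import tensorflow as tf
from tensorflow.python.ops import math_ops

# math_ops.greater is the element-wise x > y comparison behind tf.greater:
# it returns a bool tensor and broadcasts its arguments like other binary ops.
x = tf.constant([1.0, 2.0, 3.0])
y = tf.constant(2.0)
mask = math_ops.greater(x, y)  # -> [False, False, True]

with tf.Session() as sess:
  print(sess.run(mask))

Most of the snippets below follow the same pattern: compute a boolean condition with math_ops.greater and feed it to select/where/cond to choose between two branches.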
def average_impurity(self):
  """Constructs a TF graph for evaluating the average leaf impurity of a tree.

  If in regression mode, this is the leaf variance. If in classification mode,
  this is the gini impurity.

  Returns:
    The last op in the graph.
  """
  children = array_ops.squeeze(array_ops.slice(
      self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
  is_leaf = math_ops.equal(constants.LEAF_NODE, children)
  leaves = math_ops.to_int32(array_ops.squeeze(array_ops.where(is_leaf),
                                               squeeze_dims=[1]))
  counts = array_ops.gather(self.variables.node_sums, leaves)
  gini = self._weighted_gini(counts)
  # Guard against step 1, when there often are no leaves yet.
  def impurity():
    return gini
  # Since average impurity can be used for loss, when there's no data just
  # return a big number so that loss always decreases.
  def big():
    return array_ops.ones_like(gini, dtype=dtypes.float32) * 10000000.
  return control_flow_ops.cond(math_ops.greater(
      array_ops.shape(leaves)[0], 0), impurity, big)
def _reset(self):
  """Resets wait counter and cooldown counter."""
  if self.mode not in ['auto', 'min', 'max']:
    logging.warning('Learning Rate Plateau Reducing mode %s is unknown, '
                    'fallback to auto mode.' % (self.mode))
    self.mode = 'auto'
  if (self.mode == 'min' or
      (self.mode == 'auto' and 'acc' not in self.monitor)):
    self.monitor_op = lambda a, b: np.less(a, b - self.epsilon)
    self.best = np.Inf
  else:
    self.monitor_op = lambda a, b: np.greater(a, b + self.epsilon)
    self.best = -np.Inf
  self.cooldown_counter = 0
  self.wait = 0
  self.lr_epsilon = self.min_lr * 1e-4
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values
      are assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  return math_ops.select(
      math_ops.greater(denominator, 0),
      math_ops.div(numerator, math_ops.select(
          math_ops.equal(denominator, 0),
          array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
def _safe_div(numerator, denominator, name):
  """Divides two values, returning 0 if the denominator is <= 0.

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  return math_ops.select(
      math_ops.greater(denominator, 0),
      math_ops.truediv(numerator, denominator),
      0,
      name=name)
def _variance(self):
  var = (self._ones() *
         math_ops.square(self.sigma) * self.df / (self.df - 2))
  # When 1 < df <= 2, variance is infinite.
  inf = np.array(np.inf, dtype=self.dtype.as_numpy_dtype())
  result_where_defined = math_ops.select(
      math_ops.greater(self.df, array_ops.fill(self.batch_shape(), 2.)),
      var,
      array_ops.fill(self.batch_shape(), inf, name="inf"))
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return math_ops.select(
        math_ops.greater(self.df, self._ones()),
        result_where_defined,
        array_ops.fill(self.batch_shape(), nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.df,
            message="variance not defined for components of df <= 1"),
    ], result_where_defined)
def _mode(self):
  mode = (self.a - 1.) / (self.a_b_sum - 2.)
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return math_ops.select(
        math_ops.logical_and(
            math_ops.greater(self.a, 1.), math_ops.greater(self.b, 1.)),
        mode,
        array_ops.fill(self.batch_shape(), nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.a,
            message="Mode not defined for components of a <= 1."),
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.b,
            message="Mode not defined for components of b <= 1."),
    ], mode)
def _mode(self):
  mode = ((self.alpha - 1.) /
          (array_ops.expand_dims(self.alpha_sum, dim=-1) -
           math_ops.cast(self.event_shape()[0], self.dtype)))
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    shape = array_ops.concat(0, (self.batch_shape(), self.event_shape()))
    return math_ops.select(
        math_ops.greater(self.alpha, 1.),
        mode,
        array_ops.fill(shape, nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.alpha,
            message="mode not defined for components of alpha <= 1")
    ], mode)
def _safe_div(numerator, denominator, name="value"):
  """Computes a safe divide which returns 0 if the denominator is zero.

  Note that the function contains an additional conditional check that is
  necessary for avoiding situations where the loss is zero causing NaNs to
  creep into the gradient computation.

  Args:
    numerator: An arbitrary `Tensor`.
    denominator: A `Tensor` whose shape matches `numerator` and whose values
      are assumed to be non-negative.
    name: An optional name for the returned op.

  Returns:
    The element-wise value of the numerator divided by the denominator.
  """
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.div(numerator, array_ops.where(
          math_ops.equal(denominator, 0),
          array_ops.ones_like(denominator), denominator)),
      array_ops.zeros_like(numerator),
      name=name)
def _safe_div(numerator, denominator, name):
  """Divides two values, returning 0 if the denominator is <= 0.

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  return array_ops.where(
      math_ops.greater(denominator, 0),
      math_ops.truediv(numerator, denominator),
      0,
      name=name)
def _mean(self):
  mean = self.loc * array_ops.ones(self.batch_shape(), dtype=self.dtype)
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return array_ops.where(
        math_ops.greater(
            self.df,
            array_ops.ones(self.batch_shape(), dtype=self.dtype)),
        mean,
        array_ops.fill(self.batch_shape(), nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies(
        [
            check_ops.assert_less(
                array_ops.ones((), dtype=self.dtype), self.df,
                message="mean not defined for components of df <= 1"),
        ],
        mean)
def _mode(self):
  mode = (self.a - 1.) / (self.a_b_sum - 2.)
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return array_ops.where(
        math_ops.logical_and(
            math_ops.greater(self.a, 1.), math_ops.greater(self.b, 1.)),
        mode,
        array_ops.fill(self.batch_shape(), nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.a,
            message="Mode not defined for components of a <= 1."),
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.b,
            message="Mode not defined for components of b <= 1."),
    ], mode)
def _mode(self):
  mode = ((self.alpha - 1.) /
          (array_ops.expand_dims(self.alpha_sum, dim=-1) -
           math_ops.cast(self.event_shape()[0], self.dtype)))
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    shape = array_ops.concat((self.batch_shape(), self.event_shape()), 0)
    return array_ops.where(
        math_ops.greater(self.alpha, 1.),
        mode,
        array_ops.fill(shape, nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.alpha,
            message="mode not defined for components of alpha <= 1")
    ], mode)
def safe_divide(numerator, denominator, name):
  """Divides two values, returning 0 if the denominator is <= 0.

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  return tf.where(
      math_ops.greater(denominator, 0),
      math_ops.divide(numerator, denominator),
      tf.zeros_like(numerator),
      name=name)
def _safe_div(numerator, denominator, name):
  """Divides two values, returning 0 if the denominator is <= 0.

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`
  """
  return tf.where(
      math_ops.greater(denominator, 0),
      math_ops.divide(numerator, denominator),
      tf.zeros_like(numerator),
      name=name)
def greater(x, y):
  """Element-wise truth value of (x > y).

  Arguments:
      x: Tensor or variable.
      y: Tensor or variable.

  Returns:
      A bool tensor.
  """
  return math_ops.greater(x, y)
def __init__(self,
             filepath,
             monitor='val_loss',
             verbose=0,
             save_best_only=False,
             save_weights_only=False,
             mode='auto',
             period=1):
  super(ModelCheckpoint, self).__init__()
  self.monitor = monitor
  self.verbose = verbose
  self.filepath = filepath
  self.save_best_only = save_best_only
  self.save_weights_only = save_weights_only
  self.period = period
  self.epochs_since_last_save = 0

  if mode not in ['auto', 'min', 'max']:
    logging.warning('ModelCheckpoint mode %s is unknown, '
                    'fallback to auto mode.' % (mode))
    mode = 'auto'

  if mode == 'min':
    self.monitor_op = np.less
    self.best = np.Inf
  elif mode == 'max':
    self.monitor_op = np.greater
    self.best = -np.Inf
  else:
    if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
      self.monitor_op = np.greater
      self.best = -np.Inf
    else:
      self.monitor_op = np.less
      self.best = np.Inf
def __init__(self,
             monitor='val_loss',
             min_delta=0,
             patience=0,
             verbose=0,
             mode='auto'):
  super(EarlyStopping, self).__init__()

  self.monitor = monitor
  self.patience = patience
  self.verbose = verbose
  self.min_delta = min_delta
  self.wait = 0
  self.stopped_epoch = 0

  if mode not in ['auto', 'min', 'max']:
    # Note: the original referenced `self.mode`, which is never set in this
    # class; using the local `mode` avoids an AttributeError.
    logging.warning('EarlyStopping mode %s is unknown, '
                    'fallback to auto mode.' % (mode))
    mode = 'auto'

  if mode == 'min':
    self.monitor_op = np.less
  elif mode == 'max':
    self.monitor_op = np.greater
  else:
    if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
      self.monitor_op = np.greater
    else:
      self.monitor_op = np.less

  if self.monitor_op == np.greater:
    self.min_delta *= 1
  else:
    self.min_delta *= -1
def _ndtr(x):
  """Implements ndtr core logic."""
  half_sqrt_2 = constant_op.constant(
      0.5 * math.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
  w = x * half_sqrt_2
  z = math_ops.abs(w)
  y = math_ops.select(math_ops.less(z, half_sqrt_2),
                      1. + math_ops.erf(w),
                      math_ops.select(math_ops.greater(w, 0.),
                                      2. - math_ops.erfc(z),
                                      math_ops.erfc(z)))
  return 0.5 * y
def sigmoid_cross_entropy(logits, multi_class_labels, weight=1.0,
                          label_smoothing=0, scope=None):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weight` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weight` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:
      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
                              + 0.5 * label_smoothing

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    multi_class_labels: [batch_size, num_classes] target labels in (0, 1).
    weight: Coefficients for the loss. The tensor must be a scalar, a tensor
      of shape [batch_size] or shape [batch_size, num_classes].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `predictions` doesn't match that of `targets`
      or if the shape of `weight` is invalid or if `weight` is None.
  """
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      [logits, multi_class_labels]):
    logits.get_shape().assert_is_compatible_with(
        multi_class_labels.get_shape())

    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(logits, multi_class_labels,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weight)
def _prune_invalid_ids(sparse_ids, sparse_weights):
  """Prune invalid IDs (< 0) from the input ids and weights."""
  is_id_valid = math_ops.greater_equal(sparse_ids.values, 0)
  if sparse_weights is not None:
    is_id_valid = math_ops.logical_and(
        is_id_valid, math_ops.greater(sparse_weights.values, 0))
  sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid)
  if sparse_weights is not None:
    sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid)
  return sparse_ids, sparse_weights
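The example above keeps only entries whose ID is non-negative and (when weights are given) whose weight is strictly positive. A minimal sketch of how it behaves, assuming TensorFlow 1.x graph mode and that _prune_invalid_ids (and the math_ops/sparse_ops modules it uses) is in scope; the inputs are made up for illustration:

import tensorflow as tf

# A -1 padding ID and a zero-weighted entry should both be pruned.
ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                      values=[3, -1, 7], dense_shape=[2, 2])
weights = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                          values=[1.0, 1.0, 0.0], dense_shape=[2, 2])

pruned_ids, pruned_weights = _prune_invalid_ids(ids, weights)

with tf.Session() as sess:
  print(sess.run(pruned_ids.values))      # [3]
  print(sess.run(pruned_weights.values))  # [1.0]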
def _mean(self):
  mean = self.mu * self._ones()
  if self.allow_nan_stats:
    nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
    return math_ops.select(
        math_ops.greater(self.df, self._ones()),
        mean,
        array_ops.fill(self.batch_shape(), nan, name="nan"))
  else:
    return control_flow_ops.with_dependencies([
        check_ops.assert_less(
            array_ops.ones((), dtype=self.dtype), self.df,
            message="mean not defined for components of df <= 1"),
    ], mean)
def _logits_to_predictions(self, logits):
  """See `_MultiClassHead`."""
  predictions = {prediction_key.PredictionKey.LOGITS: logits}
  if self.logits_dimension == 1:
    predictions[prediction_key.PredictionKey.LOGISTIC] = math_ops.sigmoid(
        logits)
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
  predictions[prediction_key.PredictionKey.PROBABILITIES] = math_ops.sigmoid(
      logits)
  predictions[prediction_key.PredictionKey.CLASSES] = math_ops.to_int64(
      math_ops.greater(logits, 0))
  return predictions
def __gt__(self, other):
  return greater(self, other)
def _logits_to_predictions(self, logits):
  """See `_MultiClassHead`."""
  with ops.name_scope(None, "predictions", (logits,)):
    return {
        prediction_key.PredictionKey.LOGITS: logits,
        prediction_key.PredictionKey.PROBABILITIES: math_ops.sigmoid(
            logits, name=prediction_key.PredictionKey.PROBABILITIES),
        prediction_key.PredictionKey.CLASSES: math_ops.to_int64(
            math_ops.greater(logits, 0),
            name=prediction_key.PredictionKey.CLASSES)
    }
def setUp(self):
  super(CoreBinaryOpsTest, self).setUp()

  self.x_probs_broadcast_tensor = array_ops.reshape(
      self.x_probs_lt.tensor, [self.x_size, 1, self.probs_size])

  self.channel_probs_broadcast_tensor = array_ops.reshape(
      self.channel_probs_lt.tensor, [1, self.channel_size, self.probs_size])

  # == and != are not element-wise for tf.Tensor, so they shouldn't be
  # elementwise for LabeledTensor, either.
  self.ops = [
      ('add', operator.add, math_ops.add, core.add),
      ('sub', operator.sub, math_ops.subtract, core.sub),
      ('mul', operator.mul, math_ops.multiply, core.mul),
      ('div', operator.truediv, math_ops.div, core.div),
      ('mod', operator.mod, math_ops.mod, core.mod),
      ('pow', operator.pow, math_ops.pow, core.pow_function),
      ('equal', None, math_ops.equal, core.equal),
      ('less', operator.lt, math_ops.less, core.less),
      ('less_equal', operator.le, math_ops.less_equal, core.less_equal),
      ('not_equal', None, math_ops.not_equal, core.not_equal),
      ('greater', operator.gt, math_ops.greater, core.greater),
      ('greater_equal', operator.ge, math_ops.greater_equal,
       core.greater_equal),
  ]
  self.test_lt_1 = self.x_probs_lt
  self.test_lt_2 = self.channel_probs_lt
  self.test_lt_1_broadcast = self.x_probs_broadcast_tensor
  self.test_lt_2_broadcast = self.channel_probs_broadcast_tensor
  self.broadcast_axes = [self.a0, self.a1, self.a3]
def test_reflexive(self):
  labeled_tensor = self.x_probs_lt + 1  # all elements must be >0 for division
  for op_name, infix_op, _, lt_op in self.ops:
    if infix_op is not None:
      expected_lt = lt_op(2, labeled_tensor)
      actual_lt = infix_op(2, labeled_tensor)
      # Python uses greater for the reflexive version of less (and vice versa).
      if 'less' in op_name:
        op_name = op_name.replace('less', 'greater')
      elif 'greater' in op_name:
        op_name = op_name.replace('greater', 'less')
      self.assertIn(op_name, actual_lt.name)
      self.assertLabeledTensorsEqual(expected_lt, actual_lt)
def _ndtr(x):
  """Implements ndtr core logic."""
  half_sqrt_2 = constant_op.constant(
      0.5 * math.sqrt(2.), dtype=x.dtype, name="half_sqrt_2")
  w = x * half_sqrt_2
  z = math_ops.abs(w)
  y = array_ops.where(math_ops.less(z, half_sqrt_2),
                      1. + math_ops.erf(w),
                      array_ops.where(math_ops.greater(w, 0.),
                                      2. - math_ops.erfc(z),
                                      math_ops.erfc(z)))
  return 0.5 * y
def gumbel_decoder_fn(encoder_state,
                      embedding_matrix,
                      output_projections,
                      maximum_length,
                      start_of_sequence_id=2,
                      end_of_sequence_id=3,
                      temperature=1.0,
                      name=None):

  with tf.name_scope(name, "gumbel_decoder_fn", [
      encoder_state, embedding_matrix, output_projections, maximum_length,
      start_of_sequence_id, end_of_sequence_id, temperature]) as scope:

    batch_size = tf.shape(encoder_state)[0]
    W, b = output_projections

    start_of_sequence_id = ops.convert_to_tensor(start_of_sequence_id,
                                                 tf.int32)
    end_of_sequence_id = ops.convert_to_tensor(end_of_sequence_id, tf.int32)
    temperature = ops.convert_to_tensor(temperature, tf.float32)
    maximum_length = ops.convert_to_tensor(maximum_length, tf.int32)

  def _decoder_fn(time, cell_state, cell_input, cell_output, context_state):

    with ops.name_scope(name, "gumbel_decoder_fn",
                        [time, cell_state, cell_input, cell_output,
                         context_state]):

      if cell_input is not None:  # -- not None if training
        raise ValueError("Expected cell_input to be None, but saw: %s" %
                         cell_input)

      if cell_output is None:  # -- initial values
        next_done = array_ops.zeros([batch_size, ], dtype=dtypes.bool)
        next_cell_state = encoder_state
        next_cell_input = tf.reshape(
            tf.tile(embedding_matrix[start_of_sequence_id], [batch_size]),
            shape=tf.shape(encoder_state))
        emit_output = cell_output
        next_context_state = context_state

      else:  # -- transition function
        with ops.name_scope(name, "gumbel_output_fn",
                            [W, b, cell_output, end_of_sequence_id,
                             temperature]):

          # -- output projection parameters usually used for output logits
          #    prior to softmax
          # [B, H] * [H, V] + [V] -> [B, V]
          output_logits = tf.add(tf.matmul(cell_output, W), b)

          # -- stopping criterion if argmax is end_of_sequence_id
          output_argmax = tf.cast(tf.argmax(output_logits, axis=1), tf.int32)
          next_done = tf.equal(output_argmax, end_of_sequence_id)

          # -- sample from gumbel softmax (aka concrete) distribution;
          #    the higher the temperature the spikier
          output_probs = gumbel_softmax(output_logits,
                                        temperature=temperature, hard=False)

          # soft embeddings for the next input
          # [B, V] * [V, H] -> [B, H]
          next_cell_input = tf.matmul(output_probs, embedding_matrix)

        next_cell_state = cell_state
        emit_output = cell_output
        next_context_state = context_state

      next_done = control_flow_ops.cond(
          math_ops.greater(time, maximum_length),
          lambda: array_ops.ones([batch_size, ], dtype=dtypes.bool),
          lambda: next_done)

      return (next_done, next_cell_state, next_cell_input, emit_output,
              next_context_state)

  return _decoder_fn
def softmax_cross_entropy(logits, onehot_labels, weight=1.0,
                          label_smoothing=0, scope=None):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weight` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weight` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    onehot_labels: [batch_size, num_classes] target one_hot_encoded labels.
    weight: Coefficients for the loss. The tensor must be a scalar or a tensor
      of shape [batch_size].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weight` is invalid or if `weight` is None.
  """
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      [logits, onehot_labels]):
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(logits, onehot_labels,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weight)
def sigmoid_cross_entropy(
    logits, multi_class_labels, weights=_WEIGHT_SENTINEL, label_smoothing=0,
    scope=None, weight=_WEIGHT_SENTINEL):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weight` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weight` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:
      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
                              + 0.5 * label_smoothing

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    multi_class_labels: [batch_size, num_classes] target labels in (0, 1).
    weights: Coefficients for the loss. The tensor must be a scalar, a tensor
      of shape [batch_size] or shape [batch_size, num_classes].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: The scope for the operations performed in computing the loss.
    weight: Deprecated alias for `weights`.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weight` is invalid, or if
      `weight` is None.
  """
  weights = _weights(weights, weight)
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      [logits, multi_class_labels, weights]):
    logits.get_shape().assert_is_compatible_with(
        multi_class_labels.get_shape())

    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(logits, multi_class_labels,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weights)
def softmax_cross_entropy(
    logits, onehot_labels, weights=_WEIGHT_SENTINEL, label_smoothing=0,
    scope=None, weight=_WEIGHT_SENTINEL):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weight` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weight` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    onehot_labels: [batch_size, num_classes] target one_hot_encoded labels.
    weights: Coefficients for the loss. The tensor must be a scalar or a
      tensor of shape [batch_size].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    weight: Deprecated alias for `weights`.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weight` is invalid or if `weight` is None.
  """
  weights = _weights(weights, weight)
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      [logits, onehot_labels, weights]):
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(logits, onehot_labels,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weights)
def sigmoid_cross_entropy(
    logits, multi_class_labels, weights=1.0, label_smoothing=0, scope=None):
  """Creates a cross-entropy loss using tf.nn.sigmoid_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/2:
      new_multiclass_labels = multiclass_labels * (1 - label_smoothing)
                              + 0.5 * label_smoothing

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    multi_class_labels: [batch_size, num_classes] labels in (0, 1).
    weights: Coefficients for the loss. The tensor must be a scalar, a tensor
      of shape [batch_size] or shape [batch_size, num_classes].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: The scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of
      `multi_class_labels` or if the shape of `weights` is invalid, or if
      `weights` is None.
  """
  with ops.name_scope(scope, "sigmoid_cross_entropy_loss",
                      [logits, multi_class_labels, weights]) as scope:
    logits.get_shape().assert_is_compatible_with(
        multi_class_labels.get_shape())

    multi_class_labels = math_ops.cast(multi_class_labels, logits.dtype)

    if label_smoothing > 0:
      multi_class_labels = (multi_class_labels * (1 - label_smoothing) +
                            0.5 * label_smoothing)

    losses = nn.sigmoid_cross_entropy_with_logits(labels=multi_class_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weights, scope=scope)
def softmax_cross_entropy(
    logits, onehot_labels, weights=1.0, label_smoothing=0, scope=None):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of size [`batch_size`], then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    logits: [batch_size, num_classes] logits outputs of the network.
    onehot_labels: [batch_size, num_classes] one-hot-encoded labels.
    weights: Coefficients for the loss. The tensor must be a scalar or a
      tensor of shape [batch_size].
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.

  Returns:
    A scalar `Tensor` representing the mean loss value.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.
  """
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      [logits, onehot_labels, weights]) as scope:
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    losses = nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                  logits=logits,
                                                  name="xentropy")
    return compute_weighted_loss(losses, weights, scope=scope)
def __init__(self,
             n_classes,
             label_name=None,
             weight_column_name=None,
             enable_centered_bias=False,
             head_name=None,
             loss_fn=_softmax_cross_entropy_loss,
             thresholds=None,
             metric_class_ids=None):
  """_Head for classification.

  Args:
    n_classes: Number of classes, must be greater than 2 (for 2 classes, use
      `_BinaryLogisticHead`).
    label_name: String, name of the key in label dict. Can be null if label
      is a tensor (single headed models).
    weight_column_name: A string defining feature column name representing
      weights. It is used to down weight or boost examples during training.
      It will be multiplied by the loss of the example.
    enable_centered_bias: A bool. If True, estimator will learn a centered
      bias variable for each class. Rest of the model structure learns the
      residual after centered bias.
    head_name: name of the head. If provided, predictions, summary, metrics
      keys will be suffixed by `"/" + head_name` and the default variable
      scope will be `head_name`.
    loss_fn: Loss function.
    thresholds: thresholds for eval.
    metric_class_ids: List of class IDs for which we should report per-class
      metrics. Must all be in the range `[0, n_classes)`.

  Raises:
    ValueError: if `n_classes` or `metric_class_ids` is invalid.
  """
  super(_MultiClassHead, self).__init__(
      problem_type=constants.ProblemType.CLASSIFICATION,
      logits_dimension=n_classes,
      label_name=label_name,
      weight_column_name=weight_column_name,
      head_name=head_name)

  if (n_classes is None) or (n_classes <= 2):
    raise ValueError("n_classes must be > 2: %s." % n_classes)
  self._thresholds = thresholds if thresholds else (.5,)
  self._loss_fn = loss_fn
  self._enable_centered_bias = enable_centered_bias
  self._metric_class_ids = tuple([] if metric_class_ids is None else
                                 metric_class_ids)
  for class_id in self._metric_class_ids:
    if (class_id < 0) or (class_id >= n_classes):
      raise ValueError("Class ID %s not in [0, %s)." % (class_id, n_classes))
def get_logits_and_probs(logits=None,
                         probs=None,
                         multidimensional=False,
                         validate_args=False,
                         name="get_logits_and_probs"):
  """Converts logit to probabilities (or vice-versa), and returns both.

  Args:
    logits: Numeric `Tensor` representing log-odds.
    probs: Numeric `Tensor` representing probabilities.
    multidimensional: `Boolean`, default `False`. If `True`, represents
      whether the last dimension of `logits` or `probs`, a `[N1, N2, ... k]`
      dimensional tensor, representing the logit or probability of
      `shape[-1]` classes.
    validate_args: `Boolean`, default `False`. When `True`, either assert
      `0 <= probs <= 1` (if not `multidimensional`) or that the last
      dimension of `probs` sums to one.
    name: A name for this operation (optional).

  Returns:
    logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0` or
      `1`, then the corresponding entry in the returned logit will be `-Inf`
      and `Inf` respectively.

  Raises:
    ValueError: if neither `probs` nor `logits` were passed in, or both were.
  """
  with ops.name_scope(name, values=[probs, logits]):
    if (probs is None) == (logits is None):
      raise ValueError("Must pass probs or logits, but not both.")

    if probs is None:
      logits = ops.convert_to_tensor(logits, name="logits")
      if multidimensional:
        return logits, nn.softmax(logits, name="probs")
      return logits, math_ops.sigmoid(logits, name="probs")

    probs = ops.convert_to_tensor(probs, name="probs")
    if validate_args:
      with ops.name_scope("validate_probs"):
        one = constant_op.constant(1., probs.dtype)
        dependencies = [check_ops.assert_non_negative(probs)]
        if multidimensional:
          dependencies += [assert_close(math_ops.reduce_sum(probs, -1), one,
                                        message="probs does not sum to 1.")]
        else:
          dependencies += [check_ops.assert_less_equal(
              probs, one, message="probs has components greater than 1.")]
        probs = control_flow_ops.with_dependencies(dependencies, probs)

    with ops.name_scope("logits"):
      if multidimensional:
        # Here we don't compute the multidimensional case, in a manner
        # consistent with respect to the unidimensional case. We do so
        # following the TF convention. Typically, you might expect to see
        # logits = log(probs) - log(gather(probs, pivot)). A side-effect of
        # being consistent with the TF approach is that the unidimensional
        # case implicitly handles the second dimension but the
        # multidimensional case explicitly keeps the pivot dimension.
        return math_ops.log(probs), probs
      return math_ops.log(probs) - math_ops.log1p(-1. * probs), probs
def softplus_inverse(x, name=None):
  """Computes the inverse softplus, i.e., x = softplus_inverse(softplus(x)).

  Mathematically this op is equivalent to:

  ```none
  softplus_inverse = log(exp(x) - 1.)
  ```

  Args:
    x: `Tensor`. Non-negative (not enforced), floating-point.
    name: A name for the operation (optional).

  Returns:
    `Tensor`. Has the same type/shape as input `x`.
  """
  with ops.name_scope(name, "softplus_inverse", values=[x]):
    x = ops.convert_to_tensor(x, name="x")
    # We begin by deriving a more numerically stable softplus_inverse:
    # x = softplus(y) = Log[1 + exp{y}], (which means x > 0).
    # ==> exp{x} = 1 + exp{y}                                (1)
    # ==> y = Log[exp{x} - 1]                                (2)
    #       = Log[(exp{x} - 1) / exp{x}] + Log[exp{x}]
    #       = Log[(1 - exp{-x}) / 1] + Log[exp{x}]
    #       = Log[1 - exp{-x}] + x                           (3)
    # (2) is the "obvious" inverse, but (3) is more stable than (2) for large
    # x. For small x (e.g. x = 1e-10), (3) will become -inf since 1 - exp{-x}
    # will be zero. To fix this, we use 1 - exp{-x} approx x for small x > 0.
    #
    # In addition to the numerically stable derivation above, we clamp
    # small/large values to be congruent with the logic in:
    # tensorflow/core/kernels/softplus_op.h
    #
    # Finally, we set the input to one whenever the input is too large or too
    # small. This ensures that no unchosen codepath is +/- inf. This is
    # necessary to ensure the gradient doesn't get NaNs. Recall that the
    # gradient of `where` behaves like `pred*pred_true + (1-pred)*pred_false`
    # thus an `inf` in an unselected path results in `0*inf=nan`. We are
    # careful to overwrite `x` with ones only when we will never actually use
    # this value. Note that we use ones and not zeros since
    # `log(expm1(0.)) = -inf`.
    threshold = np.log(np.finfo(x.dtype.as_numpy_dtype).eps) + 2.
    is_too_small = math_ops.less(x, np.exp(threshold))
    is_too_large = math_ops.greater(x, -threshold)
    too_small_value = math_ops.log(x)
    too_large_value = x
    # This `where` will ultimately be a NOP because we won't select this
    # codepath whenever we used the surrogate `ones_like`.
    x = array_ops.where(math_ops.logical_or(is_too_small, is_too_large),
                        array_ops.ones_like(x), x)
    y = x + math_ops.log(-math_ops.expm1(-x))  # == log(expm1(x))
    return array_ops.where(is_too_small, too_small_value,
                           array_ops.where(is_too_large, too_large_value, y))
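The stability argument in the comments above can be sanity-checked with a round trip. A minimal sketch, assuming TensorFlow 1.x graph mode and that the softplus_inverse defined above is in scope together with the tensorflow.python.ops modules it references:

import tensorflow as tf

# Round-trip check (illustrative, not from the source projects):
# softplus_inverse(softplus(x)) should approximately recover x, including for
# large x where the naive log(exp(x) - 1.) would overflow.
x = tf.constant([1e-6, 0.5, 20.0, 100.0], dtype=tf.float64)
recovered = softplus_inverse(tf.nn.softplus(x))

with tf.Session() as sess:
  print(sess.run(recovered))  # expected to be close to [1e-6, 0.5, 20.0, 100.0]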