def average_impurity(self):
  """Builds the graph ops that evaluate impurity over the tree's leaves.

  Leaf nodes are the rows of `self.variables.tree` whose first column equals
  `constants.LEAF_NODE`. Their rows of `self.variables.node_sums` are gathered
  and passed to `self._weighted_gini`.

  NOTE(review): only the gini path is visible in this method; there is no
  regression-specific (variance) branch here -- confirm that is intended.

  Returns:
    The last op in the graph: the weighted gini value when at least one leaf
    exists, otherwise a tensor of the same shape filled with 10000000.
  """
  first_column = array_ops.slice(self.variables.tree, [0, 0], [-1, 1])
  child_ids = array_ops.squeeze(first_column, squeeze_dims=[1])
  leaf_mask = math_ops.equal(constants.LEAF_NODE, child_ids)
  leaf_positions = array_ops.squeeze(array_ops.where(leaf_mask),
                                     squeeze_dims=[1])
  leaf_ids = math_ops.to_int32(leaf_positions)
  leaf_sums = array_ops.gather(self.variables.node_sums, leaf_ids)
  weighted_gini = self._weighted_gini(leaf_sums)

  def _real_impurity():
    return weighted_gini

  def _placeholder_impurity():
    # Average impurity can be used as a loss, so with no data yet we return a
    # big number; any later real value then registers as a decrease.
    return array_ops.ones_like(weighted_gini, dtype=dtypes.float32) * 10000000.

  # Guard against step 1, when there often are no leaves yet.
  has_leaves = math_ops.greater(array_ops.shape(leaf_ids)[0], 0)
  return control_flow_ops.cond(has_leaves, _real_impurity,
                               _placeholder_impurity)
def loss(self, data, labels):
  """Builds the training loss op for `data` against `labels`.

  When `self.is_regression` is set, the loss is the root-mean-squared error
  between `self.training_inference_graph(data)` and the labels cast to float.
  Otherwise it is the mean sparse softmax cross-entropy between that graph's
  output and the labels cast to int32 and squeezed. In both cases the final
  op is named "loss". When `self.regularizer` is truthy, a regularization
  term over all trainable variables is added on top.

  Args:
    data: input passed to `self.training_inference_graph`.
    labels: targets for `data`.

  Returns:
    The loss tensor to minimize.
  """
  if not self.is_regression:
    logits = self.training_inference_graph(data)
    class_ids = array_ops.squeeze(math_ops.to_int32(labels))
    per_example = nn_ops.sparse_softmax_cross_entropy_with_logits(
        logits, class_ids)
    total = math_ops.reduce_mean(per_example, name="loss")
  else:
    residual = self.training_inference_graph(data) - math_ops.to_float(labels)
    mse = math_ops.reduce_mean(residual * residual)
    total = math_ops.sqrt(mse, name="loss")

  if self.regularizer:
    penalty = layers.apply_regularization(self.regularizer,
                                          variables.trainable_variables())
    total = total + penalty
  return total
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
  """Runs CTC loss algorithm on each batch element.

  Arguments:
    y_true: tensor `(samples, max_string_length)` containing the truth
      labels.
    y_pred: tensor `(samples, time_steps, num_categories)` containing the
      prediction, or output of the softmax.
    input_length: tensor `(samples, 1)` containing the sequence length for
      each batch item in `y_pred`.
    label_length: tensor `(samples, 1)` containing the sequence length for
      each batch item in `y_true`.

  Returns:
    Tensor with shape (samples, 1) containing the CTC loss of each element.
  """
  # Squeeze only the trailing unit axis. An unrestricted squeeze would also
  # remove the batch axis when samples == 1 and hand a scalar length to the
  # ops below, which expect one length per batch item.
  label_length = math_ops.to_int32(array_ops.squeeze(label_length, axis=[1]))
  input_length = math_ops.to_int32(array_ops.squeeze(input_length, axis=[1]))
  sparse_labels = math_ops.to_int32(
      ctc_label_dense_to_sparse(y_true, label_length))

  # Move time to the leading axis, then take the log of the predictions. The
  # small constant keeps the log finite when a prediction is exactly zero.
  y_pred = math_ops.log(array_ops.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

  return array_ops.expand_dims(
      ctc.ctc_loss(
          inputs=y_pred, labels=sparse_labels, sequence_length=input_length),
      1)
def one_hot_wrapper(num_classes, loss_fn):
  """Adapts a loss that takes one-hot labels to accept class-index targets.

  Args:
    num_classes: depth of the one-hot encoding.
    loss_fn: callable invoked as `loss_fn(probs, one_hot_labels)`.

  Returns:
    A function `(probs, targets)` that casts `targets` to int32, one-hot
    encodes them as float32 (1. on, 0. off) and returns the result of
    `loss_fn`.
  """
  def _loss(probs, targets):
    class_ids = math_ops.to_int32(targets)
    encoded = array_ops.one_hot(
        class_ids,
        num_classes,
        on_value=1.,
        off_value=0.,
        dtype=dtypes.float32)
    return loss_fn(probs, encoded)

  return _loss
def _top_k_generator(k):
  """Returns a metric function measuring top-`k` membership of the targets.

  The returned function takes `(probabilities, targets)`, casts `targets` to
  int32, evaluates `nn.in_top_k` with the captured `k`, and feeds the result
  to `metric_ops.streaming_mean`.
  """
  def _top_k(probabilities, targets):
    hits = nn.in_top_k(probabilities, math_ops.to_int32(targets), k)
    return metric_ops.streaming_mean(hits)

  return _top_k
def _softmax_entropy(probabilities, targets, weights=None):
  """Streaming mean of the sparse softmax cross-entropy.

  Args:
    probabilities: first argument handed to
      `losses.sparse_softmax_cross_entropy`.
    targets: labels; cast to int32 before use.
    weights: optional weights forwarded to `metric_ops.streaming_mean`.

  Returns:
    Whatever `metric_ops.streaming_mean` returns for the cross-entropy.
  """
  cross_entropy = losses.sparse_softmax_cross_entropy(
      probabilities, math_ops.to_int32(targets))
  return metric_ops.streaming_mean(cross_entropy, weights=weights)
def _FloatyGatherGrad(op, grad):
  """Gradient function returning the gradient for `op.inputs[0]` as slices.

  Args:
    op: the forward op; `op.inputs[0]` is the tensor being indexed and
      `op.inputs[1]` holds the indices.
    grad: incoming gradient with respect to the op's output.

  Returns:
    A two-element list: an `IndexedSlices` for `op.inputs[0]` and `None` for
    the indices input.
  """
  params = op.inputs[0]
  static_shape = params.get_shape()

  if not static_shape.is_fully_defined():
    # op.inputs[0] can be large, so colocate the shape calculation with it.
    with ops.colocate_with(params):
      dense_shape = array_ops.shape(params)
      values_shape = array_ops.concat(0, [[-1], dense_shape[1:]])
  else:
    # Shape is fully known statically, so no runtime shape ops are needed.
    dense_shape = constant_op.constant(static_shape.as_list())
    values_shape = [-1] + static_shape[1:].as_list()

  slice_values = array_ops.reshape(grad, values_shape)
  flat_indices = array_ops.reshape(op.inputs[1], [-1])
  slice_indices = math_ops.to_int32(flat_indices)
  return [ops.IndexedSlices(slice_values, slice_indices, dense_shape), None]
def one_hot_mask(labels, num_classes, scope=None):
  """Compute 1-hot encodings for masks.

  Given a label image, this computes the one hot encoding at each pixel.

  Any number of leading dimensions is accepted, so both a single
  `(height, width, 1)` label image and a `(batch_size, width, height, 1)`
  batch work. The shape is read through `_shape`, which presumably returns
  static dimensions, so they must be known at graph-construction time
  (TODO confirm against `_shape`).

  Args:
    labels: tensor of labels whose last dimension is 1, e.g.
      (batch_size, width, height, 1).
    num_classes: number of classes
    scope: optional scope name

  Returns:
    Tensor with the same leading dimensions as `labels` and a last dimension
    of `num_classes`, e.g. (batch_size, width, height, num_classes), holding
    a 1-hot encoding.
  """
  with ops.name_scope(scope, "OneHotMask", [labels]):
    label_dims = list(_shape(labels))
    depth = label_dims[-1]
    assert depth == 1
    # Flatten to one label per row so each (row, label) pair addresses a
    # single cell of a [num_labels, num_classes] matrix.
    sparse_labels = math_ops.to_int32(array_ops.reshape(labels, [-1, 1]))
    sparse_size, _ = _shape(sparse_labels)
    indices = array_ops.reshape(math_ops.range(0, sparse_size, 1), [-1, 1])
    concated = array_ops.concat([indices, sparse_labels], 1)
    dense_result = sparse_ops.sparse_to_dense(concated,
                                              [sparse_size, num_classes], 1.0,
                                              0.0)
    # Restore the original leading dimensions, swapping the trailing unit
    # axis for the class axis.
    result = array_ops.reshape(dense_result, label_dims[:-1] + [num_classes])
    return result
def seq_labeling_decoder_linear(decoder_inputs, num_decoder_symbols,
                                scope=None, sequence_length=None,
                                dtype=tf.float32):
  """Applies a shared `_linear` projection to every decoder input.

  Args:
    decoder_inputs: non-empty list of tensors, one per time step. The first
      element must be rank 2 unless it is rank 1.
    num_decoder_symbols: output size passed to `_linear` for each step.
    scope: optional variable scope name; defaults to "non-attention_RNN".
    sequence_length: optional per-example lengths. When given, they are cast
      to int32 and each step is routed through `_step` together with an
      all-zero tensor of shape [batch_size, num_decoder_symbols] (presumably
      emitted for examples past their length -- confirm against `_step`).
    dtype: unused; kept for interface compatibility.

  Returns:
    A list with one output tensor per element of `decoder_inputs`.
  """
  with tf.variable_scope(scope or "non-attention_RNN"):
    decoder_outputs = list()

    # copy over logits once out of sequence_length
    first_shape = decoder_inputs[0].get_shape()
    if first_shape.ndims != 1:
      fixed_batch_size = first_shape.with_rank(2)[0]
    else:
      fixed_batch_size = first_shape.with_rank_at_least(1)[0]

    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = tf.shape(decoder_inputs[0])[0]

    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)
      # Prepare variables
      zero_logit = tf.zeros(
          tf.stack([batch_size, num_decoder_symbols]),
          decoder_inputs[0].dtype)
      zero_logit.set_shape(
          tensor_shape.TensorShape([fixed_batch_size.value,
                                    num_decoder_symbols]))
      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)

    for time, input_ in enumerate(decoder_inputs):
      # Every step after the first shares the projection's variables.
      if time > 0:
        tf.get_variable_scope().reuse_variables()
      # Bind the current input as a default so the closure cannot pick up a
      # later loop iteration's value.
      generate_logit = lambda input_=input_: _linear(
          input_, num_decoder_symbols, True)
      if sequence_length is not None:
        logit = _step(
            time, sequence_length, min_sequence_length, max_sequence_length,
            zero_logit, generate_logit)
      else:
        # Call the closure: appending the function object itself would put
        # lambdas, not tensors, into the returned list.
        logit = generate_logit()
      decoder_outputs.append(logit)

  return decoder_outputs
def generate_sequence_output(encoder_outputs, encoder_state,
                             num_decoder_symbols, sequence_length,
                             num_heads=1, dtype=dtypes.float32,
                             use_attention=True, loop_function=None,
                             scope=None, DNN_at_output=False,
                             forward_only=False):
  """Projects every encoder output to `num_decoder_symbols` values.

  Each time step goes through `linear_transformation`, or through
  `multilayer_perceptron` (with a hard-coded size argument of 200) when
  `DNN_at_output` is set.

  Args:
    encoder_outputs: non-empty list of tensors, one per time step. The first
      element must be rank 2 unless it is rank 1.
    encoder_state: unused in this function.
    num_decoder_symbols: output size of the projection.
    sequence_length: per-example lengths, or None. When given, they are cast
      to int32 and each step is routed through `_step` together with an
      all-zero tensor of shape [batch_size, num_decoder_symbols] (presumably
      emitted for examples past their length -- confirm against `_step`).
    num_heads: unused in this function.
    dtype: unused in this function.
    use_attention: unused in this function.
    loop_function: unused in this function.
    scope: optional variable scope name; defaults to "non-attention_RNN".
    DNN_at_output: selects the multilayer perceptron instead of the linear
      transformation, and the matching regularizer getter.
    forward_only: forwarded to `multilayer_perceptron`.

  Returns:
    A tuple `(outputs, sequence_attention_weights, regularizers)`: a list
    with one tensor per time step, a list that is always empty here, and the
    result of the regularizer getter matching `DNN_at_output`.
  """
  with variable_scope.variable_scope(scope or "non-attention_RNN"):
    attention_encoder_outputs = list()
    sequence_attention_weights = list()

    # copy over logits once out of sequence_length
    # NOTE(review): `output_size` is only bound on the rank != 1 path, yet
    # the projections below need it; rank-1 inputs fail with NameError.
    if encoder_outputs[0].get_shape().ndims != 1:
      (fixed_batch_size,
       output_size) = encoder_outputs[0].get_shape().with_rank(2)
    else:
      fixed_batch_size = (
          encoder_outputs[0].get_shape().with_rank_at_least(1)[0])

    if fixed_batch_size.value:
      batch_size = fixed_batch_size.value
    else:
      batch_size = array_ops.shape(encoder_outputs[0])[0]

    if sequence_length is not None:
      sequence_length = math_ops.to_int32(sequence_length)
      # Prepare variables
      zero_logit = array_ops.zeros(
          array_ops.pack([batch_size, num_decoder_symbols]),
          encoder_outputs[0].dtype)
      zero_logit.set_shape(
          tensor_shape.TensorShape([fixed_batch_size.value,
                                    num_decoder_symbols]))
      min_sequence_length = math_ops.reduce_min(sequence_length)
      max_sequence_length = math_ops.reduce_max(sequence_length)

    for time, input_ in enumerate(encoder_outputs):
      # Every step after the first shares the projection's variables.
      if time > 0:
        variable_scope.get_variable_scope().reuse_variables()

      # Bind the current input as a default so the closure cannot pick up a
      # later loop iteration's value.
      if not DNN_at_output:
        generate_logit = lambda input_=input_: linear_transformation(
            input_, output_size, num_decoder_symbols)
      else:
        generate_logit = lambda input_=input_: multilayer_perceptron(
            input_, output_size, 200, num_decoder_symbols,
            forward_only=forward_only)

      if sequence_length is not None:
        logit = _step(
            time, sequence_length, min_sequence_length, max_sequence_length,
            zero_logit, generate_logit)
      else:
        # Call the closure: appending the function object itself would put
        # lambdas, not tensors, into the returned list.
        logit = generate_logit()
      attention_encoder_outputs.append(logit)

    # NOTE(review): the original indentation was lost; the regularizer
    # lookup is kept inside the variable scope -- confirm.
    if DNN_at_output:
      regularizers = get_multilayer_perceptron_regularizers()
    else:
      regularizers = get_linear_transformation_regularizers()

  return attention_encoder_outputs, sequence_attention_weights, regularizers
def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
  """Decodes softmax output with a greedy or beam-search CTC decoder.

  Arguments:
    y_pred: tensor `(samples, time_steps, num_categories)` containing the
      prediction, or output of the softmax.
    input_length: tensor `(samples, )` containing the sequence length for
      each batch item in `y_pred`.
    greedy: when truthy, use `ctc.ctc_greedy_decoder`; otherwise use
      `ctc.ctc_beam_search_decoder`.
    beam_width: beam width for the beam-search decoder; ignored when greedy.
    top_paths: number of paths requested from the beam-search decoder;
      ignored when greedy.

  Returns:
    Tuple `(decoded_dense, log_prob)`:
      decoded_dense: list with one dense tensor per sparse tensor returned
        by the decoder; positions without a decoded label hold `-1`.
      log_prob: the second value returned by the decoder op.
  """
  # The decoders take time-major log-probabilities. The small constant keeps
  # the log finite when a prediction is exactly zero.
  time_major = array_ops.transpose(y_pred, perm=[1, 0, 2])
  log_probs = math_ops.log(time_major + 1e-8)
  seq_lengths = math_ops.to_int32(input_length)

  if not greedy:
    decoded, log_prob = ctc.ctc_beam_search_decoder(
        inputs=log_probs,
        sequence_length=seq_lengths,
        beam_width=beam_width,
        top_paths=top_paths)
  else:
    decoded, log_prob = ctc.ctc_greedy_decoder(
        inputs=log_probs, sequence_length=seq_lengths)

  decoded_dense = []
  for st in decoded:
    decoded_dense.append(
        sparse_ops.sparse_to_dense(
            st.indices, st.dense_shape, st.values, default_value=-1))
  return (decoded_dense, log_prob)


# HIGH ORDER FUNCTIONS