The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.reduce_sum().
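Before the project examples, here is a minimal, self-contained sketch of the basic call pattern on a hypothetical toy tensor (note that several of the examples below use the older `keep_dims` spelling from earlier TF 1.x releases instead of `keepdims`):

import tensorflow as tf

x = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])

total = tf.reduce_sum(x)                                 # scalar: 21.0
per_row = tf.reduce_sum(x, axis=1)                       # shape [2]: [6., 15.]
per_col = tf.reduce_sum(x, axis=0)                       # shape [3]: [5., 7., 9.]
per_row_kept = tf.reduce_sum(x, axis=1, keepdims=True)   # shape [2, 1]: [[6.], [15.]]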
def finalize(self, outputs : BeamSearchOptimizationDecoderOutput, final_state : BeamSearchOptimizationDecoderState, sequence_lengths):
    # all output fields are [max_time, batch_size, ...]
    predicted_ids = tf.contrib.seq2seq.gather_tree(
        outputs.predicted_ids, outputs.parent_ids,
        sequence_length=sequence_lengths, name='predicted_ids')
    total_loss = tf.reduce_sum(outputs.loss, axis=0, name='violation_loss')

    predicted_time = tf.shape(predicted_ids)[0]
    last_score = predicted_time-1
    with tf.name_scope('gold_score'):
        gold_score = outputs.gold_score[last_score]
    with tf.name_scope('sequence_scores'):
        sequence_scores = outputs.scores[last_score]

    return FinalBeamSearchOptimizationDecoderOutput(beam_search_decoder_output=outputs,
                                                    predicted_ids=predicted_ids,
                                                    scores=sequence_scores,
                                                    gold_score=gold_score,
                                                    gold_beam_id=final_state.gold_beam_id,
                                                    num_available_beams=final_state.num_available_beams,
                                                    total_violation_loss=total_loss), final_state
def bag_of_tokens(config, labels, label_lengths):
    if config.train_output_embeddings:
        with tf.variable_scope('embed', reuse=True):
            output_embeddings = tf.get_variable('output_embedding')
    else:
        output_embeddings = tf.constant(config.output_embedding_matrix)

    #everything_label_placeholder = tf.placeholder(shape=(None, config.max_length,), dtype=tf.int32)
    #everything_label_length_placeholder = tf.placeholder(shape=(None,), dtype=tf.int32)

    labels = tf.constant(np.array(labels))
    embedded_output = tf.gather(output_embeddings, labels)
    print('embedded_output before', embedded_output)
    #mask = tf.sequence_mask(label_lengths, maxlen=config.max_length, dtype=tf.float32)
    # note: this multiplication will broadcast the mask along all elements of the depth dimension
    # (which is why we run the expand_dims to choose how to broadcast)
    #embedded_output = embedded_output * tf.expand_dims(mask, axis=2)
    #print('embedded_output after', embedded_output)

    return tf.reduce_sum(embedded_output, axis=1)
def triplet_loss(anchor, positive, negative, alpha):
    """Calculate the triplet loss according to the FaceNet paper

    Args:
      anchor: the embeddings for the anchor images.
      positive: the embeddings for the positive images.
      negative: the embeddings for the negative images.

    Returns:
      the triplet loss according to the FaceNet paper as a float tensor.
    """
    with tf.variable_scope('triplet_loss'):
        pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1)
        neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1)

        basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
        loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0)

    return loss
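A hypothetical call, just to show the shapes involved (random 128-dimensional embeddings for a batch of 32; alpha=0.2 is an arbitrary margin, not a value taken from the example above):

# Sketch only: in practice the inputs come from an embedding network.
anchor = tf.random_normal([32, 128])
positive = tf.random_normal([32, 128])
negative = tf.random_normal([32, 128])
loss = triplet_loss(anchor, positive, negative, alpha=0.2)  # scalar float tensor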
def build_model(self):
    self.q = tf.placeholder(tf.float32, [self.reader.vocab_size], name="question")
    self.a = tf.placeholder(tf.float32, [self.reader.vocab_size], name="answer")

    self.build_encoder()
    self.build_decoder()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = tf.reduce_sum(tf.log(self.p_x_i))

    self.loss = tf.reduce_mean(self.e_loss + self.g_loss)
    self.optim = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(-self.loss)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("decoder loss", self.g_loss)
    _ = tf.scalar_summary("loss", self.loss)
def calculate_loss_distill_boost(self, predictions, labels_distill, labels, **unused_params):
    with tf.name_scope("loss_distill_boost"):
        print("loss_distill_boost")
        epsilon = 10e-6
        float_labels = tf.cast(labels, tf.float32)
        batch_size = tf.shape(float_labels)[0]
        float_labels_distill = tf.cast(labels_distill, tf.float32)
        error = tf.negative(float_labels * tf.log(float_labels_distill + epsilon) + (
            1 - float_labels) * tf.log(1 - float_labels_distill + epsilon))
        error = tf.reduce_sum(error, axis=1, keep_dims=True)
        alpha = error / tf.reduce_sum(error) * tf.cast(batch_size, dtype=tf.float32)
        alpha = tf.clip_by_value(alpha, 0.5, 5)
        alpha = alpha / tf.reduce_sum(alpha) * tf.cast(batch_size, dtype=tf.float32)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss * alpha)

        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def calculate_loss(self, predictions, labels, **unused_params): with tf.name_scope("loss_xent"): epsilon = 10e-6 vocab_size = predictions.get_shape().as_list()[1] float_labels = tf.cast(labels, tf.float32) cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + ( 1 - float_labels) * tf.log(1 - predictions + epsilon) cross_entropy_loss = tf.negative(cross_entropy_loss) neg_labels = 1 - float_labels predictions_pos = predictions*float_labels+10*neg_labels predictions_minpos = tf.reduce_min(predictions_pos,axis=1,keep_dims=True) predictions_neg = predictions*neg_labels-10*float_labels predictions_maxneg = tf.reduce_max(predictions_neg,axis=1,keep_dims=True) mask_1 = tf.cast(tf.greater_equal(predictions_neg, predictions_minpos),dtype=tf.float32) mask_2 = tf.cast(tf.less_equal(predictions_pos, predictions_maxneg),dtype=tf.float32) cross_entropy_loss = cross_entropy_loss*(mask_1+mask_2)*10 + cross_entropy_loss return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def calculate_loss(self, predictions, labels, **unused_params):
    bound = FLAGS.softmax_bound
    vocab_size_1 = bound
    with tf.name_scope("loss_softmax"):
        epsilon = 10e-8
        float_labels = tf.cast(labels, tf.float32)
        labels_1 = float_labels[:, :vocab_size_1]
        predictions_1 = predictions[:, :vocab_size_1]
        cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1, labels_1)
        lables_2 = float_labels[:, vocab_size_1:]
        predictions_2 = predictions[:, vocab_size_1:]
        # l1 normalization (labels are no less than 0)
        label_rowsum = tf.maximum(
            tf.reduce_sum(lables_2, 1, keep_dims=True),
            epsilon)
        label_append = 1.0 - tf.reduce_max(lables_2, 1, keep_dims=True)
        norm_float_labels = tf.concat((tf.div(lables_2, label_rowsum), label_append), axis=1)
        predictions_append = 1.0 - tf.reduce_sum(predictions_2, 1, keep_dims=True)
        softmax_outputs = tf.concat((predictions_2, predictions_append), axis=1)
        softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
            1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
        softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))

    return tf.reduce_mean(softmax_loss) + cross_entropy_loss
def calculate_loss(self, predictions, labels, weights=None, **unused_params):
    with tf.name_scope("loss_xent"):
        epsilon = 10e-6
        if FLAGS.label_smoothing:
            float_labels = smoothing(labels)
        else:
            float_labels = tf.cast(labels, tf.float32)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)
        if weights is not None:
            print(cross_entropy_loss, weights)
            weighted_loss = tf.einsum("ij,i->ij", cross_entropy_loss, weights)
            print("create weighted_loss", weighted_loss)
            return tf.reduce_mean(tf.reduce_sum(weighted_loss, 1))
        else:
            return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def compute_loss(self, decoder_output, _features, labels):
    """Computes the loss for this model.

    Returns a tuple `(losses, loss)`, where `losses` are the per-batch
    losses and loss is a single scalar tensor to minimize.
    """
    #pylint: disable=R0201
    # Calculate loss per example-timestep of shape [B, T]
    losses = seq2seq_losses.cross_entropy_sequence_loss(
        logits=decoder_output.logits[:, :, :],
        targets=tf.transpose(labels["target_ids"][:, 1:], [1, 0]),
        sequence_length=labels["target_len"] - 1)

    # Calculate the average log perplexity
    loss = tf.reduce_sum(losses) / tf.to_float(
        tf.reduce_sum(labels["target_len"] - 1))

    return losses, loss
def get_dice_coef(logits, labels):
    """Compute dice coefficient.

    Args:
        logits: Softmax probability applied to fuse layers.
        labels: Correct annotations (0 or 1).

    Returns:
        Mean dice coefficient over full tensor.

    Source:
        https://github.com/zsdonghao/tensorlayer/blob/master/tensorlayer/cost.py#L125
    """
    smooth = 1e-5
    inter = tf.reduce_sum(tf.multiply(logits, labels))
    l = tf.reduce_sum(logits)
    r = tf.reduce_sum(labels)
    return tf.reduce_mean((2.0 * inter + smooth) / (l + r + smooth))
def __init__(self, embedding):
    self.sess = tf.Session()
    self.inputs = tf.placeholder(tf.float32,
                                 [None, embedding.shape[1]],
                                 name='inputs')
    self.test_vec = tf.placeholder(tf.float32, [1, embedding.shape[1]],
                                   name='test_vec')
    self.cos_distance = tf.matmul(self.inputs, tf.transpose(self.test_vec))

    #-----------------------------------------------------------------------
    # Compute normalized embedding matrix
    #-----------------------------------------------------------------------
    row_sum = tf.reduce_sum(tf.square(self.inputs), axis=1, keep_dims=True)
    norm = tf.sqrt(row_sum)
    self.normalized = self.inputs / norm
    self.embedding = self.sess.run(self.normalized,
                                   feed_dict={self.inputs: embedding})

#---------------------------------------------------------------------------
def bin_stats(predictions: tf.Tensor, labels: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Calculate f1, precision and recall from binary classification expected and predicted values.

    :param predictions: 2-d tensor (batch, predictions) of predicted 0/1 classes
    :param labels: 2-d tensor (batch, labels) of expected 0/1 classes
    :return: a tuple of batched (f1, precision and recall) values
    """
    predictions = tf.cast(predictions, tf.int32)
    labels = tf.cast(labels, tf.int32)

    true_positives = tf.reduce_sum((predictions * labels), axis=1)
    false_positives = tf.reduce_sum(tf.cast(tf.greater(predictions, labels), tf.int32), axis=1)
    false_negatives = tf.reduce_sum(tf.cast(tf.greater(labels, predictions), tf.int32), axis=1)

    recall = true_positives / (true_positives + false_negatives)
    precision = true_positives / (true_positives + false_positives)
    f1_score = 2 / (1 / precision + 1 / recall)

    return f1_score, precision, recall
def bin_dice(predictions: tf.Tensor, labels: tf.Tensor) -> tf.Tensor:
    """
    Calculate Sorensen–Dice coefficient from the given binary classification expected and predicted values.

    The coefficient is defined as :math:`2*|X \cap Y| / (|X| + |Y|)`.

    :param predictions: 2-d tensor (batch, predictions) of predicted 0/1 classes
    :param labels: 2-d tensor (batch, labels) of expected 0/1 classes
    :return: batched Sørensen–Dice coefficients
    """
    predictions = tf.cast(predictions, tf.int32)
    labels = tf.cast(labels, tf.int32)

    true_positives = tf.reduce_sum((predictions * labels), axis=1)
    pred_positives = tf.reduce_sum(predictions, axis=1)
    label_positives = tf.reduce_sum(labels, axis=1)

    return 2 * true_positives / (pred_positives + label_positives)
def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
    def leapfrog(pos, vel, step, i):
        de_dp_ = tf.gradients(tf.reduce_sum(energy_fn(pos)), pos)[0]
        new_vel_ = vel - step * de_dp_
        new_pos_ = pos + step * new_vel_
        return [new_pos_, new_vel_, step, tf.add(i, 1)]

    def condition(pos, vel, step, i):
        return tf.less(i, n_steps)

    de_dp = tf.gradients(tf.reduce_sum(energy_fn(initial_pos)), initial_pos)[0]
    vel_half_step = initial_vel - 0.5 * stepsize * de_dp
    pos_full_step = initial_pos + stepsize * vel_half_step

    i = tf.constant(0)
    final_pos, new_vel, _, _ = tf.while_loop(condition, leapfrog,
                                             [pos_full_step, vel_half_step, stepsize, i])
    de_dp = tf.gradients(tf.reduce_sum(energy_fn(final_pos)), final_pos)[0]
    final_vel = new_vel - 0.5 * stepsize * de_dp
    return final_pos, final_vel
def build_model(self):
    Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z])
    Y = tf.placeholder(tf.float32, [self.batch_size, self.dim_y])

    image_real = tf.placeholder(tf.float32, [self.batch_size]+self.image_shape)
    h4 = self.generate(Z, Y)
    #image_gen comes from sigmoid output of generator
    image_gen = tf.nn.sigmoid(h4)

    raw_real2 = self.discriminate(image_real, Y)
    #p_real = tf.nn.sigmoid(raw_real)
    p_real = tf.reduce_mean(raw_real2)

    raw_gen2 = self.discriminate(image_gen, Y)
    #p_gen = tf.nn.sigmoid(raw_gen)
    p_gen = tf.reduce_mean(raw_gen2)

    discrim_cost = tf.reduce_sum(raw_real2) - tf.reduce_sum(raw_gen2)
    gen_cost = -tf.reduce_mean(raw_gen2)

    return Z, Y, image_real, discrim_cost, gen_cost, p_real, p_gen
def categorical_crossentropy_3d(y_true, y_predicted):
    """
    Computes categorical cross-entropy loss for a softmax distribution in a hot-encoded 3D array
    with shape (num_samples, num_classes, dim1, dim2, dim3)

    Parameters
    ----------
    y_true : keras.placeholder [batches, dim0,dim1,dim2]
        Placeholder for data holding the ground-truth labels encoded in a one-hot representation
    y_predicted : keras.placeholder [batches,channels,dim0,dim1,dim2]
        Placeholder for data holding the softmax distribution over classes

    Returns
    -------
    scalar
        Categorical cross-entropy loss value
    """
    y_true_flatten = K.flatten(y_true)
    y_pred_flatten = K.flatten(y_predicted)
    y_pred_flatten_log = -K.log(y_pred_flatten + K.epsilon())
    num_total_elements = K.sum(y_true_flatten)
    # cross_entropy = K.dot(y_true_flatten, K.transpose(y_pred_flatten_log))
    cross_entropy = tf.reduce_sum(tf.multiply(y_true_flatten, y_pred_flatten_log))
    mean_cross_entropy = cross_entropy / (num_total_elements + K.epsilon())
    return mean_cross_entropy
def step_with_training(self, training=None):

    def step(inputs, states):
        input_shape = K.int_shape(inputs)
        y_tm1 = self.layer.preprocess_input(
            K.expand_dims(states[0], axis=1),
            training
        )
        y_tm1 = K.reshape(y_tm1, (-1, input_shape[-1]))

        inputs_sum = tf.reduce_sum(inputs)

        def inputs_f(): return inputs
        def output_f(): return y_tm1
        current_inputs = tf.case(
            [(tf.equal(inputs_sum, 0.0), output_f)],
            default=inputs_f
        )

        return self.layer.step(
            current_inputs,
            states
        )

    return step
def l1_regularizer(weight=1.0, scope=None):
    """Define a L1 regularizer.

    Args:
      weight: scale the loss by this factor.
      scope: Optional scope for op_scope.

    Returns:
      a regularizer function.
    """
    def regularizer(tensor):
        with tf.op_scope([tensor], scope, 'L1Regularizer'):
            l1_weight = tf.convert_to_tensor(weight,
                                             dtype=tensor.dtype.base_dtype,
                                             name='weight')
            return tf.mul(l1_weight, tf.reduce_sum(tf.abs(tensor)), name='value')
    return regularizer
def l1_l2_regularizer(weight_l1=1.0, weight_l2=1.0, scope=None):
    """Define a L1L2 regularizer.

    Args:
      weight_l1: scale the L1 loss by this factor.
      weight_l2: scale the L2 loss by this factor.
      scope: Optional scope for op_scope.

    Returns:
      a regularizer function.
    """
    def regularizer(tensor):
        with tf.op_scope([tensor], scope, 'L1L2Regularizer'):
            weight_l1_t = tf.convert_to_tensor(weight_l1,
                                               dtype=tensor.dtype.base_dtype,
                                               name='weight_l1')
            weight_l2_t = tf.convert_to_tensor(weight_l2,
                                               dtype=tensor.dtype.base_dtype,
                                               name='weight_l2')
            reg_l1 = tf.mul(weight_l1_t, tf.reduce_sum(tf.abs(tensor)),
                            name='value_l1')
            reg_l2 = tf.mul(weight_l2_t, tf.nn.l2_loss(tensor),
                            name='value_l2')
            return tf.add(reg_l1, reg_l2, name='value')
    return regularizer
def l1_loss(tensor, weight=1.0, scope=None):
    """Define a L1Loss, useful for regularize, i.e. lasso.

    Args:
      tensor: tensor to regularize.
      weight: scale the loss by this factor.
      scope: Optional scope for op_scope.

    Returns:
      the L1 loss op.
    """
    with tf.op_scope([tensor], scope, 'L1Loss'):
        weight = tf.convert_to_tensor(weight,
                                      dtype=tensor.dtype.base_dtype,
                                      name='loss_weight')
        loss = tf.mul(weight, tf.reduce_sum(tf.abs(tensor)), name='value')
        tf.add_to_collection(LOSSES_COLLECTION, loss)
        return loss
def recode_cost(self, inputs, variation, eps=1e-5, **kwargs):
    """
    Cost for given input batch of samples, under current params.
    """
    h = self.get_h_inputs(inputs)
    z_mu = tf.matmul(h, self.params['Mhz']) + self.params['bMhz']
    z_sig = tf.matmul(h, self.params['Shz']) + self.params['bShz']

    # KL divergence between latent space induced by encoder and ...
    lat_loss = -tf.reduce_sum(1 + z_sig - z_mu**2 - tf.exp(z_sig), 1)

    z = z_mu + tf.sqrt(tf.exp(z_sig)) * variation
    h = self.get_h_latents(z)
    x_mu = self.decoding(tf.matmul(h, self.params['Mhx']) + self.params['bMhx'])
    x_sig = self.decoding(tf.matmul(h, self.params['Shx']) + self.params['bShx'])
    # x_sig = tf.clip_by_value(x_mu * (1 - x_mu), .05, 1)

    # decoding likelihood term
    like_loss = tf.reduce_sum(tf.log(x_sig + eps) +
                              (inputs - x_mu)**2 / x_sig, 1)

    # # Mean cross entropy between input and encode-decoded input.
    # like_loss = 2 * tf.reduce_sum(functions.cross_entropy(inputs, x_mu), 1)

    return .5 * tf.reduce_mean(like_loss + lat_loss)
def Minibatch_Discriminator(input, num_kernels=100, dim_per_kernel=5, init=False, name='MD'):
    num_inputs = df_dim*4
    theta = tf.get_variable(name+"/theta",
                            [num_inputs, num_kernels, dim_per_kernel],
                            initializer=tf.random_normal_initializer(stddev=0.05))
    log_weight_scale = tf.get_variable(name+"/lws",
                                       [num_kernels, dim_per_kernel],
                                       initializer=tf.constant_initializer(0.0))
    W = tf.mul(theta, tf.expand_dims(tf.exp(log_weight_scale)/tf.sqrt(tf.reduce_sum(tf.square(theta), 0)), 0))
    W = tf.reshape(W, [-1, num_kernels*dim_per_kernel])
    x = input
    x = tf.reshape(x, [batchsize, num_inputs])
    activation = tf.matmul(x, W)
    activation = tf.reshape(activation, [-1, num_kernels, dim_per_kernel])
    abs_dif = tf.mul(tf.reduce_sum(tf.abs(tf.sub(tf.expand_dims(activation, 3),
                                                 tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0))), 2),
                     1-tf.expand_dims(tf.constant(np.eye(batchsize), dtype=np.float32), 1))
    f = tf.reduce_sum(tf.exp(-abs_dif), 2)/tf.reduce_sum(tf.exp(-abs_dif))
    print(f.get_shape())
    print(input.get_shape())
    return tf.concat(1, [x, f])
def output_module(self):
    """
    1. use attention mechanism between query and hidden states, to get weighted sum of hidden state.
    2. non-linearity of query and hidden state to get label.
    input: query_embedding:[batch_size,embed_size], hidden state:[batch_size,block_size,hidden_size] of memory
    :return: y: predicted label.
    """
    # 1. use attention mechanism between query and hidden states, to get weighted sum of hidden state.
    # 1.1 get possibility distribution (of similiarity)
    p = tf.nn.softmax(tf.multiply(tf.expand_dims(self.query_embedding, axis=1), self.hidden_state))  # shape:[batch_size,block_size,hidden_size]<---query_embedding_expand:[batch_size,1,hidden_size]; hidden_state:[batch_size,block_size,hidden_size]
    # 1.2 get weighted sum of hidden state
    u = tf.reduce_sum(tf.multiply(p, self.hidden_state), axis=1)  # shape:[batch_size,hidden_size]<----------([batch_size,block_size,hidden_size],[batch_size,block_size,hidden_size])

    # 2. non-linearity of query and hidden state to get label
    H_u_matmul = tf.matmul(u, self.H) + self.h_u_bias  # shape:[batch_size,hidden_size]<----([batch_size,hidden_size],[hidden_size,hidden_size])
    activation = self.activation(self.query_embedding + H_u_matmul, scope="query_add_hidden")  # shape:[batch_size,hidden_size]
    activation = tf.nn.dropout(activation, keep_prob=self.dropout_keep_prob)  # shape:[batch_size,hidden_size]
    y = tf.matmul(activation, self.R) + self.y_bias  # shape:[batch_size,vocab_size]<-----([batch_size,hidden_size],[hidden_size,vocab_size])

    return y  # shape:[batch_size,vocab_size]
def loss(self, l2_lambda=0.0001):  # 0.001
    with tf.name_scope("loss"):
        # input: `logits`:[batch_size, num_classes], and `labels`:[batch_size]
        # output: A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the softmax cross entropy loss.
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label, logits=self.logits)
        # sigmoid_cross_entropy_with_logits. #losses=tf.nn.softmax_cross_entropy_with_logits(labels=self.input_y,logits=self.logits)
        # print("1.sparse_softmax_cross_entropy_with_logits.losses:",losses)  # shape=(?,)
        loss = tf.reduce_mean(losses)
        # print("2.loss.loss:", loss)  # shape=()
        l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                              if ('bias' not in v.name) and ('alpha' not in v.name)]) * l2_lambda
        loss = loss + l2_losses
    return loss

#def loss_seq2seq(self):
#    with tf.variable_scope("loss"):
#        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y_label, logits=self.logits);  #losses:[batch_size,self.decoder_sent_length]
#        loss_batch=tf.reduce_sum(losses,axis=1)/self.decoder_sent_length  #loss_batch:[batch_size]
#        loss=tf.reduce_mean(loss_batch)
#        l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * self.l2_lambda
#        loss = loss + l2_losses
#        return loss
def SoftArgmin(outputLeft, outputRight, D=192):
    left_result_D = outputLeft
    right_result_D = outputRight
    left_result_D_squeeze = tf.squeeze(left_result_D, axis=[0, 4])
    right_result_D_squeeze = tf.squeeze(right_result_D, axis=[0, 4])  # 192 256 512
    left_result_softmax = tf.nn.softmax(left_result_D_squeeze, dim=0)
    right_result_softmax = tf.nn.softmax(right_result_D_squeeze, dim=0)  # 192 256 512

    d_grid = tf.cast(tf.range(D), tf.float32)
    d_grid = tf.reshape(d_grid, (-1, 1, 1))
    d_grid = tf.tile(d_grid, [1, 256, 512])

    left_softargmin = tf.reduce_sum(tf.multiply(left_result_softmax, d_grid), axis=0, keep_dims=True)
    right_softargmin = tf.reduce_sum(tf.multiply(right_result_softmax, d_grid), axis=0, keep_dims=True)

    return left_softargmin, right_softargmin
def custom_loss(y_true, y_pred):
    # Get prediction
    pred_box_xy = tf.sigmoid(y_pred[..., :2])
    pred_box_wh = y_pred[..., 2:4]
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    # Get ground truth
    true_box_xy = y_true[..., :2]
    true_box_wh = y_true[..., 2:4]
    true_box_conf = y_true[..., 4]

    # Determine the mask: simply the position of the ground truth boxes (the predictors)
    true_mask = tf.expand_dims(y_true[..., 4], axis=-1)

    # Calculate the loss. A scale can be associated with each loss, indicating how important
    # the loss is. The bigger the scale, more important the loss is.
    loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) * true_mask) * 1.0
    loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) * true_mask) * 1.0
    loss_conf = tf.reduce_sum(tf.square(true_box_conf - pred_box_conf)) * 1.2

    loss = loss_xy + loss_wh + loss_conf

    return loss
def build_loss(self, inp, output):
    y_gt = inp['y_gt']
    y_out = output['y_out']
    ce = tfplus.nn.CE()({'y_gt': y_gt, 'y_out': y_out})
    num_ex_f = tf.to_float(tf.shape(inp['x'])[0])
    ce = tf.reduce_sum(ce) / num_ex_f
    self.add_loss(ce)
    total_loss = self.get_loss()
    self.register_var('loss', total_loss)
    ans = tf.argmax(y_gt, 1)
    correct = tf.equal(ans, tf.argmax(y_out, 1))
    top5_acc = tf.reduce_sum(tf.to_float(
        tf.nn.in_top_k(y_out, ans, 5))) / num_ex_f
    self.register_var('top5_acc', top5_acc)
    acc = tf.reduce_sum(tf.to_float(correct)) / num_ex_f
    self.register_var('acc', acc)
    return total_loss
def build_loss_grad(self, inp, output):
    y_gt = inp['y_gt']
    y_out = output['y_out']
    ce = tfplus.nn.CE()({'y_gt': y_gt, 'y_out': y_out})
    num_ex_f = tf.to_float(tf.shape(inp['x'])[0])
    ce = tf.reduce_sum(ce) / num_ex_f
    self.add_loss(ce)
    learn_rate = self.get_option('learn_rate')
    total_loss = self.get_loss()
    self.register_var('loss', total_loss)
    eps = self.get_option('adam_eps')
    optimizer = tf.train.AdamOptimizer(learn_rate, epsilon=eps)
    global_step = tf.Variable(0.0)
    self.register_var('step', global_step)
    train_step = optimizer.minimize(
        total_loss, global_step=global_step)
    self.register_var('train_step', train_step)
    correct = tf.equal(tf.argmax(y_gt, 1), tf.argmax(y_out, 1))
    acc = tf.reduce_sum(tf.to_float(correct)) / num_ex_f
    self.register_var('acc', acc)
    pass
def _score(self, prev_decoder_state, prev_embedding):
    # Returns scores in a tensor of shape [batch_size, input_sequence_length]

    if self.mode == 'decode':
        query_part = self.query_attention_partial_score_placeholder
        encoder_part = self.encoder_state_attention_partial_scores_placeholder
    else:
        query_part = self.query_attention_partial_score
        encoder_part = self.encoder_state_attention_partial_scores

    embedding_part = tf.matmul(prev_embedding, self.attention_w_e)

    output = tf.matmul(prev_decoder_state, self.attention_w) + embedding_part + query_part + encoder_part + self.attention_b
    output = tf.tanh(output)
    output = tf.reduce_sum(self.attention_v * output, axis=2)
    output = tf.transpose(output, [1, 0])

    # Handle input document padding by giving a large penalty, eliminating it from the weighted average
    padding_penalty = -1e20 * tf.to_float(1 - tf.sign(self.documents_placeholder))
    masked = output + padding_penalty

    return masked
def _attention(self, prev_decoder_state, prev_embedding):
    with tf.variable_scope('attention') as scope:
        # e = score of shape [batch_size, output_seq_length, input_seq_length], e_{ij} = score(s_{i-1}, h_j)
        # e_i = score of shape [batch_size, input_seq_length], e_ij = score(prev_decoder_state, h_j)
        e_i = self._score(prev_decoder_state, prev_embedding)

        # alpha_i = softmax(e_i) of shape [batch_size, input_seq_length]
        alpha_i = tf.nn.softmax(e_i)

        resized_alpha_i = tf.reshape(tf.tile(alpha_i, [1, self.encoder_output_size]),
                                     [self.batch_size, -1, self.encoder_output_size])

        if self.mode == 'decode':
            c_i = tf.reduce_sum(tf.multiply(resized_alpha_i, self.pre_computed_encoder_states_placeholder), axis=1)
        else:
            c_i = tf.reduce_sum(tf.multiply(resized_alpha_i, self.encoder_outputs), axis=1)

        return c_i, e_i
def actor_loss(self):
    if self.config.mode == 'discrete':
        log_prob = tf.reduce_sum(tf.log(self.a_prob) * tf.one_hot(self.action_input, self.action_dim, dtype=tf.float32),
                                 axis=1, keep_dims=True)
        # use entropy to encourage exploration
        exp_v = log_prob * self.TD_loss
        entropy = -tf.reduce_sum(self.a_prob * tf.log(self.a_prob), axis=1, keep_dims=True)  # encourage exploration
        exp_v = self.config.ENTROPY_BETA * entropy + exp_v
        # maximize log_prob weighted by the TD error, plus the entropy bonus
        return tf.reduce_mean(-exp_v)
    elif self.config.mode == 'continuous':
        log_prob = self.action_normal_dist.log_prob(self.action_input)
        exp_v = log_prob * self.TD_loss
        # use entropy to encourage exploration
        exp_v = self.config.ENTROPY_BETA * self.action_normal_dist.entropy() + exp_v
        return tf.reduce_mean(-exp_v)
def encode(self, inputs, _input_length, _parses):
    with tf.variable_scope('BagOfWordsEncoder'):
        W = tf.get_variable('W', (self.embed_size, self.output_size))
        b = tf.get_variable('b', shape=(self.output_size,), initializer=tf.constant_initializer(0, tf.float32))

        enc_hidden_states = tf.tanh(tf.tensordot(inputs, W, [[2], [0]]) + b)
        enc_final_state = tf.reduce_sum(enc_hidden_states, axis=1)

        #assert enc_hidden_states.get_shape()[1:] == (self.config.max_length, self.config.hidden_size)

        if self._cell_type == 'lstm':
            enc_final_state = (tf.contrib.rnn.LSTMStateTuple(enc_final_state, enc_final_state),)

        enc_output = tf.nn.dropout(enc_hidden_states, keep_prob=self._dropout, seed=12345)

        return enc_output, enc_final_state
def decov_loss(xs):
    """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf
    'Reducing Overfitting In Deep Networks by Decorrelating Representation'
    """
    x = tf.reshape(xs, [int(xs.get_shape()[0]), -1])
    m = tf.reduce_mean(x, 0, True)
    z = tf.expand_dims(x-m, 2)
    corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0, 2, 1])), 0)
    corr_frob_sqr = tf.reduce_sum(tf.square(corr))
    corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr)))
    loss = 0.5*(corr_frob_sqr - corr_diag_sqr)
    return loss
def build_model(self):
    self.x = tf.placeholder(tf.float32, [self.reader.vocab_size], name="input")
    self.x_idx = tf.placeholder(tf.int32, [None], name="x_idx")

    self.build_encoder()
    self.build_generator()

    # Kullback Leibler divergence
    self.e_loss = -0.5 * tf.reduce_sum(1 + self.log_sigma_sq - tf.square(self.mu) - tf.exp(self.log_sigma_sq))

    # Log likelihood
    self.g_loss = -tf.reduce_sum(tf.log(tf.gather(self.p_x_i, self.x_idx) + 1e-10))

    self.loss = self.e_loss + self.g_loss

    self.encoder_var_list, self.generator_var_list = [], []
    for var in tf.trainable_variables():
        if "encoder" in var.name:
            self.encoder_var_list.append(var)
        elif "generator" in var.name:
            self.generator_var_list.append(var)

    # optimizer for alternative update
    self.optim_e = tf.train.AdamOptimizer(learning_rate=self.lr) \
                           .minimize(self.e_loss, global_step=self.step, var_list=self.encoder_var_list)
    self.optim_g = tf.train.AdamOptimizer(learning_rate=self.lr) \
                           .minimize(self.g_loss, global_step=self.step, var_list=self.generator_var_list)

    # optimizer for one shot update
    self.optim = tf.train.AdamOptimizer(learning_rate=self.lr) \
                         .minimize(self.loss, global_step=self.step)

    _ = tf.scalar_summary("encoder loss", self.e_loss)
    _ = tf.scalar_summary("generator loss", self.g_loss)
    _ = tf.scalar_summary("total loss", self.loss)
def create_model(self, model_input, vocab_size, num_frames, **unused_params): """Creates a model which uses a logistic classifier over the average of the frame-level features. This class is intended to be an example for implementors of frame level models. If you want to train a model over averaged features it is more efficient to average them beforehand rather than on the fly. Args: model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of input features. vocab_size: The number of classes in the dataset. num_frames: A vector of length 'batch' which indicates the number of frames for each video (before padding). Returns: A dictionary with a tensor containing the probability predictions of the model in the 'predictions' key. The dimensions of the tensor are 'batch_size' x 'num_classes'. """ num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32) feature_size = model_input.get_shape().as_list()[2] max_frames = model_input.get_shape().as_list()[1] denominators = tf.reshape( tf.tile(num_frames, [1, feature_size]), [-1, feature_size]) avg_pooled = tf.reduce_sum(model_input, axis=[1]) / denominators output = slim.fully_connected( avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid, weights_regularizer=slim.l2_regularizer(1e-8)) return {"predictions": output}
def sub_moe(self, model_input, vocab_size, num_mixtures=None,
            l2_penalty=1e-8, scopename="", **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates"+scopename)
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts"+scopename)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])

    return model_input, final_probabilities
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    shape = model_input.get_shape().as_list()
    frames_sum = tf.reduce_sum(tf.abs(model_input), axis=2)
    frames_true = tf.ones(tf.shape(frames_sum))
    frames_false = tf.zeros(tf.shape(frames_sum))
    frames_bool = tf.reshape(tf.where(tf.greater(frames_sum, frames_false), frames_true, frames_false),
                             [-1, shape[1], 1])

    activation_1 = tf.reduce_max(model_input, axis=1)
    activation_2 = tf.reduce_sum(model_input*frames_bool, axis=1)/(tf.reduce_sum(frames_bool, axis=1)+1e-6)
    activation_3 = tf.reduce_min(model_input, axis=1)

    model_input_1, final_probilities_1 = self.sub_moe(activation_1, vocab_size, scopename="_max")
    model_input_2, final_probilities_2 = self.sub_moe(activation_2, vocab_size, scopename="_mean")
    model_input_3, final_probilities_3 = self.sub_moe(activation_3, vocab_size, scopename="_min")
    final_probilities = tf.stack((final_probilities_1, final_probilities_2, final_probilities_3), axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[shape[2], 3, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    activations = tf.stack((model_input_1, model_input_2, model_input_3), axis=2)
    weight = tf.nn.softmax(tf.einsum("aij,ijk->ajk", activations, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities*weight, axis=1)
    return result
def calculate_loss(self, predictions, labels, **unused_params): with tf.name_scope("loss_frames"): epsilon = 10e-6 float_labels = tf.cast(labels, tf.float32) cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + ( 1 - float_labels) * tf.log(1 - predictions + epsilon) return tf.reduce_sum(cross_entropy_loss, 2)
def calculate_loss(self, predictions, labels, **unused_params): with tf.name_scope("loss_frames"): epsilon = 10e-6 float_labels = tf.cast(labels, tf.float32) cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + ( 1 - float_labels) * tf.log(1 - predictions + epsilon) return tf.reduce_sum(cross_entropy_loss, axis=2)
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d"%(layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d"%(layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size*(num_t//pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t//pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames//pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities*weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    cnn_size = FLAGS.cnn_cells
    num_filters = [cnn_size, cnn_size, cnn_size*2]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d"%(layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, distill_labels=distill_labels, scopename="moe%d"%(layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size*(num_t//pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t//pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames//pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, lstm_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities*weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = 10
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
        if layer < 3:
            num_t = pool_size*(num_t//pool_size)
            cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t//pool_size, pool_size, features_size])
            cnn_input = tf.reduce_max(cnn_output, axis=2)
        else:
            cnn_input = cnn_output

    cnn_output, num_t = self.kmax(cnn_input, num_filters=features_size, filter_sizes=num_extend, sub_scope="kmax")
    cnn_input = tf.reshape(cnn_output, [-1, features_size])
    final_probilities = self.sub_moe(cnn_input, vocab_size)
    final_probilities = tf.reshape(final_probilities, [-1, num_extend, vocab_size])
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", cnn_output, weight2d), dim=1)
    result = {}
    result["predictions"] = tf.reduce_sum(final_probilities*weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = CnnKmaxModel().cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes,
                                               sub_scope="cnn%d"%(layer+1), l2_penalty=0.0)
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = tf.reduce_max(cnn_output, axis=1)
        moe_inputs.append(cnn_multiscale)
        final_probility = CnnKmaxModel().sub_moe(cnn_multiscale, vocab_size, scopename="moe%d"%(layer+1), l2_penalty=0.0)
        final_probilities.append(final_probility)
        num_t = pool_size*(num_t//pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t//pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", tf.stop_gradient(moe_inputs), weight2d), dim=1)
    result = {}
    result["predictions"] = tf.reduce_sum(tf.stop_gradient(final_probilities)*weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = tf.reduce_max(cnn_output, axis=1)
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d"%(layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size*(num_t//pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :], [-1, num_t//pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities*weight, axis=1)
    return result
def sub_model(self, model_input, vocab_size, num_mixtures=None,
              l2_penalty=1e-8, sub_scope="", distill_labels=None, **unused_params):
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    class_size = 256
    if distill_labels is not None:
        class_input = slim.fully_connected(
            distill_labels,
            class_size,
            activation_fn=tf.nn.relu,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="class_inputs")
        class_input = tf.nn.l2_normalize(class_input, dim=1)
        model_input = tf.concat((model_input, class_input), axis=1)

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates-"+sub_scope)
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts-"+sub_scope)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return final_probabilities
def calculate_loss_distill(self, predictions, labels_distill, labels, **unused_params):
    with tf.name_scope("loss_distill"):
        print("loss_distill")
        epsilon = 10e-6
        float_labels = tf.cast(labels, tf.float32)
        float_labels_distill = tf.cast(labels_distill, tf.float32)
        embedding_mat = np.loadtxt("./resources/embedding_matrix.model")
        vocab_size = embedding_mat.shape[1]
        labels_size = float_labels.get_shape().as_list()[1]
        embedding_mat = tf.cast(embedding_mat, dtype=tf.float32)
        cross_entropy_loss_1 = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        float_labels_1 = float_labels[:, :vocab_size]
        labels_smooth = tf.matmul(float_labels_1, embedding_mat)/tf.reduce_sum(float_labels_1, axis=1, keep_dims=True)
        float_classes = labels_smooth
        for i in range(labels_size//vocab_size-1):
            float_classes = tf.concat((float_classes, labels_smooth), axis=1)
        cross_entropy_loss_2 = float_classes * tf.log(predictions + epsilon) + (
            1 - float_classes) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss_3 = float_labels_distill * tf.log(predictions + epsilon) + (
            1 - float_labels_distill) * tf.log(1 - predictions + epsilon)

        cross_entropy_loss = cross_entropy_loss_1*0.5 + cross_entropy_loss_2*0.5 + cross_entropy_loss_3*0.5
        cross_entropy_loss = tf.negative(cross_entropy_loss)

        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def calculate_loss_negative(self, predictions_pos, predictions_neg, labels, **unused_params):
    with tf.name_scope("loss_negative"):
        epsilon = 10e-6
        float_labels = tf.cast(labels, tf.float32)
        weight_pos = np.loadtxt(FLAGS.autoencoder_dir+"labels_uni.out")
        weight_pos = tf.reshape(tf.cast(weight_pos, dtype=tf.float32), [1, -1])
        weight_pos = tf.log(tf.reduce_max(weight_pos)/weight_pos) + 1
        cross_entropy_loss_1 = float_labels * tf.log(predictions_pos + epsilon)*weight_pos + (
            1 - float_labels) * tf.log(1 - predictions_pos + epsilon)
        cross_entropy_loss_2 = (1 - float_labels) * tf.log(predictions_neg + epsilon) + \
            float_labels * tf.log(1 - predictions_neg + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss_1 + cross_entropy_loss_2)
        return tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
def calculate_mseloss(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_mse"):
        float_labels = tf.cast(labels, tf.float32)
        mse_loss = tf.square(predictions - float_labels)
        return tf.reduce_mean(tf.reduce_sum(mse_loss, 1))
def calculate_loss_postprocess(self, predictions, labels, **unused_params):
    with tf.name_scope("loss_postprocess"):
        float_labels = tf.cast(labels, tf.float32)
        predictions_pos = predictions*float_labels + (1-float_labels)
        predictions_neg = predictions*(1-float_labels)
        min_pos = tf.stop_gradient(tf.reduce_min(predictions_pos))
        max_neg = tf.stop_gradient(tf.reduce_max(predictions_neg))
        predictions_pos_mistake = tf.nn.relu(max_neg-predictions_pos) - 0.01*tf.nn.relu(predictions_pos-max_neg)
        predictions_neg_mistake = tf.nn.relu(predictions_neg-min_pos) - 0.01*tf.nn.relu(min_pos-predictions_neg)
        postprocess_loss = predictions_pos_mistake + predictions_neg_mistake
        return tf.reduce_mean(tf.reduce_sum(postprocess_loss, 1))