我们从 Python 开源项目中，提取了以下 45 个代码示例，用于说明如何使用 tensorflow.cumsum()。
def row_index(shape):
    """
    Build a tensor holding the X (column) position of every element.

    .. code-block:: python

        [
          [ 0, 1, 2, ... width-1 ],
          [ 0, 1, 2, ... width-1 ],
          ... (x height)
        ]

    :param list[int] shape: only ``shape[0]`` (height) and ``shape[1]`` (width) are read.
    :return: int32 Tensor of shape ``[height, width]``
    """
    height, width = shape[0], shape[1]

    # An exclusive running sum over ones yields 0, 1, ..., width-1.
    single_row = tf.cumsum(tf.ones([width], dtype=tf.int32), exclusive=True)

    # Repeat the row `height` times, then fold the flat result into 2-D.
    repeated = tf.tile(single_row, [height])
    return tf.reshape(repeated, [height, width])
def segment_indices(segment_ids, name=None):
    """Returns a `Tensor` with each element's position inside its segment.

    `segment_ids` is a non-decreasing sequence of non-negative integers that
    partitions the input into segments; for instance [0, 0, 1, 2, 2, 2]
    describes three segments of length 2, 1 and 3.

    Example input:  [0, 0, 1, 2, 2, 2]
    Example output: [0, 1, 0, 0, 1, 2]

    Args:
      segment_ids: A 1-d `Tensor` containing a non-decreasing sequence of
        non-negative integers with type `tf.int32` or `tf.int64`.
      name: (Optional) A name for this operation.

    Returns:
      A `Tensor` containing the indices within each segment.
    """
    with tf.name_scope(name, 'segment_indices'):
        # Number of elements in every segment.
        lengths = tf.segment_sum(tf.ones_like(segment_ids), segment_ids)

        # boundaries[k] is the flat offset at which segment k begins.
        boundaries = tf.concat([[0], tf.cumsum(lengths)], 0)
        starts = tf.gather(boundaries, segment_ids)

        # Flat position minus the start of the owning segment.
        flat_positions = tf.range(
            tf.size(segment_ids, out_type=segment_ids.dtype))
        return flat_positions - starts
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
    """Compute precision and recall curves from scores and TP / FP flags.

    Detections are ordered by decreasing score; the true-positive and
    false-positive flags are then accumulated along axis 0 so that entry ``i``
    describes the metrics obtained when keeping the ``i + 1`` best detections.

    Returns the result of ``tf.tuple([precision, recall])``.
    """
    with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
        # Rank the detections by score and reorder the flags accordingly.
        _sorted_scores, order = tf.nn.top_k(scores, k=n_detections, sorted=True)
        sorted_tp = tf.gather(tp, order)
        sorted_fp = tf.gather(fp, order)

        # Running counts, accumulated in float64.
        dtype = tf.float64
        cum_tp = tf.cumsum(tf.cast(sorted_tp, dtype), axis=0)
        cum_fp = tf.cumsum(tf.cast(sorted_fp, dtype), axis=0)

        recall = _safe_div(cum_tp, tf.cast(n_gbboxes, dtype), 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')
        return tf.tuple([precision, recall])
def preturn_network(rewards, discounts, values):
    """Combine rewards, discounts and values into "preturns".

    Along axis 1 the result is
    ``cumsum(rewards * exclusive_cumprod(discounts)) + values * cumprod(discounts)``.

    Two runtime assertions guard the computation: every entry of
    ``rewards[:, 0, :]`` must be 0 and every entry of ``discounts[:, 0, :]``
    must be 1.
    """
    # First reward must be zero, first discount must be one.
    zero_first_reward = tf.Assert(
        tf.reduce_all(tf.equal(rewards[:, 0, :], 0.0)),
        [rewards[:, 0, :]])
    unit_first_discount = tf.Assert(
        tf.reduce_all(tf.equal(discounts[:, 0, :], 1.0)),
        [discounts[:, 0, :]])

    checks = [zero_first_reward, unit_first_discount]
    with tf.control_dependencies(checks), tf.variable_scope('preturn'):
        # Values are discounted including the current step, rewards only by
        # the steps strictly before it.
        value_scale = tf.cumprod(discounts, axis=1, exclusive=False)
        reward_scale = tf.cumprod(discounts, axis=1, exclusive=True)

        scaled_values = values * value_scale
        scaled_rewards = rewards * reward_scale

        preturns = tf.cumsum(scaled_rewards, axis=1) + scaled_values

    util.activation_summary(preturns)
    return preturns
def get_S_loss_hao(mean_x, logcov_x, qv_alpha, qv_beta, qeta_mu, qeta_sigma, epsilon=1e-8):
    """Compute the assignment scores S, the distribution q(z) and the S loss.

    The observation standard deviation ``sigma_px`` is fixed to 1.0.

    NOTE(review): shapes are inferred from the broadcasting below — mean_x and
    logcov_x look like [N, D], qeta_mu / qeta_sigma like [D, K], and qv_alpha /
    qv_beta like rank-1 vectors of length K - 1. TODO confirm against callers.

    Args:
      mean_x, logcov_x: mean and log-covariance terms, expanded on axis 1.
      qv_alpha, qv_beta: parameters fed to the digamma terms.
      qeta_mu, qeta_sigma: transposed and expanded on axis 0.
      epsilon: added inside the final log for numerical safety.

    Returns:
      ``(S_loss, qz, S)`` where ``qz`` is the row-wise softmax of ``S`` and
      ``S_loss`` is minus the summed row-wise log-sum-exp of ``S`` (up to
      ``epsilon``).
    """
    sigma_px = 1.0

    digamma_total = tf.digamma(qv_alpha + qv_beta)
    S1 = tf.digamma(qv_alpha) - digamma_total
    S2 = tf.cumsum(tf.digamma(qv_beta) - digamma_total)

    mean_x_expand = tf.expand_dims(mean_x, 1)
    logcov_x_expand = tf.expand_dims(logcov_x, 1)
    qeta_mu_expand = tf.expand_dims(tf.transpose(qeta_mu), 0)
    qeta_sigma_expand = tf.expand_dims(tf.transpose(qeta_sigma), 0)

    S3 = 0.5 * tf.reduce_sum(
        1 + logcov_x_expand - 2 * tf.log(sigma_px)
        - (tf.exp(logcov_x_expand) + tf.square(qeta_sigma_expand)
           + tf.square(mean_x_expand - qeta_mu_expand)) / tf.square(sigma_px),
        2)

    # Append a zero to S1 and prepend a zero to S2.  tf.pad is used instead of
    # tf.concat because tf.concat's argument order changed in TensorFlow 1.0
    # (tf.concat(0, values) only works before 1.0), whereas tf.pad has the
    # same signature on both sides of that change.
    S = S3 + tf.pad(S1, [[0, 1]]) + tf.pad(S2, [[1, 0]])

    # Variational distribution q(z): softmax over axis 1, shifted by the row
    # maximum so the exponentials cannot overflow.
    S_max = tf.reduce_max(S, 1)
    S_whiten = S - tf.expand_dims(S_max, 1)
    exp_whiten = tf.exp(S_whiten)
    row_sum = tf.reduce_sum(exp_whiten, 1)
    qz = exp_whiten / tf.expand_dims(row_sum, 1)

    # S loss: -sum_n logsumexp(S[n, :]), evaluated with the same shift.
    S_loss = -tf.reduce_sum(S_max) - tf.reduce_sum(tf.log(row_sum + epsilon))
    return S_loss, qz, S
def get_S_loss_hao(mean_x, logcov_x, qv_alpha, qv_beta, qeta_mu, qeta_sigma, sigma_px, epsilon=1e-8):
    """Compute the assignment scores S, the distribution q(z) and the S loss.

    Variant in which the observation standard deviation ``sigma_px`` is a
    tensor supplied by the caller; it is transposed and expanded exactly like
    ``qeta_mu`` / ``qeta_sigma``.

    NOTE(review): shapes are inferred from the broadcasting below — mean_x and
    logcov_x look like [N, D], qeta_mu / qeta_sigma / sigma_px like [D, K],
    and qv_alpha / qv_beta like rank-1 vectors of length K - 1. TODO confirm.

    Args:
      mean_x, logcov_x: mean and log-covariance terms, expanded on axis 1.
      qv_alpha, qv_beta: parameters fed to the digamma terms.
      qeta_mu, qeta_sigma, sigma_px: transposed and expanded on axis 0.
      epsilon: added inside the final log for numerical safety.

    Returns:
      ``(S_loss, qz, S)`` where ``qz`` is the row-wise softmax of ``S`` and
      ``S_loss`` is minus the summed row-wise log-sum-exp of ``S`` (up to
      ``epsilon``).
    """
    digamma_total = tf.digamma(qv_alpha + qv_beta)
    S1 = tf.digamma(qv_alpha) - digamma_total
    S2 = tf.cumsum(tf.digamma(qv_beta) - digamma_total)

    mean_x_expand = tf.expand_dims(mean_x, 1)
    logcov_x_expand = tf.expand_dims(logcov_x, 1)
    qeta_mu_expand = tf.expand_dims(tf.transpose(qeta_mu), 0)
    qeta_sigma_expand = tf.expand_dims(tf.transpose(qeta_sigma), 0)
    sigma_px_expand = tf.expand_dims(tf.transpose(sigma_px), 0)

    S3 = 0.5 * tf.reduce_sum(
        1 + logcov_x_expand - 2 * tf.log(sigma_px_expand)
        - (tf.exp(logcov_x_expand) + tf.square(qeta_sigma_expand)
           + tf.square(mean_x_expand - qeta_mu_expand)) / tf.square(sigma_px_expand),
        2)

    # Append a zero to S1 and prepend a zero to S2.  tf.pad is used instead of
    # tf.concat because tf.concat's argument order changed in TensorFlow 1.0
    # (tf.concat(0, values) only works before 1.0), whereas tf.pad has the
    # same signature on both sides of that change.
    S = S3 + tf.pad(S1, [[0, 1]]) + tf.pad(S2, [[1, 0]])

    # Variational distribution q(z): softmax over axis 1, shifted by the row
    # maximum so the exponentials cannot overflow.
    S_max = tf.reduce_max(S, 1)
    S_whiten = S - tf.expand_dims(S_max, 1)
    exp_whiten = tf.exp(S_whiten)
    row_sum = tf.reduce_sum(exp_whiten, 1)
    qz = exp_whiten / tf.expand_dims(row_sum, 1)

    # S loss: -sum_n logsumexp(S[n, :]), evaluated with the same shift.
    S_loss = -tf.reduce_sum(S_max) - tf.reduce_sum(tf.log(row_sum + epsilon))
    return S_loss, qz, S
def get_S_loss(alpha, beta, mean_x, logcov_x, mean_eta, logcov_eta, sigma2, epsilon=1e-8):
    """Compute hard assignments and the S loss from the score matrix S.

    NOTE(review): shapes are inferred from the broadcasting below — mean_x and
    logcov_x look like [N, D], mean_eta / logcov_eta like [K, D], and alpha /
    beta like rank-1 vectors of length K - 1. TODO confirm against callers.

    Args:
      alpha, beta: parameters fed to the digamma terms.
      mean_x, logcov_x: expanded on axis 1 before broadcasting.
      mean_eta, logcov_eta: expanded on axis 0 before broadcasting.
      sigma2: a Python number (it is passed to ``math.log``).
      epsilon: added inside the final log for numerical safety.

    Returns:
      ``(assignments, S_loss)``: the arg-max of ``S`` over axis 1 and minus the
      summed row-wise log-sum-exp of ``S`` (up to ``epsilon``).
    """
    mean_x_pad = tf.expand_dims(mean_x, 1)
    logcov_x_pad = tf.expand_dims(logcov_x, 1)
    mean_eta_pad = tf.expand_dims(mean_eta, 0)
    logcov_eta_pad = tf.expand_dims(logcov_eta, 0)

    digamma_total = tf.digamma(alpha + beta)
    S1 = tf.digamma(alpha) - digamma_total
    S2 = tf.cumsum(tf.digamma(beta) - digamma_total)

    data_term = 0.5 * tf.reduce_sum(
        1 + logcov_x_pad - math.log(sigma2)
        - (tf.exp(logcov_x_pad) + tf.exp(logcov_eta_pad)
           + tf.square(mean_x_pad - mean_eta_pad)) / sigma2,
        2)

    # Append a zero to S1 and prepend a zero to S2.  tf.pad replaces the
    # pre-1.0 call tf.concat(0, values), whose argument order was swapped in
    # TensorFlow 1.0; tf.pad behaves the same on both sides of that change.
    S = data_term + tf.pad(S1, [[0, 1]]) + tf.pad(S2, [[1, 0]])

    # Axes are passed positionally so the calls do not depend on the
    # `dimension=` / `reduction_indices=` keyword names.
    assignments = tf.argmax(S, 1)
    S_max = tf.reduce_max(S, 1)

    # -sum_n logsumexp(S[n, :]) with the row maximum factored out.
    shifted_sum = tf.reduce_sum(tf.exp(S - tf.expand_dims(S_max, 1)), 1)
    S_loss = -tf.reduce_sum(S_max) - tf.reduce_sum(tf.log(shifted_sum + epsilon))
    return assignments, S_loss
def get_words_from_chars(characters_list: List[str], sequence_lengths: List[int], name='chars_conversion'):
    """Join a flat run of characters back into one string per sequence.

    ``sequence_lengths`` gives how many consecutive entries of
    ``characters_list`` belong to each sequence.  When it holds more than one
    length, every slice is joined separately; otherwise all characters are
    joined into a single string (kept as a length-1 tensor).
    """
    with tf.name_scope(name=name):
        def _join_span(span):
            # span is a [start, end) pair of positions in characters_list.
            return tf.reduce_join(characters_list[span[0]:span[1]])

        def _join_each_sequence():
            ends = tf.cumsum(sequence_lengths)
            starts = tf.concat([[0], ends[:-1]], axis=0)
            spans = tf.cast(tf.stack([starts, ends], axis=1), dtype=tf.int32)
            return tf.map_fn(_join_span, spans, dtype=tf.string)

        def _join_everything():
            return tf.reduce_join(characters_list, keep_dims=True)

        several_sequences = tf.shape(sequence_lengths)[0] > 1
        return tf.cond(several_sequences,
                       true_fn=_join_each_sequence,
                       false_fn=_join_everything)
def get_weights(sequence, eos_id, include_first_eos=True):
    """Return a float mask that is 1 up to the first ``eos_id`` in each row.

    A position gets weight 1 while the running count of non-EOS tokens along
    axis 1 still equals its 1-based index, i.e. while no EOS has been seen.
    With ``include_first_eos`` the mask is shifted right by one step (a column
    of ones is prepended and the last column dropped), so the first EOS itself
    is covered too.  Gradients are stopped on the result.
    """
    non_eos_count = tf.cumsum(
        tf.to_float(tf.not_equal(sequence, eos_id)), axis=1)

    # 1-based position of every column, replicated for each row.
    positions = tf.range(start=1, limit=tf.shape(sequence)[1] + 1)
    positions = tf.tile(tf.expand_dims(positions, axis=0),
                        [tf.shape(sequence)[0], 1])

    weights = tf.to_float(tf.equal(non_eos_count, tf.to_float(positions)))

    if include_first_eos:
        truncated = weights[:, :-1]
        leading_ones = tf.ones(tf.stack([tf.shape(truncated)[0], 1]))
        weights = tf.concat([leading_ones, truncated], axis=1)

    return tf.stop_gradient(weights)
def column_index(shape):
    """
    Build a tensor holding the Y (row) position of every element.

    .. code-block:: python

        [
          [ 0, 0, 0, ... ],
          [ 1, 1, 1, ... ],
          [ n, n, n, ... ],
          ...
          [ height-1, height-1, height-1, ... ]
        ]

    :param list[int] shape: only ``shape[0]`` (height) and ``shape[1]`` (width) are read.
    :return: int32 Tensor of shape ``[height, width]``
    """
    height, width = shape[0], shape[1]

    # A [height, width] grid of ones, assembled by tiling one row of ones.
    ones_row = tf.ones([width], dtype=tf.int32)
    ones_grid = tf.reshape(tf.tile(ones_row, [height]), [height, width])

    # An exclusive running sum down axis 0 (the default) numbers the rows.
    return tf.cumsum(ones_grid, exclusive=True)
def precision_recall(num_gbboxes, num_detections, tp, fp, scores, dtype=tf.float64, scope=None):
    """Compute precision and recall curves from scores and TP / FP flags.

    When ``scores`` is a dict, all inputs are treated as dicts sharing the
    keys of ``num_gbboxes``; the function recurses per key under the scope
    ``'precision_recall_<key>'`` and returns two dicts
    ``(precisions, recalls)``.  Otherwise it returns
    ``tf.tuple([precision, recall])`` for the single set of detections.
    """
    # Dictionary inputs: one curve per key.
    if isinstance(scores, dict):
        precisions, recalls = {}, {}
        for key in num_gbboxes.keys():
            key_scope = 'precision_recall_%s' % key
            precisions[key], recalls[key] = precision_recall(
                num_gbboxes[key], num_detections[key],
                tp[key], fp[key], scores[key],
                dtype, key_scope)
        return precisions, recalls

    with tf.name_scope(scope, 'precision_recall',
                       [num_gbboxes, num_detections, tp, fp, scores]):
        # Rank the detections by score and reorder the flags accordingly.
        _sorted_scores, order = tf.nn.top_k(scores, k=num_detections, sorted=True)
        sorted_tp = tf.gather(tp, order)
        sorted_fp = tf.gather(fp, order)

        # Running true / false positive counts along the ranking.
        cum_tp = tf.cumsum(tf.cast(sorted_tp, dtype), axis=0)
        cum_fp = tf.cumsum(tf.cast(sorted_fp, dtype), axis=0)

        recall = _safe_div(cum_tp, tf.cast(num_gbboxes, dtype), 'recall')
        precision = _safe_div(cum_tp, cum_tp + cum_fp, 'precision')
        return tf.tuple([precision, recall])
def certainty(self):
    """Sum the per-step entropy of ``self.seg_prediction`` over valid steps.

    The entropy ``-sum(p * log(p))`` is taken over axis 2, positions along
    axis 1 beyond ``self._length`` are zeroed out, and the remainder is summed
    over axis 1.

    NOTE(review): a prediction that is exactly 0 gives ``0 * log(0) = NaN``;
    confirm that upstream keeps ``seg_prediction`` strictly positive.
    """
    entropy = self.seg_prediction * tf.log(self.seg_prediction)
    entropy = -tf.reduce_sum(entropy, 2)

    # 1-based step number of every position along axis 1.
    step_number = tf.cumsum(tf.ones(tf.shape(entropy)), axis=1)
    lengths = tf.cast(
        tf.tile(tf.expand_dims(self._length, 1), [1, tf.shape(entropy)[1]]),
        tf.float32)
    valid = tf.less_equal(step_number, lengths)

    # Casting the boolean mask gives the same 1.0 / 0.0 values that
    # tf.select(mask, ones, zeros) produced; tf.select was removed in
    # TensorFlow 1.0, while tf.cast is available in every version.
    entropy *= tf.cast(valid, entropy.dtype)

    return tf.reduce_sum(entropy, 1)
def __init__(self, features_shape, num_classes, seq_len, cell_type='lstm', reuse=False, add_bn=False, add_reg=False, scope="VA"):
    """Store the model configuration and create the input placeholders.

    Args:
      features_shape: tuple appended to a leading ``None`` batch dimension
        for the inputs placeholder.
      num_classes: stored on the config.
      seq_len: stored on the config.
      cell_type: 'rnn', 'gru' or 'lstm'; selects the ``tf.contrib.rnn`` cell
        class stored on ``self.cell``.
      reuse: stored on ``self.reuse``.
      add_bn: when true, ``self.norm_fn`` is batch norm, otherwise ``None``.
      add_reg: when true, ``self.reg_fn`` is ``tf.nn.l2_loss``, otherwise ``None``.
      scope: stored on ``self.scope``.

    Raises:
      ValueError: if ``cell_type`` is not one of the supported names.
    """
    self.config = VisualAttentionConfig()
    cfg = self.config
    cfg.features_shape = features_shape
    cfg.num_classes = num_classes
    self.reuse = reuse
    cfg.seq_len = seq_len

    # Placeholders; the leading None is the batch dimension.
    self.inputs_placeholder = tf.placeholder(
        tf.float32, shape=tuple((None,) + cfg.features_shape))
    self.targets_placeholder = tf.placeholder(
        tf.float32, shape=tuple((None,) + cfg.targets_shape))
    self.seq_len_placeholder = tf.placeholder(tf.int32, shape=(None,))

    self.emission_num_layers = 1
    self.loss_type = 'negative_l1_dist'
    self.cumsum = False
    self.scope = scope

    self.norm_fn = tf.contrib.layers.batch_norm if add_bn else None
    self.reg_fn = tf.nn.l2_loss if add_reg else None

    # NOTE(review): 'rnn' maps to tf.contrib.rnn.RNNCell as in the original;
    # confirm that this class (rather than BasicRNNCell) is what is wanted.
    for key, cell_name in (('rnn', 'RNNCell'), ('gru', 'GRUCell'), ('lstm', 'LSTMCell')):
        if cell_type == key:
            self.cell = getattr(tf.contrib.rnn, cell_name)
            break
    else:
        raise ValueError('Input correct cell type')
def __init__(self, features_shape, num_classes, cell_type='lstm', seq_len=8, reuse=False, add_bn=False, add_reg=False, deeper = False, loss_type = 'negative_l1_dist', cum_sum=False, init_loc_size=(4,), num_objects=1, scope='RCNN'):
    """Store the model configuration and create the input placeholders.

    Args:
      features_shape: tuple appended to two leading ``None`` dimensions for
        the inputs placeholder.
      num_classes, seq_len, init_loc_size: stored on the config.
      cell_type: 'rnn', 'gru' or 'lstm'; selects the ``tf.contrib.rnn`` cell
        class stored on ``self.cell``.
      reuse, deeper, loss_type, scope, num_objects: stored on ``self``.
      cum_sum: stored on ``self.cumsum``.
      add_bn: when true, ``self.norm_fn`` is batch norm, otherwise ``None``.
      add_reg: when true, ``self.reg_fn`` is ``tf.nn.l2_loss``, otherwise ``None``.

    Raises:
      ValueError: if ``cell_type`` is not one of the supported names.
    """
    self.config = MOTRecurrentCNNConfig()
    cfg = self.config
    cfg.features_shape = features_shape
    cfg.num_classes = num_classes
    cfg.init_loc_size = init_loc_size
    cfg.seq_len = seq_len

    self.reuse = reuse
    self.deeper = deeper
    self.loss_type = loss_type
    self.cumsum = cum_sum
    self.scope = scope
    self.num_objects = num_objects

    # Placeholders; None marks dimensions left unspecified.
    self.inputs_placeholder = tf.placeholder(
        tf.float32, shape=tuple((None, None,) + cfg.features_shape))
    self.init_loc = tf.placeholder(
        tf.float32, shape=tuple((None, self.num_objects) + cfg.init_loc_size))
    self.targets_placeholder = tf.placeholder(
        tf.float32,
        shape=tuple((None, self.num_objects, None) + cfg.targets_shape))
    self.seq_len_placeholder = tf.placeholder(tf.int32, shape=(None,))

    self.norm_fn = tf.contrib.layers.batch_norm if add_bn else None
    self.reg_fn = tf.nn.l2_loss if add_reg else None

    # NOTE(review): 'rnn' maps to tf.contrib.rnn.RNNCell as in the original;
    # confirm that this class (rather than BasicRNNCell) is what is wanted.
    for key, cell_name in (('rnn', 'RNNCell'), ('gru', 'GRUCell'), ('lstm', 'LSTMCell')):
        if cell_type == key:
            self.cell = getattr(tf.contrib.rnn, cell_name)
            break
    else:
        raise ValueError('Input correct cell type')
def __init__(self, features_shape, num_classes, seq_len, cell_type='lstm', reuse=False, add_bn=False, add_reg=False, scope="VA"):
    """Store the model configuration and create the input placeholders.

    Args:
      features_shape: tuple appended to two leading ``None`` dimensions for
        the inputs placeholder.
      num_classes: stored on the config.
      seq_len: stored on the config.
      cell_type: 'rnn', 'gru' or 'lstm'; selects the ``tf.contrib.rnn`` cell
        class stored on ``self.cell``.
      reuse: stored on ``self.reuse``.
      add_bn: when true, ``self.norm_fn`` is batch norm, otherwise ``None``.
      add_reg: when true, ``self.reg_fn`` is ``tf.nn.l2_loss``, otherwise ``None``.
      scope: stored on ``self.scope``.

    Raises:
      ValueError: if ``cell_type`` is not one of the supported names.
    """
    self.config = VisualAttentionConfig()
    cfg = self.config
    cfg.features_shape = features_shape
    cfg.num_classes = num_classes
    self.reuse = reuse

    # Placeholders; None marks dimensions left unspecified.  init_loc_size and
    # targets_shape are read from the config as it was constructed.
    self.inputs_placeholder = tf.placeholder(
        tf.float32, shape=tuple((None, None,) + cfg.features_shape))
    self.init_loc = tf.placeholder(
        tf.float32, shape=tuple((None,) + cfg.init_loc_size))
    self.targets_placeholder = tf.placeholder(
        tf.float32, shape=tuple((None, None,) + cfg.targets_shape))
    cfg.seq_len = seq_len
    self.seq_len_placeholder = tf.placeholder(tf.int32, shape=(None,))

    self.emission_num_layers = 1
    self.loss_type = 'negative_l1_dist'
    self.cumsum = False
    self.scope = scope

    self.norm_fn = tf.contrib.layers.batch_norm if add_bn else None
    self.reg_fn = tf.nn.l2_loss if add_reg else None

    # NOTE(review): 'rnn' maps to tf.contrib.rnn.RNNCell as in the original;
    # confirm that this class (rather than BasicRNNCell) is what is wanted.
    for key, cell_name in (('rnn', 'RNNCell'), ('gru', 'GRUCell'), ('lstm', 'LSTMCell')):
        if cell_type == key:
            self.cell = getattr(tf.contrib.rnn, cell_name)
            break
    else:
        raise ValueError('Input correct cell type')
def __init__(self, features_shape, num_classes, cell_type='lstm', seq_len=8, reuse=False, add_bn=False, add_reg=False, deeper = False, loss_type = 'negative_l1_dist', cum_sum=False, scope='RCNN'):
    """Store the model configuration and create the input placeholders.

    Args:
      features_shape: tuple appended to two leading ``None`` dimensions for
        the inputs placeholder.
      num_classes, seq_len: stored on the config.
      cell_type: 'rnn', 'gru' or 'lstm'; selects the ``tf.contrib.rnn`` cell
        class stored on ``self.cell``.
      reuse, deeper, loss_type, scope: stored on ``self``.
      cum_sum: stored on ``self.cumsum``.
      add_bn: when true, ``self.norm_fn`` is batch norm, otherwise ``None``.
      add_reg: when true, ``self.reg_fn`` is ``tf.nn.l2_loss``, otherwise ``None``.

    Raises:
      ValueError: if ``cell_type`` is not one of the supported names.
    """
    self.config = RecurrentCNNConfig()
    cfg = self.config
    cfg.features_shape = features_shape
    cfg.num_classes = num_classes
    self.reuse = reuse

    # Placeholders; None marks dimensions left unspecified.  init_loc_size and
    # targets_shape are read from the config as it was constructed.
    self.inputs_placeholder = tf.placeholder(
        tf.float32, shape=tuple((None, None,) + cfg.features_shape))
    self.init_loc = tf.placeholder(
        tf.float32, shape=tuple((None,) + cfg.init_loc_size))
    self.targets_placeholder = tf.placeholder(
        tf.float32, shape=tuple((None, None,) + cfg.targets_shape))
    cfg.seq_len = seq_len
    self.seq_len_placeholder = tf.placeholder(tf.int32, shape=(None,))

    self.deeper = deeper
    self.loss_type = loss_type
    self.cumsum = cum_sum
    self.scope = scope

    self.norm_fn = tf.contrib.layers.batch_norm if add_bn else None
    self.reg_fn = tf.nn.l2_loss if add_reg else None

    # NOTE(review): 'rnn' maps to tf.contrib.rnn.RNNCell as in the original;
    # confirm that this class (rather than BasicRNNCell) is what is wanted.
    for key, cell_name in (('rnn', 'RNNCell'), ('gru', 'GRUCell'), ('lstm', 'LSTMCell')):
        if cell_type == key:
            self.cell = getattr(tf.contrib.rnn, cell_name)
            break
    else:
        raise ValueError('Input correct cell type')
def get_mc_target(rewards_t, discount):
    """Return the discounted sum of future rewards for every step on axis 0.

    Rewards are scaled by ``discount ** t``, summed from the end of the
    sequence backwards, and divided by ``discount ** t`` again (plus a small
    epsilon that guards the division), which re-bases each partial sum so it
    starts at its own time step.
    """
    epsilon = 1e-7

    steps = tf.cast(tf.range(tf.shape(rewards_t)[0]), dtype=tf.float32)
    discounts = discount ** steps

    reversed_running_sum = tf.cumsum(rewards_t * discounts, reverse=True)
    return reversed_running_sum / (discounts + epsilon)
def tf_percentile(images, q=0.95, nbins=100):
    """Estimate the ``q``-quantile of ``images`` from a log-space histogram.

    The values are mapped through ``log(1 + x)``, histogrammed into ``nbins``
    equal-width bins between the smallest and largest mapped value, and the
    bin whose cumulative fraction is closest to ``q`` is selected.  The
    representative value of that bin is mapped back with ``exp(v) - 1``.

    Two defects of the earlier version are fixed here:
      * the histogram is now built from the log-mapped values, matching the
        log-space range it is binned over (raw values were binned before);
      * the cumulative counts are divided by the number of samples, so they
        form a fraction in [0, 1] (they were divided by the number of bins).

    Args:
      images: float tensor; every value must be greater than -1 for the log.
      q: target cumulative fraction, 0.95 by default.
      nbins: number of histogram bins, 100 by default.

    Returns:
      A scalar tensor with the estimated quantile in the original value space.
    """
    log_images = tf.log(1.0 + images)
    log_min = tf.reduce_min(log_images)
    log_max = tf.reduce_max(log_images)

    histogram = tf.histogram_fixed_width(
        tf.reshape(log_images, [-1]), [log_min, log_max], nbins=nbins)

    # One representative log-value per bin (an evenly spaced grid over the
    # range, as before; it approximates rather than matches the bin edges).
    values = tf.linspace(log_min, log_max, nbins)

    # Empirical CDF: cumulative counts over the total number of samples.
    cdf = (tf.cast(tf.cumsum(histogram), tf.float32)
           / tf.cast(tf.size(images), tf.float32))

    closest_bin = tf.cast(tf.argmin((cdf - q) ** 2.0, axis=0), tf.int32)
    return tf.exp(values[closest_bin]) - 1.0
def nonseq2seq(tensor, seq_length, length, name=None):
    '''
    Convert non sequential data to sequential data

    Args:
        tensor: non sequential data, which is a TxF tensor where T is the sum
            of all sequence lengths
        seq_length: a vector containing the sequence lengths
        length: the constant length of the output sequences
        name: [optional] the name of the operation

    Returns:
        sequential data, which is a list containing an N x F tensor for each
        time step where N is the batch size and F is the input dimension
    '''

    # NOTE(review): tf.concat(0, ...), tf.pack and tf.unpack are the
    # pre-TensorFlow-1.0 spellings; they are kept unchanged here.
    with tf.name_scope(name or'nonseq2seq'):
        # offsets[k] is the row of `tensor` at which sequence k starts
        offsets = tf.concat(0, [tf.constant([0]), tf.cumsum(seq_length)])

        # row indices in `tensor` belonging to each sequence
        num_sequences = int(seq_length.get_shape()[0])
        indices = [tf.range(offsets[l], offsets[l+1])
                   for l in range(num_sequences)]

        # the non-padded sequences
        sequences = [tf.gather(tensor, rows) for rows in indices]

        # zero-pad every sequence at the end, up to `length` rows
        sequences = [tf.pad(seq, [[0, length-seq_length[s]], [0, 0]])
                     for s, seq in enumerate(sequences)]

        # record the static shape that the padding guarantees
        feature_dim = int(tensor.get_shape()[1])
        for seq in sequences:
            seq.set_shape([length, feature_dim])

        # regroup from one tensor per sequence to one tensor per time step
        return tf.unpack(tf.pack(sequences), axis=1)
def cumsum(x, axis=0):
    """Cumulative sum of the values in a tensor, alongside the specified axis.

    # Arguments
        x: A tensor or variable.
        axis: An integer, the axis to compute the sum.

    # Returns
        A tensor of the cumulative sum of values of `x` along `axis`.
    """
    # The axis is passed through the file's _normalize_axis helper first.
    normalized_axis = _normalize_axis(axis, ndim(x))
    return tf.cumsum(x, axis=normalized_axis)
def weights_prepend_inputs_to_targets(labels):
    """Assign weight 1.0 to only the "targets" portion of the labels.

    Weight 1.0 is assigned to all nonzero labels past the first zero.
    See prepend_mode in common_hparams.py

    Args:
      labels: A Tensor of int32s.

    Returns:
      A Tensor of floats.
    """
    # Running count of zeros along axis 1: positive once a zero has been seen.
    zeros_seen = tf.cumsum(tf.to_float(tf.equal(labels, 0)), axis=1)
    label_values = tf.to_float(labels)

    # The product is nonzero only for a nonzero label located after a zero.
    return tf.to_float(tf.not_equal(zeros_seen * label_values, 0))
def weights_concatenated(labels):
    """Assign weight 1.0 to the "target" part of the concatenated labels.

    The labels look like:
      source English I love you . ID1 target French Je t'aime . ID1 source
        English the cat ID1 target French le chat ID1 source English ...

    We want to assign weight 1.0 to all words in the target text (including the
    ID1 end symbol), but not to the source text or the boilerplate.  In the
    above example, the target words that get positive weight are:
      Je t'aime . ID1 le chat ID1

    Args:
      labels: a Tensor

    Returns:
      a Tensor
    """
    # Label id 1 marks the end of a sentence.
    is_eos = tf.to_int32(tf.equal(labels, 1))

    # Number of sentences completed strictly before each position.
    sentence_index = tf.cumsum(is_eos, axis=1, exclusive=True)

    # Odd-numbered sentences are the targets.
    is_target = tf.equal(tf.mod(sentence_index, 2), 1)

    # The first two tokens of each sentence are boilerplate: a token is kept
    # only if the token two positions earlier lies in the same sentence.  The
    # +1 offset keeps the zero padding distinct from sentence 0.
    one_based = sentence_index + 1
    two_back = tf.pad(one_based, [[0, 0], [2, 0], [0, 0], [0, 0]])[:, :-2, :, :]
    past_boilerplate = tf.equal(one_based, two_back)

    return tf.to_float(tf.logical_and(past_boilerplate, is_target))
def attention_bias_prepend_inputs_full_attention(padding):
    """Create a bias tensor for prepend_mode="prepend_inputs_full_attention".

    See prepend_inputs in common_hparams.py.

    Produces a bias tensor to be used in self-attention.

    This bias tensor allows for full connectivity in the "inputs" part of
    the sequence and masked connectivity in the targets part.

    Args:
      padding: a float `Tensor` with shape [batch, length] with
        ones in positions corresponding to padding.  In each row, a single
        padding position separates the input part from the target part.

    Returns:
      a `Tensor` with shape [batch, 1, length, length].
    """
    # Zero for the source portion and the separator, one for everything after
    # the first padding position (the target portion).
    target_mask = tf.cumsum(padding, axis=1, exclusive=True)

    # 1-based position inside the target, or 0 for source positions.
    target_position = tf.cumsum(target_mask, axis=1)

    # A query may not attend to a key with a greater target position.
    key_positions = tf.expand_dims(target_position, 1)
    query_positions = tf.expand_dims(target_position, 2)
    forbidden = tf.greater(key_positions, query_positions)

    bias = tf.to_float(forbidden) * -1e9
    return tf.expand_dims(bias, 1)
def _build_discounts_matrix(self, T, gamma):
    """Build lower-triangular matrix of discounts.

    For example for T = 3:
        D = [[1,       0,     0]
             [gamma,   1,     0]
             [gamma^2, gamma, 1]]

    Then with R, our N x T incremental rewards matrix, the discounted sum is
        R * D
    """
    # Lower-triangular ones: row i has ones in columns 0..i.
    lower_tri = tf.sequence_mask(tf.range(T) + 1, T, dtype=tf.float32)

    # An exclusive running sum down the rows gives the exponent i - j on and
    # below the diagonal (and 0 above it).
    exponents = tf.cumsum(lower_tri, exclusive=True)

    # gamma ** 0 == 1 above the diagonal, so mask those entries back to 0.
    return tf.pow(gamma, exponents) * lower_tri
def generator(rnn_inputs,  # dims batch_size x num_steps x input_size
              seqlen,
              cell_type='LSTM',
              num_layers=1,
              state_size=64,
              batch_size=BATCH_SIZE):
    """Build the recurrent generator and return its per-step outputs.

    The module-level flags ``D_DIFF``, ``G_DIFF`` and ``MARK`` and the
    constant ``DIM_SIZE`` are read while the graph is built.

    Args:
      rnn_inputs: tensor of shape [batch_size, num_steps, input_size].
      seqlen: per-example sequence lengths passed to ``tf.nn.dynamic_rnn``.
      cell_type: 'Basic' or 'LSTM'.
      num_layers: number of stacked recurrent layers.
      state_size: number of units per layer.
      batch_size: static batch size used for the zero state and the reshape.

    Returns:
      A tensor of shape [batch_size, num_steps, 1], or
      [batch_size, num_steps, DIM_SIZE + 1] when ``MARK`` is set (the positive
      "time" output followed by the softmax over marks).

    Raises:
      ValueError: if ``cell_type`` is neither 'Basic' nor 'LSTM' (previously
        this surfaced as an unbound-local error on ``cell``).
    """
    if cell_type not in ('Basic', 'LSTM'):
        raise ValueError(
            "cell_type must be 'Basic' or 'LSTM', got %r" % (cell_type,))

    with tf.variable_scope("generator"):
        num_steps = tf.shape(rnn_inputs)[1]

        # RNN.  Every layer gets its own cell object: reusing one instance via
        # [cell] * num_layers is rejected or silently shares weights across
        # layers, depending on the TensorFlow 1.x release.
        if cell_type == 'Basic':
            layers = [tf.contrib.rnn.BasicRNNCell(state_size)
                      for _ in range(num_layers)]
            cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=False)
        else:
            # LSTM state is a tuple of c_state and m_state.
            layers = [tf.contrib.rnn.LSTMCell(state_size, state_is_tuple=True)
                      for _ in range(num_layers)]
            cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)

        init_state = cell.zero_state(batch_size, tf.float32)
        # dynamic_rnn produces rnn_outputs with shape
        # [batch_size, num_steps, state_size]; outputs are zero after seqlen.
        rnn_outputs, final_state = tf.nn.dynamic_rnn(
            cell, rnn_inputs, sequence_length=seqlen, initial_state=init_state)

        # Flatten batch and time so one matmul serves every step; the reshape
        # at the end restores the [batch, time, ...] layout.
        rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])

        # Fully connected layer producing the positive "time" output.
        with tf.variable_scope('FullConnect'):
            W = tf.get_variable('Wt', [state_size, 1])
            b = tf.get_variable('bt', [1],
                                initializer=tf.constant_initializer(0.0))
            logits_t = tf.matmul(rnn_outputs, W) + b
            logits_t = tf.nn.elu(logits_t) + 1  # keeps the output positive

        if not D_DIFF and G_DIFF:  # depends on D_DIFF
            # NOTE(review): logits_t is [batch * steps, 1] here, so a cumsum
            # over axis 1 leaves it unchanged; confirm whether accumulation
            # over time (after the final reshape) was intended.
            logits_t = tf.cumsum(logits_t, axis=1)

        if MARK:
            # Softmax layer over the mark dimension.
            with tf.variable_scope('softmax'):
                W = tf.get_variable('Wz', [state_size, DIM_SIZE])
                b = tf.get_variable('bz', [DIM_SIZE],
                                    initializer=tf.constant_initializer(0.0))
                logits_prob = tf.matmul(rnn_outputs, W) + b
                logits_prob = tf.nn.softmax(logits_prob)
            logits = tf.concat([logits_t, logits_prob], axis=1)
            logits = tf.reshape(logits, [batch_size, num_steps, DIM_SIZE + 1])
        else:
            logits = tf.reshape(logits_t, [batch_size, num_steps, 1])

    return logits
def compute_spans(start_scores, end_scores, answer2support, is_eval, support2question, beam_size=1, max_span_size=10000, correct_start=None):
    """Select answer spans from start / end scores, for training or evaluation.

    ``tf.cond(is_eval, ...)`` picks between two branches:

    * training: scores are gathered per answer via ``answer2support`` and the
      arg-max start and end pointers are returned; when ``correct_start`` is
      given, a mask from ``misc.mask_for_lengths`` is added to the end scores
      first;
    * evaluation: candidate spans from ``_get_top_k`` (once driven by start
      scores, once by end scores) are concatenated and the ``beam_size`` best
      by span score are kept.

    Returns:
      ``(start_scores, end_scores, doc_idx, start_pointer, end_pointer)`` where
      ``doc_idx`` is the index of the support within its own question.
    """
    max_support_length = tf.shape(start_scores)[1]

    # Position of every support inside its question: flat index minus the
    # offset of the question's first support.
    _, _, supports_per_question = tf.unique_with_counts(support2question)
    question_offsets = tf.cumsum(supports_per_question, exclusive=True)
    doc_idx_for_support = (tf.range(tf.shape(support2question)[0])
                           - tf.gather(question_offsets, support2question))

    def _train_branch():
        answer_end_scores = tf.gather(end_scores, answer2support)
        answer_start_scores = tf.gather(start_scores, answer2support)

        if correct_start is not None:
            # assuming we know the correct start we only consider ends after that
            left_mask = misc.mask_for_lengths(
                tf.cast(correct_start, tf.int32), max_support_length,
                mask_right=False)
            answer_end_scores = answer_end_scores + left_mask

        best_start = tf.argmax(answer_start_scores, axis=1, output_type=tf.int32)
        best_end = tf.argmax(answer_end_scores, axis=1, output_type=tf.int32)

        return (start_scores, end_scores,
                tf.gather(doc_idx_for_support, answer2support),
                best_start, best_end)

    def _eval_branch():
        # we collect spans for top k starts and top k ends and select the top k
        # from those top 2k
        docs_a, starts_a, ends_a, scores_a = _get_top_k(
            start_scores, end_scores, beam_size, max_span_size, support2question)
        docs_b, ends_b, starts_b, scores_b = _get_top_k(
            end_scores, start_scores, beam_size, -max_span_size, support2question)

        all_docs = tf.concat([docs_a, docs_b], 1)
        all_starts = tf.concat([starts_a, starts_b], 1)
        all_ends = tf.concat([ends_a, ends_b], 1)
        all_scores = tf.concat([scores_a, scores_b], 1)

        _, best = tf.nn.top_k(all_scores, beam_size)

        # Pair every selected column with its row number for gather_nd.
        rows = tf.range(tf.shape(all_scores)[0], dtype=tf.int32)
        rows = tf.reshape(tf.tile(tf.expand_dims(rows, 1), [1, beam_size]), [-1, 1])
        coords = tf.concat([rows, tf.reshape(best, [-1, 1])], 1)

        chosen_docs = tf.gather_nd(all_docs, coords)
        chosen_starts = tf.gather_nd(all_starts, coords)
        chosen_ends = tf.gather_nd(all_ends, coords)

        return (start_scores, end_scores,
                tf.gather(doc_idx_for_support, chosen_docs),
                chosen_starts, chosen_ends)

    return tf.cond(is_eval, _eval_branch, _train_branch)
def reconstruction_loss(self, x_input, x_target, x_length, z=None):
    """Reconstruction loss calculation.

    Args:
      x_input: Batch of decoder input sequences for teacher forcing, sized
        `[batch_size, max(x_length), output_depth]`.
      x_target: Batch of expected output sequences to compute loss against,
        sized `[batch_size, max(x_length), output_depth]`.
      x_length: Length of input/output sequences, sized `[batch_size]`.
      z: (Optional) Latent vectors. Required if model is conditional. Sized
        `[n, z_size]`.

    Returns:
      r_loss: The reconstruction loss for each sequence in the batch.
      metric_map: Map from metric name to tf.metrics return values for logging.
      truths: Ground truth labels, as returned by `_flat_reconstruction_loss`.
      predictions: Predicted labels, as returned by `_flat_reconstruction_loss`.
    """
    batch_size = x_input.shape[0].value

    has_z = z is not None
    if not has_z:
        # Zero-width latent so the tile / concat below work unconditionally.
        z = tf.zeros([batch_size, 0])
    repeated_z = tf.tile(
        tf.expand_dims(z, axis=1), [1, tf.shape(x_input)[1], 1])

    static_sampling_prob = tensor_util.constant_value(
        self._sampling_probability)

    if static_sampling_prob == 0.0:
        # Use teacher forcing.
        x_input = tf.concat([x_input, repeated_z], axis=2)
        helper = tf.contrib.seq2seq.TrainingHelper(x_input, x_length)
    else:
        # Use scheduled sampling.
        helper = tf.contrib.seq2seq.ScheduledOutputTrainingHelper(
            inputs=x_input,
            sequence_length=x_length,
            auxiliary_inputs=repeated_z if has_z else None,
            sampling_probability=self._sampling_probability,
            next_inputs_fn=self._sample)

    decoder_outputs = self._decode(batch_size, helper=helper, z=z)
    flat_x_target = flatten_maybe_padded_sequences(x_target, x_length)
    flat_rnn_output = flatten_maybe_padded_sequences(
        decoder_outputs.rnn_output, x_length)
    flat_loss, metric_map, truths, predictions = self._flat_reconstruction_loss(
        flat_x_target, flat_rnn_output)

    # Sum loss over sequences: bounds[i]:bounds[i + 1] is the slice of the
    # flattened loss that belongs to sequence i.
    bounds = tf.concat([(0,), tf.cumsum(x_length)], axis=0)
    r_loss = tf.stack([
        tf.reduce_sum(flat_loss[bounds[i]:bounds[i + 1]])
        for i in range(batch_size)
    ])

    return r_loss, metric_map, truths, predictions
def add_loss_op(self):
    """Build the reward-weighted log-density loss and the reward summary.

    Locations are sampled around ``self.logits`` with standard deviation
    ``self.config.variance``, turned into rewards according to
    ``self.loss_type``, and the (gradient-stopped) rewards minus their mean
    over samples weight the Gaussian log-density of the targets under the
    logits.

    Sets ``self.location_samples``, ``self.timestep_rewards``,
    ``self.tot_cum_rewards``, ``self.density_func``, ``self.loss`` and
    ``self.total_rewards``, and registers the 'Total Rewards' summary.

    Raises:
      ValueError: if ``self.loss_type`` is neither 'negative_l1_dist' nor
        'iou' (previously this surfaced as an unbound ``rewards``).
    """
    sigma = self.config.variance

    # Draw num_samples locations around the logits and restore their shape.
    logits_shape = tf.shape(self.logits)
    logits_flat = tf.reshape(self.logits, [-1])
    location_dist = tf.contrib.distributions.MultivariateNormalDiag(
        mu=logits_flat, diag_stdev=sigma * tf.ones_like(logits_flat))
    location_samples = location_dist.sample([self.config.num_samples])
    new_logits_shape = tf.concat([[self.config.num_samples,], logits_shape], axis=0)
    location_samples = tf.reshape(location_samples, new_logits_shape)
    self.location_samples = location_samples

    if self.loss_type == 'negative_l1_dist':
        abs_error = tf.abs(location_samples - self.targets_placeholder)
        rewards = (-tf.reduce_mean(abs_error, axis=2, keep_dims=True)
                   - tf.reduce_max(abs_error, axis=2, keep_dims=True))
    elif self.loss_type == 'iou':
        # NOTE(review): expand_dims is applied to the IoU rewards only —
        # confirm against the upstream source.
        rewards = tf.expand_dims(self.get_iou_loss(), axis=-1)
    else:
        raise ValueError('Unknown loss_type: %r' % (self.loss_type,))

    # Mean reward over samples (axis 0).
    timestep_rewards = tf.reduce_mean(rewards, axis=0, keep_dims=True)
    self.timestep_rewards = timestep_rewards

    if self.cumsum:
        tot_cum_rewards = tf.cumsum(rewards, axis=2, reverse=True)
    else:
        tot_cum_rewards = tf.reduce_sum(rewards, axis=2, keep_dims=True)
    self.tot_cum_rewards = tot_cum_rewards

    # Rewards act as constants in the loss: no gradient flows through them.
    baseline = tf.stop_gradient(timestep_rewards)
    returns = tf.stop_gradient(tot_cum_rewards)

    # Gaussian log-density, written analytically.  The earlier form
    # log(const1 * exp(-d / const2)) underflows to log(0) = -inf once d is
    # large; -log(sqrt(2*pi) * sigma) - d / (2 * sigma**2) is the same value.
    squared_diff = tf.square(self.targets_placeholder - self.logits)
    log_norm = tf.log(
        tf.cast(np.sqrt(2.0 * math.pi) * sigma, squared_diff.dtype))
    density_func = -log_norm - squared_diff / (2.0 * sigma ** 2)
    self.density_func = density_func

    self.loss = tf.reduce_mean(
        tf.reduce_sum(density_func * (returns - baseline), axis=2),
        axis=[1, 0])
    self.total_rewards = tf.reduce_mean(
        tf.reduce_sum(timestep_rewards, axis=2), axis=1)
    tf.summary.scalar('Total Rewards', self.total_rewards[0])
def add_loss_op(self, loss_type='negative_l1_dist'):
    """Build the reward-weighted log-density loss and the reward summary.

    Locations are sampled around ``self.logits`` with standard deviation
    ``self.config.variance``, turned into rewards according to ``loss_type``,
    and the (gradient-stopped) rewards minus their mean over samples weight
    the Gaussian log-density of the targets under the logits.  In this variant
    the reward features sit on axis 4 and rewards are accumulated over axis 3.

    Sets ``self.loss_type``, ``self.location_samples``,
    ``self.timestep_rewards``, ``self.tot_cum_rewards``,
    ``self.density_func``, ``self.loss`` and ``self.total_rewards``, prints
    three static shapes, and registers the 'Total Rewards' summary.

    Args:
      loss_type: 'negative_l1_dist' or 'iou'.

    Raises:
      ValueError: for any other ``loss_type`` (previously this surfaced as an
        unbound ``rewards``).
    """
    self.loss_type = loss_type
    sigma = self.config.variance

    # Draw num_samples locations around the logits and restore their shape.
    logits_shape = tf.shape(self.logits)
    logits_flat = tf.reshape(self.logits, [-1])
    location_dist = tf.contrib.distributions.MultivariateNormalDiag(
        mu=logits_flat, diag_stdev=sigma * tf.ones_like(logits_flat))
    location_samples = location_dist.sample([self.config.num_samples])
    new_logits_shape = tf.concat([[self.config.num_samples,], logits_shape], axis=0)
    location_samples = tf.reshape(location_samples, new_logits_shape)
    self.location_samples = location_samples

    if self.loss_type == 'negative_l1_dist':
        abs_error = tf.abs(
            location_samples - tf.cast(self.targets_placeholder, tf.float32))
        rewards = (-tf.reduce_mean(abs_error, axis=4, keep_dims=True)
                   - tf.reduce_max(abs_error, axis=4, keep_dims=True))
    elif self.loss_type == 'iou':
        # expand_dims belongs to this branch only: the tile multiples below
        # require `rewards` to stay rank 5.
        rewards = tf.expand_dims(self.get_iou_loss(), axis=-1)
    else:
        raise ValueError('Unknown loss_type: %r' % (self.loss_type,))

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(location_samples.get_shape().as_list())
    print(rewards.get_shape().as_list())

    # Mean reward over samples (axis 0).
    timestep_rewards = tf.reduce_mean(rewards, axis=0, keep_dims=True)
    print(timestep_rewards.get_shape().as_list())
    self.timestep_rewards = timestep_rewards

    if self.cumsum:
        tot_cum_rewards = tf.cumsum(rewards, axis=3, reverse=True)
    else:
        tot_cum_rewards = tf.tile(
            tf.reduce_sum(rewards, axis=3, keep_dims=True),
            multiples=[1, 1, 1, self.config.seq_len, 1])
    self.tot_cum_rewards = tot_cum_rewards

    # Rewards act as constants in the loss: no gradient flows through them.
    baseline = tf.stop_gradient(timestep_rewards)
    returns = tf.stop_gradient(tot_cum_rewards)

    # Gaussian log-density, written analytically.  The earlier form
    # log(const1 * exp(-d / const2)) underflows to log(0) = -inf once d is
    # large; -log(sqrt(2*pi) * sigma) - d / (2 * sigma**2) is the same value.
    squared_diff = tf.square(self.targets_placeholder - self.logits)
    log_norm = tf.log(
        tf.cast(np.sqrt(2.0 * math.pi) * sigma, squared_diff.dtype))
    density_func = -log_norm - squared_diff / (2.0 * sigma ** 2)
    self.density_func = density_func

    self.loss = tf.reduce_mean(
        tf.reduce_sum(density_func * (returns - baseline), axis=3),
        axis=[1, 0])
    self.total_rewards = tf.reduce_mean(
        tf.reduce_sum(timestep_rewards, axis=3), axis=[2, 1])
    tf.summary.scalar('Total Rewards', self.total_rewards[0][0])
def add_loss_op(self, loss_type='negative_l1_dist'):
    """Build the reward-weighted log-density loss and the reward summary.

    Locations are sampled around ``self.logits`` with standard deviation
    ``self.config.variance``, turned into rewards according to ``loss_type``,
    and the (gradient-stopped) rewards minus their mean over samples weight
    the Gaussian log-density of the targets under the logits.  In this variant
    the reward features sit on axis 3 and rewards are accumulated over axis 2.

    Sets ``self.loss_type``, ``self.location_samples``,
    ``self.timestep_rewards``, ``self.tot_cum_rewards``,
    ``self.density_func``, ``self.loss`` and ``self.total_rewards``, and
    registers the 'Total Rewards' summary.

    Args:
      loss_type: 'negative_l1_dist' or 'iou'.

    Raises:
      ValueError: for any other ``loss_type`` (previously this surfaced as an
        unbound ``rewards``).
    """
    self.loss_type = loss_type
    sigma = self.config.variance

    # Draw num_samples locations around the logits and restore their shape.
    logits_shape = tf.shape(self.logits)
    logits_flat = tf.reshape(self.logits, [-1])
    location_dist = tf.contrib.distributions.MultivariateNormalDiag(
        mu=logits_flat, diag_stdev=sigma * tf.ones_like(logits_flat))
    location_samples = location_dist.sample([self.config.num_samples])
    new_logits_shape = tf.concat([[self.config.num_samples,], logits_shape], axis=0)
    location_samples = tf.reshape(location_samples, new_logits_shape)
    self.location_samples = location_samples

    if self.loss_type == 'negative_l1_dist':
        abs_error = tf.abs(
            location_samples - tf.cast(self.targets_placeholder, tf.float32))
        rewards = (-tf.reduce_mean(abs_error, axis=3, keep_dims=True)
                   - tf.reduce_max(abs_error, axis=3, keep_dims=True))
    elif self.loss_type == 'iou':
        # expand_dims belongs to this branch only: the tile multiples below
        # require `rewards` to stay rank 4.
        rewards = tf.expand_dims(self.get_iou_loss(), axis=-1)
    else:
        raise ValueError('Unknown loss_type: %r' % (self.loss_type,))

    # Mean reward over samples (axis 0).
    timestep_rewards = tf.reduce_mean(rewards, axis=0, keep_dims=True)
    self.timestep_rewards = timestep_rewards

    if self.cumsum:
        tot_cum_rewards = tf.cumsum(rewards, axis=2, reverse=True)
    else:
        tot_cum_rewards = tf.tile(
            tf.reduce_sum(rewards, axis=2, keep_dims=True),
            multiples=[1, 1, self.config.seq_len, 1])
    self.tot_cum_rewards = tot_cum_rewards

    # Rewards act as constants in the loss: no gradient flows through them.
    baseline = tf.stop_gradient(timestep_rewards)
    returns = tf.stop_gradient(tot_cum_rewards)

    # Gaussian log-density, written analytically.  The earlier form
    # log(const1 * exp(-d / const2)) underflows to log(0) = -inf once d is
    # large; -log(sqrt(2*pi) * sigma) - d / (2 * sigma**2) is the same value.
    squared_diff = tf.square(self.targets_placeholder - self.logits)
    log_norm = tf.log(
        tf.cast(np.sqrt(2.0 * math.pi) * sigma, squared_diff.dtype))
    density_func = -log_norm - squared_diff / (2.0 * sigma ** 2)
    self.density_func = density_func

    self.loss = tf.reduce_mean(
        tf.reduce_sum(density_func * (returns - baseline), axis=2),
        axis=[1, 0])
    self.total_rewards = tf.reduce_mean(
        tf.reduce_sum(timestep_rewards, axis=2), axis=1)
    tf.summary.scalar('Total Rewards', self.total_rewards[0][0])
def add_loss_op(self, loss_type='negative_l1_dist', pretrain=False):
    """Build the sampled-reward loss and store its tensors on ``self``.

    Draws ``self.config.num_samples`` samples from a diagonal Gaussian
    centred on ``self.logits``. When ``pretrain`` is true the rewards are
    computed from those samples; otherwise they are read from
    ``self.qvalues_placeholder``. The rewards are then combined with a
    Gaussian log-density term into ``self.loss``.

    Sets ``self.loss_type``, ``self.location_samples``,
    ``self.timestep_rewards``, ``self.tot_cum_rewards``, ``self.density_func``,
    ``self.loss`` and ``self.total_rewards``.

    Args:
        loss_type: 'negative_l1_dist' (negative mean plus negative max of
            the absolute error along axis 3) or 'iou' (uses
            ``self.get_iou_loss()``). Only consulted when ``pretrain`` is
            true.
        pretrain: if true, derive rewards from the sampled locations;
            otherwise use ``self.qvalues_placeholder`` as the rewards.

    Raises:
        ValueError: if ``pretrain`` is true and ``loss_type`` is not one of
            the supported values.
    """
    # Only the pretrain path reads loss_type; fail early there with a clear
    # message instead of an UnboundLocalError on `rewards` further down.
    if pretrain and loss_type not in ('negative_l1_dist', 'iou'):
        raise ValueError(
            "Unsupported loss_type %r; expected 'negative_l1_dist' or 'iou'."
            % (loss_type,))
    self.loss_type = loss_type

    logits_shape = tf.shape(self.logits)
    logits_flat = tf.reshape(self.logits, [-1])
    # NOTE: config.variance is passed as diag_stdev, i.e. it is used as a
    # standard deviation here.
    location_dist = tf.contrib.distributions.MultivariateNormalDiag(
        mu=logits_flat,
        diag_stdev=self.config.variance * tf.ones_like(logits_flat))
    location_samples = location_dist.sample([self.config.num_samples])
    # Restore the logits layout with a leading sample axis.
    new_logits_shape = tf.concat(
        [[self.config.num_samples], logits_shape], axis=0)
    location_samples = tf.reshape(location_samples, new_logits_shape)
    self.location_samples = location_samples

    if pretrain:
        if self.loss_type == 'negative_l1_dist':
            abs_error = tf.abs(
                self.location_samples
                - tf.cast(self.targets_placeholder, tf.float32))
            rewards = (-tf.reduce_mean(abs_error, axis=3, keep_dims=True)
                       - tf.reduce_max(abs_error, axis=3, keep_dims=True))
        elif self.loss_type == 'iou':
            rewards = self.get_iou_loss()
            rewards = tf.expand_dims(rewards, axis=-1)
    else:
        rewards = self.qvalues_placeholder

    # Baseline: reward averaged over the sample axis (axis 0).
    timestep_rewards = tf.reduce_mean(rewards, axis=0, keep_dims=True)
    self.timestep_rewards = timestep_rewards

    if self.cumsum:
        # Reward-to-go along axis 2.
        tot_cum_rewards = tf.cumsum(rewards, axis=2, reverse=True)
    else:
        # Same total reward repeated seq_len times along axis 2.
        tot_cum_rewards = tf.tile(
            tf.reduce_sum(rewards, axis=2, keep_dims=True),
            multiples=[1, 1, self.config.seq_len, 1])
    self.tot_cum_rewards = tot_cum_rewards

    # Rewards act as constants in the loss; no gradient flows through them.
    timestep_rewards_grad_op = tf.stop_gradient(timestep_rewards)
    tot_cum_rewards_op = tf.stop_gradient(tot_cum_rewards)

    const1 = 1.0 / (np.sqrt(2.0 * math.pi) * self.config.variance)
    const2 = 2.0 * self.config.variance ** 2
    squared_diff = tf.square(self.targets_placeholder - self.logits)
    # log(const1 * exp(-d / const2)) == log(const1) - d / const2.  The
    # expanded form avoids exp() underflowing to 0 for large d, which would
    # give log(0) = -inf and NaN gradients.
    log_const1 = tf.log(tf.cast(const1, squared_diff.dtype))
    density_func = log_const1 - squared_diff / const2
    self.density_func = density_func

    advantage = tot_cum_rewards_op - timestep_rewards_grad_op
    self.loss = tf.reduce_mean(
        tf.reduce_sum(density_func * advantage, axis=2), axis=[1, 0])
    self.total_rewards = tf.reduce_mean(
        tf.reduce_sum(timestep_rewards, axis=2), axis=1)
def build_reward(self):
    """Build the reward tensors for a tour with time-window constraints.

    Reorders ``self.input_`` along the tour given by ``self.positions``,
    then derives the tour length, the arrival time at every stop (waiting
    until a window opens where needed) and the number of stops reached after
    their window closed. The final reward is
    ``distances + beta * sqrt(number of late stops)``.

    Sets ``self.permutations``, ``self.ordered_input_``, the
    ``self.ordered_tw_*_`` tensors, ``self.distances``,
    ``self.time_at_cities``, ``self.constrained_delivery_time``,
    ``self.delay`` and ``self.reward``, and emits summaries for
    'tour_length', 'delay' and 'reward'.
    """
    with tf.name_scope('permutations'):
        # Pair each tour position with its batch row so gather_nd can
        # reorder the inputs along the tour.
        batch_rows = tf.tile(
            tf.expand_dims(tf.range(self.batch_size, dtype=tf.int32), 1),
            [1, self.max_length + 2])
        self.permutations = tf.stack([batch_rows, self.positions], 2)
        gathered = tf.gather_nd(self.input_, self.permutations)
        self.ordered_input_ = gathered
        # [batch, steps, features] -> [features, steps, batch]; the tour
        # starts and ends at the depot.
        self.ordered_input_ = tf.transpose(self.ordered_input_, [2, 1, 0])

        # Squared coordinate differences between consecutive stops,
        # transposed to [batch, legs].
        xs = self.ordered_input_[0]
        x_step_sq = tf.transpose(tf.square(xs[1:] - xs[:-1]), [1, 0])
        ys = self.ordered_input_[1]
        y_step_sq = tf.transpose(tf.square(ys[1:] - ys[:-1]), [1, 0])

        # Time windows per stop (last step dropped), transposed to
        # [batch, stops].
        self.ordered_tw_mean_ = tf.transpose(
            self.ordered_input_[2][:-1], [1, 0])
        self.ordered_tw_width_ = tf.transpose(
            self.ordered_input_[3][:-1], [1, 0])
        self.ordered_tw_open_ = (
            self.ordered_tw_mean_ - self.ordered_tw_width_/2)
        self.ordered_tw_close_ = (
            self.ordered_tw_mean_ + self.ordered_tw_width_/2)

    with tf.name_scope('environment'):
        # Euclidean length of every leg, then of the whole tour.
        leg_lengths = tf.sqrt(x_step_sq + y_step_sq)
        self.distances = tf.reduce_sum(leg_lengths, axis=1)
        variable_summaries('tour_length', self.distances, with_max_min = True)

        # Unconstrained arrival times; the -10 offset puts the depot on
        # time (windows are centred on t_mean).
        travelled = tf.cumsum(leg_lengths, axis=1, exclusive=True)
        self.time_at_cities = (1/self.speed)*travelled-10

        # Waiting for a window to open pushes back every later arrival.
        self.constrained_delivery_time = []
        waited = 0
        stops = zip(tf.unstack(self.ordered_tw_open_, axis=1),
                    tf.unstack(self.time_at_cities, axis=1))
        for window_open, arrival in stops:
            shifted_arrival = arrival + waited
            waited += tf.maximum(window_open - shifted_arrival,
                                 tf.zeros([self.batch_size]))
            self.constrained_delivery_time.append(arrival + waited)
        self.constrained_delivery_time = tf.stack(
            self.constrained_delivery_time, 1)

        # A stop is late when the arrival exceeds the window close (with a
        # 0.0001 tolerance); count the late stops per tour.
        lateness = tf.maximum(
            self.constrained_delivery_time-self.ordered_tw_close_-0.0001,
            tf.zeros([self.batch_size, self.max_length+1]))
        self.delay = lateness
        self.delay = tf.count_nonzero(self.delay, 1)
        variable_summaries(
            'delay', tf.cast(self.delay, tf.float32), with_max_min = True)

        # Reward: tour length plus a penalty on the number of late stops.
        late_penalty = self.beta*tf.sqrt(tf.cast(self.delay, tf.float32))
        self.reward = tf.cast(self.distances, tf.float32)+late_penalty
        variable_summaries('reward', self.reward, with_max_min = True)
def __init__(self, requests, expert_capacity):
    """Create a TruncatingDispatcher.

    Each request is assigned a position within its expert by an exclusive
    cumulative sum over the length axis. Requests whose position is not
    below `expert_capacity` are gated off (truncated). The surviving
    requests are scattered into per-(batch, expert, slot) index tensors.

    Sets `self._requests`, `self._expert_capacity`, `self._batch`,
    `self._length`, `self._num_experts`, `self._gates`, `self._indices`,
    `self._nonpadding` and `self._flat_indices`.

    Args:
      requests: a boolean `Tensor` of shape `[batch, length, num_experts]`.
        Alternatively, a float or int Tensor containing zeros and ones.
      expert_capacity: a Scalar - maximum number of examples per expert per
        batch element.

    Returns:
      a TruncatingDispatcher
    """
    self._requests = tf.to_float(requests)
    self._expert_capacity = expert_capacity
    expert_capacity_f = tf.to_float(expert_capacity)
    self._batch, self._length, self._num_experts = tf.unstack(
        tf.shape(self._requests), num=3)
    # [batch, length, num_experts]
    # Number of earlier requests (along the length axis) for the same expert.
    position_in_expert = tf.cumsum(self._requests, axis=1, exclusive=True)
    # [batch, length, num_experts]
    # 1.0 where a request exists and its position fits within the capacity.
    self._gates = self._requests * tf.to_float(
        tf.less(position_in_expert, expert_capacity_f))
    # Float index grids shaped to broadcast over [batch, length, num_experts].
    batch_index = tf.reshape(
        tf.to_float(tf.range(self._batch)), [self._batch, 1, 1])
    length_index = tf.reshape(
        tf.to_float(tf.range(self._length)), [1, self._length, 1])
    expert_index = tf.reshape(
        tf.to_float(tf.range(self._num_experts)), [1, 1, self._num_experts])
    # position in a Tensor with shape [batch * num_experts * expert_capacity]
    flat_position = (
        position_in_expert +
        batch_index * (tf.to_float(self._num_experts) * expert_capacity_f) +
        expert_index * expert_capacity_f)
    # Tensor of shape [batch * num_experts * expert_capacity].
    # The summed data is (length position + 1) for gated entries and 0.0 for
    # all others, so a slot holds 0.0 when no gated entry lands in it.
    # NOTE(review): presumably at most one gated entry maps to each slot
    # when requests are 0/1, making each value a 1-based length position -
    # confirm.
    self._indices = tf.unsorted_segment_sum(
        data=tf.reshape((length_index + 1.0) * self._gates, [-1]),
        segment_ids=tf.to_int32(tf.reshape(flat_position, [-1])),
        num_segments=self._batch * self._num_experts * expert_capacity)
    self._indices = tf.reshape(
        self._indices, [self._batch, self._num_experts, expert_capacity])
    # Tensors of shape [batch, num_experts, expert_capacity].
    # each element is 0.0 or 1.0
    self._nonpadding = tf.minimum(self._indices, 1.0)
    # Undo the +1 shift; empty slots clamp to 0 instead of going to -1.
    # each element is an integer in [0, length)
    self._indices = tf.nn.relu(self._indices - 1.0)
    # self._flat_indices is [batch, num_experts, expert_capacity], with values
    # in [0, batch * length)
    self._flat_indices = tf.to_int32(
        self._indices +
        (tf.reshape(tf.to_float(tf.range(self._batch)), [-1, 1, 1])
         * tf.to_float(self._length)))
    self._indices = tf.to_int32(self._indices)
def compute_down_msg(self, inc_node_msg, node_to_span_off_belief_idx,
                     node_to_span_on_start_belief_idx,
                     node_to_span_on_end_belief_idx,
                     parent_on_down_to_sum_tree_idx,
                     parent_off_down_to_sum_tree_idx):
    """Compute the downward BP messages for this layer of the tree.

    Node marginals are the product of the stored upward message and the
    incoming message. Span-on beliefs are obtained as the difference of two
    gathers from the cumulative sum of the marginals along axis 2.

    Args:
      inc_node_msg: incoming messages from parent variables.
      node_to_span_off_belief_idx: map from node marginals at this layer to
        the corresponding span-off marginals.
      node_to_span_on_start_belief_idx: map marking the start of each span
        marginal.
      node_to_span_on_end_belief_idx: map marking the end of each span
        marginal.
      parent_on_down_to_sum_tree_idx: map from the marginal of the parent-on
        variable down to the child variable.
      parent_off_down_to_sum_tree_idx: map from the marginal of the
        parent-off variable down to the child variable.

    Returns:
      span_off_marginals: span-off beliefs divided by the sum of span-on and
        span-off beliefs.
      out_msg: sum of the incoming messages gathered through the parent-on
        and parent-off maps.
    """
    marginals = self.up_node_msg * inc_node_msg
    off_beliefs = padded_gather_nd(
        marginals, node_to_span_off_belief_idx, 3, 4)

    # A span's "on" belief is the difference of the running sums gathered at
    # its end and start indices.
    running_beliefs = tf.cumsum(marginals, 2)
    start_running = padded_gather_nd(
        running_beliefs, node_to_span_on_start_belief_idx, 3, 4)
    end_running = padded_gather_nd(
        running_beliefs, node_to_span_on_end_belief_idx, 3, 4)
    on_beliefs = end_running - start_running

    span_off_marginals = su.safe_divide(
        off_beliefs, on_beliefs + off_beliefs)

    from_parent_on = padded_gather_nd(
        inc_node_msg, parent_on_down_to_sum_tree_idx, 3, 4)
    from_parent_off = padded_gather_nd(
        inc_node_msg, parent_off_down_to_sum_tree_idx, 3, 4)
    out_msg = from_parent_on + from_parent_off
    return span_off_marginals, out_msg
def _subsample_selection_to_desired_neg_pos_ratio(self, indices, match,
                                                  max_negatives_per_positive,
                                                  min_negatives_per_image=0):
    """Subsample selected anchor indices to a desired neg:pos ratio.

    `indices` is a subset of M anchor indices (out of N anchors, M<N) that a
    Match object labels as positive (matched) or negative (unmatched). The
    result keeps all positives plus up to the first K negatives, where
      K = max(min_negatives_per_image,
              floor(max_negatives_per_positive * num_positives)).

    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
    with positives=[2, 5], negatives=[4, 7, 9, 10] and
    max_negatives_per_positive=1, the returned subset is [2, 4, 5, 7].

    Args:
      indices: An integer tensor of shape [M] representing a collection of
        selected anchor indices.
      match: A matcher.Match object encoding the match between anchors and
        groundtruth boxes for a given image, with rows corresponding to
        groundtruth boxes and columns corresponding to anchors.
      max_negatives_per_positive: (float) maximum number of negatives for
        each positive anchor.
      min_negatives_per_image: minimum number of negative anchors for a given
        image. Allows sampling negatives in images without any positive
        anchors.

    Returns:
      selected_indices: An integer tensor of shape [M'] representing a
        collection of selected anchor indices with M' <= M.
      num_positives: An integer tensor with the number of positive examples
        among `indices`.
      num_negatives: An integer tensor computed as the size of the selection
        minus `num_positives`.
    """
    is_positive = tf.gather(match.matched_column_indicator(), indices)
    is_negative = tf.gather(match.unmatched_column_indicator(), indices)

    num_positives = tf.reduce_sum(tf.to_int32(is_positive))
    ratio_budget = tf.to_int32(
        max_negatives_per_positive * tf.to_float(num_positives))
    negatives_budget = tf.maximum(min_negatives_per_image, ratio_budget)

    # The running count of negatives marks every entry that still fits in
    # the budget.
    negatives_seen = tf.cumsum(tf.to_int32(is_negative))
    within_budget = tf.less_equal(negatives_seen, negatives_budget)

    kept_positions = tf.where(tf.logical_or(is_positive, within_budget))
    num_negatives = tf.size(kept_positions) - num_positives
    selected_indices = tf.reshape(tf.gather(indices, kept_positions), [-1])
    return (selected_indices, num_positives, num_negatives)