From open-source Python projects we extracted the following 50 code examples illustrating how to use tensorflow.sequence_mask().
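Before the project examples, a minimal sketch of what tf.sequence_mask itself returns (assuming TensorFlow 1.x run inside a Session; the tensor values are illustrative): given per-example lengths, it builds a [batch, maxlen] boolean or float mask that is True/1.0 for valid positions and False/0.0 for padding.

import tensorflow as tf

lengths = tf.constant([2, 1, 4])
bool_mask = tf.sequence_mask(lengths, maxlen=5)                   # dtype=tf.bool
float_mask = tf.sequence_mask(lengths, maxlen=5, dtype=tf.float32)

with tf.Session() as sess:
    print(sess.run(bool_mask))
    # [[ True  True False False False]
    #  [ True False False False False]
    #  [ True  True  True  True False]]
    print(sess.run(float_mask))
    # [[1. 1. 0. 0. 0.]
    #  [1. 0. 0. 0. 0.]
    #  [1. 1. 1. 1. 0.]]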
def bag_of_tokens(config, labels, label_lengths):
    if config.train_output_embeddings:
        with tf.variable_scope('embed', reuse=True):
            output_embeddings = tf.get_variable('output_embedding')
    else:
        output_embeddings = tf.constant(config.output_embedding_matrix)

    #everything_label_placeholder = tf.placeholder(shape=(None, config.max_length,), dtype=tf.int32)
    #everything_label_length_placeholder = tf.placeholder(shape=(None,), dtype=tf.int32)

    labels = tf.constant(np.array(labels))
    embedded_output = tf.gather(output_embeddings, labels)
    print('embedded_output before', embedded_output)
    #mask = tf.sequence_mask(label_lengths, maxlen=config.max_length, dtype=tf.float32)
    # note: this multiplication will broadcast the mask along all elements of the depth dimension
    # (which is why we run the expand_dims to choose how to broadcast)
    #embedded_output = embedded_output * tf.expand_dims(mask, axis=2)
    #print('embedded_output after', embedded_output)

    return tf.reduce_sum(embedded_output, axis=1)
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    """Calculates the per-example cross-entropy loss for a sequence of logits and
    masks out all losses past the sequence length.

    Args:
        logits: Logits of shape `[T, B, vocab_size]`
        targets: Target classes of shape `[T, B]`
        sequence_length: An int32 tensor of shape `[B]` corresponding
            to the length of each input

    Returns:
        A tensor of shape [T, B] that contains the loss per example, per time step.
    """
    with tf.name_scope("cross_entropy_sequence_loss"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # Mask out the losses we don't care about
        loss_mask = tf.sequence_mask(
            tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
        losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])

        return losses
def ar_layer(z0, hps, n_hidden=10):
    ''' old iaf layer '''
    # Repeat input
    z_rep = tf.reshape(tf.tile(z0, [1, hps.z_size]), [-1, hps.z_size])

    # make mask
    mask = tf.sequence_mask(tf.range(hps.z_size), hps.z_size)[None, :, :]
    mask = tf.reshape(tf.tile(mask, [tf.shape(z0)[0], 1, 1]), [-1, hps.z_size])

    # predict mu and sigma
    z_mask = z_rep * tf.to_float(mask)
    mid = slim.fully_connected(z_mask, n_hidden, activation_fn=tf.nn.relu)
    pars = slim.fully_connected(mid, 2, activation_fn=None)
    pars = tf.reshape(pars, [-1, hps.z_size, 2])
    mu, log_sigma = tf.unstack(pars, axis=2)
    return mu, log_sigma
def mask_for_lengths(lengths, max_length=None, mask_right=True, value=-1000.0):
    """
    Creates a [batch_size x max_length] mask.

    Args:
        lengths: int32 1-dim tensor of batch_size lengths
        max_length: int32 0-dim tensor or python int
        mask_right: if True, everything before "lengths" becomes zero and the rest "value", else vice versa
        value: value for the mask

    Returns:
        [batch_size x max_length] mask of zeros and "value"s
    """
    mask = tf.sequence_mask(lengths, max_length, dtype=tf.float32)
    if mask_right:
        mask = 1.0 - mask
    mask *= value
    return mask
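An additive mask like the one mask_for_lengths builds (zeros for valid steps, a large negative value for padding) is usually added to attention energies before a softmax, so padded positions receive near-zero probability. A minimal standalone sketch of that pattern (TensorFlow 1.x assumed; the energies and lengths are toy values, not from any of the projects above):

import tensorflow as tf

# Toy attention energies for a batch of 2 sequences with max length 4.
energies = tf.constant([[2.0, 1.0, 0.5, 3.0],
                        [1.0, 0.2, 0.1, 0.0]])
lengths = tf.constant([2, 3])

# 0 for valid positions, -1000 for padding, as in mask_for_lengths.
additive_mask = (1.0 - tf.sequence_mask(lengths, 4, dtype=tf.float32)) * -1000.0
weights = tf.nn.softmax(energies + additive_mask)

with tf.Session() as sess:
    print(sess.run(weights))   # each row sums to 1; masked columns are ~0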
def global_attention(state, hidden_states, encoder, encoder_input_length, scope=None, context=None, **kwargs):
    with tf.variable_scope(scope or 'attention_{}'.format(encoder.name)):
        if context is not None and encoder.use_context:
            state = tf.concat([state, context], axis=1)

        if encoder.attn_filters:
            e = compute_energy_with_filter(hidden_states, state, attn_size=encoder.attn_size,
                                           attn_filters=encoder.attn_filters,
                                           attn_filter_length=encoder.attn_filter_length, **kwargs)
        else:
            e = compute_energy(hidden_states, state, attn_size=encoder.attn_size,
                               attn_keep_prob=encoder.attn_keep_prob,
                               pervasive_dropout=encoder.pervasive_dropout,
                               layer_norm=encoder.layer_norm, mult_attn=encoder.mult_attn, **kwargs)

        e -= tf.reduce_max(e, axis=1, keep_dims=True)
        mask = tf.sequence_mask(encoder_input_length, maxlen=tf.shape(hidden_states)[1], dtype=tf.float32)

        T = encoder.attn_temperature or 1.0
        exp = tf.exp(e / T) * mask
        weights = exp / tf.reduce_sum(exp, axis=-1, keep_dims=True)
        weighted_average = tf.reduce_sum(tf.expand_dims(weights, 2) * hidden_states, axis=1)

        return weighted_average, weights
def _create_position_embedding(self, lengths, maxlen):
    # Slice to size of current sequence
    pe_slice = self.pos_embed[2:maxlen+2, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

    # e.g. [[1,2,3,4,PAD,PAD,PAD],[2,3,PAD,PAD,PAD,PAD,PAD]] with lengths [4,2]
    positions_embed = tf.reverse_sequence(positions_embed, lengths, batch_dim=0, seq_dim=1)
    # --> [[4,3,2,1,PAD,PAD,PAD],[3,2,PAD,PAD,PAD,PAD,PAD]]
    positions_embed = tf.reverse(positions_embed, [1])
    # --> [[PAD,PAD,PAD,1,2,3,4],[PAD,PAD,PAD,PAD,PAD,2,3]]

    return positions_embed
def preprocess_input_sequences(self, data, shuffle=True):
    """
    Preprocess the input sequences:
    optionally shuffle the data, then pad/truncate documents and questions to fixed lengths;
    y_true is a one-hot vector of length self.A_len with index 0 (the correct candidate) set to 1.
    """
    documents, questions, answer, candidates = self.union_shuffle(data) if shuffle else data
    d_lens = [len(i) for i in documents]

    questions_ok = pad_sequences(questions, maxlen=self.q_len, dtype="int32", padding="post", truncating="post")
    documents_ok = pad_sequences(documents, maxlen=self.d_len, dtype="int32", padding="post", truncating="post")
    context_mask = K.eval(tf.sequence_mask(d_lens, self.d_len, dtype=tf.float32))
    candidates_ok = pad_sequences(candidates, maxlen=self.A_len, dtype="int32", padding="post", truncating="post")
    y_true = np.zeros_like(candidates_ok)
    y_true[:, 0] = 1
    return questions_ok, documents_ok, context_mask, candidates_ok, y_true
def _compute_loss(self, logits):
    """Compute optimization loss."""
    target_output = self.iterator.target_output
    if self.time_major:
        target_output = tf.transpose(target_output)
    max_time = self.get_max_time(target_output)
    crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=target_output, logits=logits)
    target_weights = tf.sequence_mask(
        self.iterator.target_sequence_length, max_time, dtype=logits.dtype)
    if self.time_major:
        target_weights = tf.transpose(target_weights)

    loss = tf.reduce_sum(
        crossent * target_weights) / tf.to_float(self.batch_size)
    return loss
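The masking pattern in _compute_loss can be reproduced in isolation: per-timestep cross-entropy is multiplied by a tf.sequence_mask so padded steps contribute nothing. A minimal sketch (TensorFlow 1.x graph mode assumed; all tensors here are toy stand-ins, not the model's iterator):

import tensorflow as tf

# Illustrative shapes: logits [batch, time, vocab], targets [batch, time].
logits = tf.random_normal([2, 5, 7])
targets = tf.constant([[1, 2, 3, 0, 0],
                       [4, 5, 0, 0, 0]], dtype=tf.int32)
lengths = tf.constant([3, 2])

crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=targets, logits=logits)                        # [batch, time]
weights = tf.sequence_mask(lengths, maxlen=5, dtype=tf.float32)
loss = tf.reduce_sum(crossent * weights) / tf.to_float(tf.shape(targets)[0])

with tf.Session() as sess:
    print(sess.run(loss))   # scalar; padded steps are zero-weighted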
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    """Calculates the per-example cross-entropy loss for a sequence of logits and
    masks out all losses past the sequence length.

    Args:
        logits: Logits of shape `[T, B, vocab_size]`
        targets: Target classes of shape `[T, B]`
        sequence_length: An int32 tensor of shape `[B]` corresponding
            to the length of each input

    Returns:
        A tensor of shape [T, B] that contains the loss per example, per time step.
    """
    with tf.name_scope("cross_entropy_sequence_loss"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        loss_mask = tf.sequence_mask(tf.to_int32(
            sequence_length), tf.to_int32(tf.shape(targets)[0]))
        losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])

        return losses
def add_loss_op(self):
    """Defines the loss"""
    if self.config.use_crf:
        log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(
            self.logits, self.labels, self.sequence_lengths)
        self.trans_params = trans_params  # need to evaluate it for decoding
        self.loss = tf.reduce_mean(-log_likelihood)
    else:
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=self.labels)
        mask = tf.sequence_mask(self.sequence_lengths)
        losses = tf.boolean_mask(losses, mask)
        self.loss = tf.reduce_mean(losses)

    # for tensorboard
    tf.summary.scalar("loss", self.loss)
def add_loss_op(self):
    """
    Adds loss to self
    """
    if self.config.crf:
        log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
            self.logits, self.labels, self.sequence_lengths)
        self.loss = tf.reduce_mean(-log_likelihood)
    else:
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
        mask = tf.sequence_mask(self.sequence_lengths)
        losses = tf.boolean_mask(losses, mask)
        self.loss = tf.reduce_mean(losses)

    # for tensorboard
    tf.summary.scalar("loss", self.loss)
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    with tf.name_scope('cross_entropy_sequence_loss'):
        total_length = tf.to_float(tf.reduce_sum(sequence_length))

        entropy_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # Mask out the losses we don't care about
        loss_mask = tf.sequence_mask(
            tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
        loss_mask = tf.transpose(tf.to_float(loss_mask), [1, 0])

        losses = entropy_losses * loss_mask
        # losses.shape: T * B
        # sequence_length: B
        total_loss_avg = tf.reduce_sum(losses) / total_length
        return total_loss_avg
def __init__(self, query_size, keys, values, values_length, name='attention'):
    self.attention_size = keys.get_shape().as_list()[-1]
    self.keys = keys
    self.values = values
    self.values_length = values_length
    self.query_trans = LinearOp(query_size, self.attention_size, name=name)

    with tf.variable_scope(name):
        self.v_att = tf.get_variable('v_att', shape=[self.attention_size], dtype=DTYPE)

    self.time_axis = 0 if TIME_MAJOR else 1

    # Replace all scores for padded inputs with tf.float32.min
    num_scores = tf.shape(self.keys)[self.time_axis]
    scores_mask = tf.sequence_mask(
        lengths=tf.to_int32(self.values_length),
        maxlen=tf.to_int32(num_scores),
        dtype=DTYPE)

    if TIME_MAJOR:
        scores_mask = tf.transpose(scores_mask)

    self.scores_mask = scores_mask
def map(self, is_train, x, mask=None):
    x = tf.transpose(x, [1, 0, 2])

    if self.bidirectional:
        with tf.variable_scope("forward"):
            fw = self._apply_transposed(is_train, x)[0]
        with tf.variable_scope("backward"):
            bw = self._apply_transposed(is_train, tf.reverse_sequence(x, mask, 0, 1))[0]
            bw = tf.reverse_sequence(bw, mask, 0, 1)
        out = tf.concat([fw, bw], axis=2)
    else:
        out = self._apply_transposed(is_train, x)[0]

    out = tf.transpose(out, [1, 0, 2])
    if mask is not None:
        out *= tf.expand_dims(tf.cast(tf.sequence_mask(mask, tf.shape(out)[1]), tf.float32), 2)
    return out
def apply(self, is_train, x, mask=None):
    if self.map_layer is not None:
        x = self.map_layer.apply(is_train, x, mask)

    rank = len(x.shape) - 2
    if mask is not None:
        shape = tf.shape(x)
        mask = tf.sequence_mask(tf.reshape(mask, (-1,)), shape[-2])
        mask = tf.cast(tf.reshape(mask, (shape[0], shape[1], shape[2], 1)), tf.float32)

        # This min_val thing is kind of a hack; really we should do something like compute the
        # min val over the entire batch, or just pick a very negative value, or maybe
        # do something a bit more finicky with tf.boolean_mask.
        # In practice it doesn't seem to be a problem, and some of the earlier models used this
        # scheme, so I have been sticking with it.
        if self.min_val == 0:
            x *= mask
        else:
            x = x * mask + self.min_val * (1 - mask)
        return tf.maximum(tf.reduce_max(x, axis=rank),
                          tf.fill([1] * (len(x.shape) - 1), float(self.min_val)))
    else:
        return tf.reduce_max(x, axis=rank)
def apply(self, is_train, x, mask=None):
    if mask is not None:
        answer_mask = tf.expand_dims(tf.cast(tf.sequence_mask(mask, tf.shape(x)[1]), tf.float32), 2)
        if self.apply_mask:
            x *= answer_mask
    else:
        answer_mask = None

    if self.reduce == "max":
        if mask is not None:
            raise NotImplementedError()
        return tf.reduce_max(x, axis=1)
    elif self.reduce == "mean":
        if mask is not None:
            return tf.reduce_sum(x * answer_mask, axis=1) / tf.cast(tf.expand_dims(mask, 1), tf.float32)
        else:
            return tf.reduce_mean(x, axis=1)
    elif self.reduce == "sum":
        if mask is not None:
            return tf.reduce_sum(x * answer_mask, axis=1)
        else:
            return tf.reduce_sum(x, axis=1)
    else:
        raise ValueError()
def apply(self, is_train, x, mask=None):
    if self.key_mapper is not None:
        with tf.variable_scope("map_keys"):
            keys = self.key_mapper.apply(is_train, x, mask)
    else:
        keys = x

    weights = tf.get_variable("weights", (keys.shape.as_list()[-1], self.n_encodings), dtype=tf.float32,
                              initializer=get_keras_initialization(self.init))
    dist = tf.tensordot(keys, weights, axes=[[2], [0]])  # (batch, x_words, n_encoding)
    if self.bias:
        dist += tf.get_variable("bias", (1, 1, self.n_encodings),
                                dtype=tf.float32, initializer=tf.zeros_initializer())
    if mask is not None:
        bool_mask = tf.expand_dims(tf.cast(tf.sequence_mask(mask, tf.shape(x)[1]), tf.float32), 2)
        dist = bool_mask * dist + (1 - bool_mask) * VERY_NEGATIVE_NUMBER

    dist = tf.nn.softmax(dist, dim=1)

    out = tf.einsum("ajk,ajn->ank", x, dist)  # (batch, n_encoding, feature)

    if self.post_process is not None:
        with tf.variable_scope("post_process"):
            out = self.post_process.apply(is_train, out)
    return out
def add_loss_op(self, logits):
    def seq_loss(logits_tensor, label_tensor, length_tensor):
        """
        Args
            logits_tensor: shape (batch_size*time_steps_de, time_steps_en)
            label_tensor: shape (batch_size, time_steps_de), label id 1D tensor
            length_tensor: shape (batch_size)
        Return
            loss: A scalar tensor, mean error
        """
        labels = tf.reshape(label_tensor, shape=(-1,))
        loss_flat = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_tensor, labels=labels, name='sparse_softmax')
        losses = tf.reshape(loss_flat, shape=tf.shape(label_tensor))  # (batch_size, tstp_de)
        length_mask = tf.sequence_mask(length_tensor, tf.shape(losses)[1], dtype=tf.float32, name='length_mask')
        losses_sum = tf.reduce_sum(losses * length_mask, reduction_indices=[1])  # (batch_size)
        losses_mean = losses_sum / (tf.to_float(length_tensor) + 1e-20)  # (batch_size)
        loss = tf.reduce_mean(losses_mean)  # scalar
        return loss

    reg_loss = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                         if v != self.embedding]) * self.config.reg
    valid_loss = seq_loss(logits, self.decoder_label, self.decoder_tstps)
    train_loss = reg_loss + valid_loss
    return train_loss, valid_loss, reg_loss
def decodesIntoAccuracy(self, labels, perSymbol=True):
    # as the dimensions None x L
    accuracyMatrix = tf.equal(self.hardOutputs, labels)

    # zero out anything past the labeled length
    accuracyMatrix = tf.logical_and(accuracyMatrix,
                                    tf.sequence_mask(self.lengthPlaceholder, maxlen=self.maximumLength))

    # Sum across all of the time steps to get the total number of predictions correct in each batch entry
    accuracyVector = tf.reduce_sum(tf.cast(accuracyMatrix, tf.int32), axis=1)
    if perSymbol:
        # Now normalize it by the sequence length and take the average
        accuracyVector = tf.divide(tf.cast(accuracyVector, tf.float32),
                                   tf.cast(self.lengthPlaceholder, tf.float32))
    if not perSymbol:
        # accuracy is measured per sequence
        accuracyVector = tf.cast(tf.equal(accuracyVector, self.lengthPlaceholder), tf.float32)
    return tf.reduce_mean(accuracyVector)
def add_loss_op(self):
    """
    Adds loss to self
    """
    if self.crf:
        log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
            self.logits, self.labels, self.sequence_lengths)
        self.loss = tf.reduce_mean(-log_likelihood)
    else:
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
        mask = tf.sequence_mask(self.sequence_lengths)
        losses = tf.boolean_mask(losses, mask)
        self.loss = tf.reduce_mean(losses)

    # for tensorboard
    tf.summary.scalar("loss", self.loss)
def _train_fprop(self, state_list):
    '''The softmax is applied to the units that are not masked out, as specified by seqlen.
       state_list : [state_below, seqlen]
           state_below (2d tf tensor): shape = [batchsize, layer_dim]
           seqlen (1d tf tensor): shape = [batchsize]
       example:
           state_below = 3 x 5 matrix
           seqlen = [2, 1, 4]
    '''
    assert len(state_list) == 2
    state_below, seqlen = state_list
    assert len(seqlen.get_shape()) == 1
    shape = state_below.get_shape()
    assert len(shape) == 2, 'state below dimension {} != 2'.format(len(shape))
    mask = tf.to_float(tf.sequence_mask(seqlen, shape[-1]))
    exp = tf.exp(state_below) * mask
    exp_sum = tf.reduce_sum(exp, axis=1)
    zeros = tf.to_float(tf.equal(exp_sum, 0))
    softmax = tf.div(exp, tf.expand_dims(exp_sum + zeros, -1))
    nonzeros = tf.to_float(tf.not_equal(exp_sum, 0))
    softmax = softmax * tf.expand_dims(nonzeros, -1)
    return softmax
def _masked_softmax(logits, lengths):
    """ Softmax on last axis with proper mask """
    sequence_mask = tf.expand_dims(
        tf.sequence_mask(lengths, maxlen=tf.shape(logits)[-1], dtype=tf.float32),
        dim=1)
    max_logits = tf.reduce_max(logits, axis=-1, keep_dims=True)
    masked_logit_exp = tf.exp(logits - max_logits) * sequence_mask
    logit_sum = tf.reduce_sum(masked_logit_exp, axis=-1, keep_dims=True)
    probs = masked_logit_exp / logit_sum
    return probs
def _make_beam_mask(self, num_available_beams):
    mask = tf.sequence_mask(num_available_beams, self._beam_width)
    return tf.tile(tf.expand_dims(mask, axis=2), multiples=[1, 1, self._output_size])
def add_loss_op(self, result):
    logits = result.rnn_output
    with tf.control_dependencies([tf.assert_positive(tf.shape(logits)[1], data=[tf.shape(logits)])]):
        length_diff = tf.reshape(self.config.max_length - tf.shape(logits)[1], shape=(1,))
    padding = tf.reshape(tf.concat([[0, 0, 0], length_diff, [0, 0]], axis=0), shape=(3, 2))
    preds = tf.pad(logits, padding, mode='constant')

    # add epsilon to avoid division by 0
    preds = preds + 1e-5

    mask = tf.sequence_mask(self.output_length_placeholder, self.config.max_length, dtype=tf.float32)
    loss = tf.contrib.seq2seq.sequence_loss(preds, self.output_placeholder, mask)

    with tf.control_dependencies([tf.assert_non_negative(loss, data=[preds, mask], summarize=256*60*300)]):
        return tf.identity(loss)
def add_positional_embedding(self, model_input, num_frames, l2_penalty=1e-8):
    batch_size, max_frames, num_features = model_input.get_shape().as_list()
    positional_embedding = tf.get_variable("positional_embedding", dtype=tf.float32,
                                           shape=[1, max_frames, num_features],
                                           initializer=tf.zeros_initializer(),
                                           regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
    mask = tf.sequence_mask(lengths=num_frames, maxlen=max_frames, dtype=tf.float32)
    model_input_with_positional_embedding = tf.einsum("ijk,ij->ijk", model_input + positional_embedding, mask)
    return model_input_with_positional_embedding
def get_mean_input(self, model_input, num_frames):
    batch_size, max_frames, num_features = model_input.get_shape().as_list()
    mask = tf.sequence_mask(lengths=num_frames, maxlen=max_frames, dtype=tf.float32)
    mean_input = tf.einsum("ijk,ij->ik", model_input, mask) \
        / tf.expand_dims(tf.cast(num_frames, dtype=tf.float32), dim=1)
    tiled_mean_input = tf.tile(tf.expand_dims(mean_input, dim=1), multiples=[1, max_frames, 1])
    return tiled_mean_input
def create_metric_ops(self, _inputs, labels, predictions):
    """Creates the metric op"""
    loss_mask = tf.sequence_mask(
        lengths=tf.to_int32(labels["target_len"] - 1),
        maxlen=tf.to_int32(tf.shape(predictions["losses"])[1]))
    return metrics.streaming_mean(predictions["losses"], loss_mask)
def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
    """Creates position embeddings.

    Args:
        embedding_dim: Dimensionality of the embeddings. An integer.
        num_positions: The number of positions to be embedded. For example,
            if you have inputs of length up to 100, this should be 100. An integer.
        lengths: The lengths of the inputs to create position embeddings for.
            An int32 tensor of shape `[batch_size]`.
        maxlen: The maximum length of the input sequence to create position
            embeddings for. An int32 tensor.

    Returns:
        A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
        embeddings for each position. All elements past `lengths` are zero.
    """
    # Create constant position encodings
    position_encodings = tf.constant(
        position_encoding(num_positions, embedding_dim),
        name="position_encoding")

    # Slice to size of current sequence
    pe_slice = position_encodings[:maxlen, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

    return positions_embed
def _mask(hypothesis, hypothesis_length, premise, premise_length):
    p_mask = tf.sequence_mask(premise_length, tf.shape(premise)[1], dtype=tf.float32)
    h_mask = tf.sequence_mask(hypothesis_length, tf.shape(hypothesis)[1], dtype=tf.float32)
    premise *= tf.expand_dims(p_mask, 2)
    hypothesis *= tf.expand_dims(h_mask, 2)
    return hypothesis, premise
def mask_3d(sequences, sequence_lengths, mask_value, dimension=2):
    """
    Given a batch of matrices, each with shape m x n, mask the values in each
    row after the positions indicated in sequence_lengths.

    This function is supposed to mask the last columns in the raw attention
    matrix (e_{i, j}) in cases where sentence2 is smaller than the maximum.

    Args:
        sequences: tensor with shape (batch_size, m, n)
        sequence_lengths: tensor with shape (batch_size) containing the
            sentence sizes that should be limited
        mask_value: scalar value to assign to items after sentence size
        dimension: over which dimension to mask values

    Returns:
        A tensor with the same shape as `sequences`
    """
    if dimension == 1:
        sequences = tf.transpose(sequences, [0, 2, 1])
    time_steps1, time_steps2 = tf.shape(sequences)[1], tf.shape(sequences)[2]

    ones = tf.ones_like(sequences, dtype=tf.int32)
    pad_values = mask_value * tf.cast(ones, tf.float32)
    mask = tf.sequence_mask(sequence_lengths, time_steps2)

    # mask is (batch_size, sentence2_size). we have to tile it for 3d
    mask3d = tf.tile(tf.expand_dims(mask, 1), (1, time_steps1, 1))
    masked = tf.where(mask3d, sequences, pad_values)

    return tf.transpose(masked, [0, 2, 1]) if dimension == 1 else masked
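The tile-then-tf.where pattern used in mask_3d can be seen on a toy attention matrix. A minimal standalone sketch (TensorFlow 1.x assumed; shapes and values are illustrative, and tf.fill stands in for the pad_values construction above):

import tensorflow as tf

# Toy attention scores: batch of 1, 2 rows (sentence1), 4 columns (sentence2).
scores = tf.constant([[[0.3, 0.7, 0.1, 0.9],
                       [0.2, 0.4, 0.6, 0.8]]])
col_lengths = tf.constant([3])            # sentence2 has only 3 real tokens

mask = tf.sequence_mask(col_lengths, 4)                   # [batch, 4]
mask3d = tf.tile(tf.expand_dims(mask, 1), [1, 2, 1])      # [batch, 2, 4]
masked = tf.where(mask3d, scores, tf.fill(tf.shape(scores), -1e9))

with tf.Session() as sess:
    print(sess.run(masked))   # the last column is replaced by -1e9 in every row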
def average_attention(hidden_states, encoder_input_length, *args, **kwargs):
    # attention with fixed weights (average of all hidden states)
    lengths = tf.to_float(tf.expand_dims(encoder_input_length, axis=1))
    mask = tf.sequence_mask(encoder_input_length, maxlen=tf.shape(hidden_states)[1])
    weights = tf.to_float(mask) / lengths
    weighted_average = tf.reduce_sum(hidden_states * tf.expand_dims(weights, axis=2), axis=1)
    return weighted_average, weights
def _create_position_embedding(self, lengths, maxlen):
    # Slice to size of current sequence
    pe_slice = self.pos_embed[2:maxlen+2, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

    return positions_embed
def _mask_by_length(t, length):
    maxlen = t.get_shape().as_list()[1]
    mask = tf.sequence_mask(length, maxlen=maxlen)
    mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)
    return t * mask
def _create_position_embedding(embedding_dim, num_positions, lengths, maxlen):
    """Creates position embeddings.

    Args:
        embedding_dim: Dimensionality of the embeddings. An integer.
        num_positions: The number of positions to be embedded. For example,
            if you have inputs of length up to 100, this should be 100. An integer.
        lengths: The lengths of the inputs to create position embeddings for.
            An int32 tensor of shape `[batch_size]`.
        maxlen: The maximum length of the input sequence to create position
            embeddings for. An int32 tensor.

    Returns:
        A tensor of shape `[batch_size, maxlen, embedding_dim]` that contains
        embeddings for each position. All elements past `lengths` are zero.
    """
    # Create constant position encodings
    position_encodings = tf.constant(
        _position_encoding(num_positions, embedding_dim),
        name="position_encoding")

    # Slice to size of current sequence
    pe_slice = position_encodings[:maxlen, :]
    # Replicate encodings for each element in the batch
    batch_size = tf.shape(lengths)[0]
    pe_batch = tf.tile([pe_slice], [batch_size, 1, 1])

    # Mask out positions that are padded
    positions_mask = tf.sequence_mask(
        lengths=lengths, maxlen=maxlen, dtype=tf.float32)
    positions_embed = pe_batch * tf.expand_dims(positions_mask, 2)

    return positions_embed
def create_mask_for_keys(self, keys, keys_length):
    # batch_size x keys_l
    mask = 1 - tf.sequence_mask(lengths=keys_length, maxlen=keys.get_shape().as_list()[1], dtype=tf.float32)
    mask *= -2 ** 30
    mask = tf.expand_dims(tf.expand_dims(mask, 1), 1)  # batch_size x 1 x 1 x keys_l
    return mask
def create_mask_for_queries(self, queries, queries_len):
    # batch_size x queries_l
    mask = tf.sequence_mask(lengths=queries_len, maxlen=queries.get_shape().as_list()[1], dtype=tf.float32)
    mask = tf.expand_dims(tf.expand_dims(mask, 1), -1)  # batch_size x 1 x queries_l x 1
    return mask
def _build(self, inputs, sequence_length, labels, encoder_output, encoder_sequence_length, embedding_lookup=None):
    if embedding_lookup is None:
        output = PositionnalEmbedding(**self.embed_params)(inputs)
    else:
        output = embedding_lookup(inputs)

    output = tf.layers.dropout(
        output, self.params.dropout_rate)

    for _ in range(self.params.num_blocks):
        output = DecoderBlock(**self.block_params)(output, sequence_length,
                                                   encoder_output, encoder_sequence_length)

    logits = tf.contrib.layers.fully_connected(
        output, self.params.vocab_size)

    max_sequence_length = tf.shape(inputs)[1]
    one_hot_labels = tf.one_hot(labels, self.params.vocab_size, axis=-1)
    with tf.name_scope("loss"):
        mask_loss = tf.sequence_mask(sequence_length, maxlen=max_sequence_length, dtype=tf.float32)
        one_hot_labels = tf.reshape(one_hot_labels, [-1, self.params.vocab_size])
        loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=tf.reshape(logits, [-1, self.params.vocab_size]),
            labels=one_hot_labels)
        loss = tf.reshape(loss, [-1, max_sequence_length])
        loss *= mask_loss
        loss = tf.reduce_sum(loss, 1) / tf.reduce_sum(mask_loss, 1)
        mean_loss = tf.reduce_mean(loss)

        pred = tf.argmax(logits, axis=-1)
        acc = tf.equal(pred, labels)
        acc = tf.reduce_sum(tf.to_float(acc) * mask_loss, 1) / tf.reduce_sum(mask_loss, 1)
        acc = tf.reduce_mean(acc, name="accuracy")

    return mean_loss, tf.nn.log_softmax(logits)
def get_mention_emb(self, text_emb, text_outputs, mention_starts, mention_ends):
    mention_emb_list = []

    mention_start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
    mention_emb_list.append(mention_start_emb)

    mention_end_emb = tf.gather(text_outputs, mention_ends)  # [num_mentions, emb]
    mention_emb_list.append(mention_end_emb)

    mention_width = 1 + mention_ends - mention_starts  # [num_mentions]
    if self.config["use_features"]:
        mention_width_index = mention_width - 1  # [num_mentions]
        mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings",
                                                      [self.config["max_mention_width"],
                                                       self.config["feature_size"]]),
                                      mention_width_index)  # [num_mentions, emb]
        mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
        mention_emb_list.append(mention_width_emb)

    if self.config["model_heads"]:
        mention_indices = tf.expand_dims(tf.range(self.config["max_mention_width"]), 0) \
            + tf.expand_dims(mention_starts, 1)  # [num_mentions, max_mention_width]
        mention_indices = tf.minimum(util.shape(text_outputs, 0) - 1,
                                     mention_indices)  # [num_mentions, max_mention_width]
        mention_text_emb = tf.gather(text_emb, mention_indices)  # [num_mentions, max_mention_width, emb]
        self.head_scores = util.projection(text_outputs, 1)  # [num_words, 1]
        mention_head_scores = tf.gather(self.head_scores, mention_indices)  # [num_mentions, max_mention_width, 1]
        mention_mask = tf.expand_dims(
            tf.sequence_mask(mention_width, self.config["max_mention_width"], dtype=tf.float32),
            2)  # [num_mentions, max_mention_width, 1]
        mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask),
                                          dim=1)  # [num_mentions, max_mention_width, 1]
        mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1)  # [num_mentions, emb]
        mention_emb_list.append(mention_head_emb)

    mention_emb = tf.concat(mention_emb_list, 1)  # [num_mentions, emb]
    return mention_emb
def mask_attn_score(score, memory_sequence_length, score_mask_value=-1e8):
    score_mask = tf.sequence_mask(
        memory_sequence_length, maxlen=score.shape[1])
    score_mask_values = score_mask_value * tf.ones_like(score)
    return tf.where(score_mask, score, score_mask_values)
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    with tf.name_scope("cross_entropy_sequence_loss"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # Mask out the losses we don't care about
        loss_mask = tf.sequence_mask(
            tf.to_int32(sequence_length), tf.to_int32(tf.shape(targets)[0]))
        losses = losses * tf.transpose(tf.to_float(loss_mask), [1, 0])

        return losses
def apply(self, is_train, x, mask=None):
    mask = tf.sequence_mask(mask, tf.shape(x)[1])
    output = weight_layers(1, x, mask, self.l2_coef, do_layer_norm=self.layer_norm,
                           use_top_only=self.top_layer_only)["weighted_ops"][0]
    return output
def compute_attention_mask(x_mask, mem_mask, x_word_dim, key_word_dim):
    """ computes a (batch, x_word_dim, key_word_dim) bool mask for clients that want masking """
    if x_mask is None and mem_mask is None:
        return None
    elif x_mask is None or mem_mask is None:
        raise NotImplementedError()

    x_mask = tf.sequence_mask(x_mask, x_word_dim)
    mem_mask = tf.sequence_mask(mem_mask, key_word_dim)
    join_mask = tf.logical_and(tf.expand_dims(x_mask, 2), tf.expand_dims(mem_mask, 1))
    return join_mask
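The joint mask built by compute_attention_mask is just the broadcast AND of two tf.sequence_mask results. A minimal sketch (TensorFlow 1.x assumed; the lengths and dimensions are toy values):

import tensorflow as tf

x_len = tf.constant([3])      # 3 valid "query" words out of 4
mem_len = tf.constant([2])    # 2 valid "memory" words out of 5

x_mask = tf.sequence_mask(x_len, 4)        # [batch, 4]
mem_mask = tf.sequence_mask(mem_len, 5)    # [batch, 5]
join_mask = tf.logical_and(tf.expand_dims(x_mask, 2),
                           tf.expand_dims(mem_mask, 1))   # [batch, 4, 5]

with tf.Session() as sess:
    print(sess.run(join_mask)[0].astype(int))
    # only the top-left 3x2 block is 1; everything else is 0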
def get_mean_logit(self):
    logits = (self.start_logits + self.end_logits) / 2.0
    bol_mask = tf.sequence_mask(self.mask, tf.shape(self.start_logits)[1])
    bol_mask = tf.cast(bol_mask, tf.float32)
    return tf.reduce_sum(logits * bol_mask, axis=[1]) / tf.reduce_sum(bol_mask, axis=[1])