我们从Python开源项目中，提取了以下10个代码示例，用于说明如何使用tensorflow.nn.nce_loss()。
def loss_nce(self, l2_lambda=0.0001):  # 0.0001-->0.001
    """Build the training loss: mean sampled NCE loss plus an L2 penalty.

    ``tf.nn.nce_loss`` draws a new sample of negative labels each time the
    loss is evaluated, so this loss is only built for training graphs.

    Args:
        l2_lambda: Multiplier applied to the summed ``tf.nn.l2_loss`` of
            every trainable variable whose name does not contain ``'bias'``.

    Returns:
        Tensor equal to ``reduce_mean(nce_loss) + l2_lambda * sum(l2_loss)``.

    Raises:
        ValueError: If ``self.is_training`` is false. No loss is defined on
            that path; previously the method failed there with an unbound
            local ``loss`` instead of a clear message.
    """
    if not self.is_training:
        raise ValueError(
            "loss_nce() is only defined for training graphs "
            "(self.is_training is False)")

    # nce_loss expects labels shaped [batch_size, num_true]; per the original
    # annotations self.input_y is [batch_size], hence the added axis.
    labels = tf.expand_dims(self.input_y, 1)

    # nce_loss expects weights shaped [num_classes, dim]; per the original
    # annotations W_projection is [hidden_size*2, num_classes], so transpose.
    sampled_loss = tf.nn.nce_loss(
        weights=tf.transpose(self.W_projection),
        biases=self.b_projection,        # [num_classes]
        labels=labels,                   # [batch_size, 1]
        inputs=self.output_rnn_last,     # [batch_size, hidden_size*2]
        num_sampled=self.num_sampled,
        num_classes=self.num_classes,
        partition_strategy="div")
    loss = tf.reduce_mean(sampled_loss)

    # Weight decay over everything except bias variables.
    l2_losses = tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables()
         if 'bias' not in v.name]) * l2_lambda
    return loss + l2_losses
def loss(self, l2_lambda=0.0001):
    """Build the multi-label loss (NCE for training, sigmoid CE otherwise).

    Training: mean of ``tf.nn.nce_loss`` with
    ``num_true=self.max_label_per_example``. Per the nce_loss notes quoted in
    the original, a variable number of target classes has to be padded out to
    a constant number (by repeating them or using an otherwise unused class).

    Eval/inference: ``tf.nn.sigmoid_cross_entropy_with_logits`` against
    ``self.labels_l1999``, summed over axis 1 (one value per example).

    In both cases ``l2_lambda`` times the summed ``tf.nn.l2_loss`` of all
    trainable variables whose name lacks ``'bias'`` is added before returning.

    Args:
        l2_lambda: Multiplier for the L2 penalty.

    Returns:
        The loss tensor for the active mode with the L2 penalty added.
    """
    if self.is_training:
        # Shapes per the original annotations: W is [embed_size, label_size]
        # (transposed to [label_size, embed_size]); labels is
        # [batch_size, max_label_per_example]; inputs is [None, embed_size].
        sampled = tf.nn.nce_loss(
            weights=tf.transpose(self.W),
            biases=self.b,
            labels=self.labels,
            inputs=self.sentence_embeddings,
            num_sampled=self.num_sampled,
            num_true=self.max_label_per_example,
            num_classes=self.label_size,
            partition_strategy="div")
        total = tf.reduce_mean(sampled)
    else:
        # Independent per-class sigmoid cross entropy: classes are not
        # mutually exclusive in the multi-label setting.
        multi_hot_targets = self.labels_l1999  # [batch_size, label_size]
        per_class = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=multi_hot_targets, logits=self.logits)
        total = tf.reduce_sum(per_class, axis=1)

    # Original author's note: "add regularization result in not converge".
    non_bias_vars = [v for v in tf.trainable_variables()
                     if 'bias' not in v.name]
    penalty = tf.add_n([tf.nn.l2_loss(v) for v in non_bias_vars]) * l2_lambda
    total = total + penalty
    return total
def loss(self, l2_lambda=0.01):  # 0.0001-->0.001
    """Build the single-label loss (NCE for training, sigmoid CE otherwise).

    Training: ``self.labels`` is flattened and given a trailing axis of size
    one, then the mean of ``tf.nn.nce_loss`` is taken.

    Eval/inference: ``self.labels`` is one-hot encoded to ``self.label_size``
    classes, ``tf.nn.sigmoid_cross_entropy_with_logits`` is computed against
    ``self.logits`` and summed over axis 1. The intermediate and final
    tensors are printed.

    Args:
        l2_lambda: Multiplier for an L2 penalty tensor that is built but NOT
            added to the returned loss (see the note at the end of the body).

    Returns:
        The loss tensor for the active mode, without any L2 term.
    """
    if self.is_training:
        flat_targets = tf.reshape(self.labels, [-1])        # -> [batch_size]
        column_targets = tf.expand_dims(flat_targets, 1)    # -> [batch_size, 1]
        sampled = tf.nn.nce_loss(
            weights=tf.transpose(self.W),        # [label_size, embed_size]
            biases=self.b,                       # [label_size]
            labels=column_targets,
            inputs=self.sentence_embeddings,     # [None, embed_size]
            num_sampled=self.num_sampled,
            num_classes=self.label_size,
            partition_strategy="div")
        result = tf.reduce_mean(sampled)
    else:
        one_hot_targets = tf.one_hot(self.labels, self.label_size)
        per_class = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=one_hot_targets, logits=self.logits)
        print("loss0:", per_class)  # shape=(?, 1999) per the original note
        result = tf.reduce_sum(per_class, axis=1)
        print("loss1:", result)  # shape=(?,) per the original note

    # NOTE(review): the penalty below is created in the graph but never
    # added to the result, so l2_lambda has no effect on the returned loss.
    # Kept as in the original; confirm whether that is intentional.
    non_bias_vars = [v for v in tf.trainable_variables()
                     if 'bias' not in v.name]
    l2_penalty = tf.add_n(
        [tf.nn.l2_loss(v) for v in non_bias_vars]) * l2_lambda
    return result
def __call__(self, embed, train_labels):
    """Return the batch-mean NCE loss for ``embed`` against ``train_labels``.

    Args:
        embed: Forward activations handed to ``tf.nn.nce_loss`` as
            ``inputs`` (the summed doc and context embedding, per the
            original comment).
        train_labels: Target class ids; a trailing axis of size one is added
            before use.

    Returns:
        Tensor named ``"nce_batch_loss"``: the mean of the NCE loss.
    """
    with tf.name_scope("negative_sampling"):
        # Masking out skip/OOV labels with tf.boolean_mask was tried here
        # and left disabled: it triggers "UserWarning: Converting sparse
        # IndexedSlices to a dense Tensor of unknown shape", which may
        # consume a large amount of memory.
        train_labels = tf.expand_dims(train_labels, -1)

        # tf.nn.nce_loss draws a new sample of negative labels each time
        # the loss is evaluated. With sampled_values=None it falls back to
        # a log-uniform (Zipfian) sampler, which assumes labels are sorted
        # by frequency - which they are.
        if self.freqs is None:
            sampler = None
        else:
            sampler = tf.nn.fixed_unigram_candidate_sampler(
                train_labels,
                num_true=1,
                num_sampled=self.sample_size,
                unique=True,
                range_max=self.vocab_size,
                # num_reserved_ids=2 (skip / OoV) would only be needed if
                # those ids were not already present in the unigrams.
                distortion=self.power,
                unigrams=list(self.freqs))

        # Keyword arguments on purpose: the positional order of `inputs`
        # and `labels` differs between TF r0.12 and TF >= 1.0, so a
        # positional call silently swaps them on one of the two.
        per_example = tf.nn.nce_loss(
            weights=self.nce_weights,
            biases=self.nce_biases,
            inputs=embed,
            labels=train_labels,
            num_sampled=self.sample_size,
            num_classes=self.vocab_size,
            sampled_values=sampler)
        loss = tf.reduce_mean(per_example, name="nce_batch_loss")
        # TODO negative sampling versus NCE
        # TODO uniform vs. Zipf with exponent `distortion` param
        # https://www.tensorflow.org/versions/r0.12/api_docs/python/nn.html#log_uniform_candidate_sampler
    return loss
def __init__(
        self,
        inputs=None,
        train_labels=None,
        vocabulary_size=80000,
        embedding_size=200,
        num_sampled=64,
        nce_loss_args=None,
        E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
        E_init_args=None,
        nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
        nce_W_init_args=None,
        nce_b_init=tf.constant_initializer(value=0.0),
        nce_b_init_args=None,
        name='word2vec_layer',
):
    """Create the embedding lookup and the NCE training cost.

    Args:
        inputs: Word ids looked up in the embedding matrix.
        train_labels: Target ids passed to ``tf.nn.nce_loss`` as ``labels``.
        vocabulary_size: Number of rows of the embedding and NCE weight
            matrices; also ``num_classes`` for the NCE loss.
        embedding_size: Number of columns of both matrices.
        num_sampled: Number of negative classes sampled by the NCE loss.
        nce_loss_args: Extra keyword arguments for ``tf.nn.nce_loss``;
            ``None`` (default) means none.
        E_init: Initializer for the embedding matrix.
        E_init_args: Extra keyword arguments for the ``tf.get_variable``
            call creating the embedding matrix; ``None`` means none.
        nce_W_init: Initializer for the NCE weights.
        nce_W_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE weights; ``None`` means none.
        nce_b_init: Initializer for the NCE biases.
        nce_b_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE biases; ``None`` means none.
        name: Layer name, also used as the variable scope.
    """
    # None stands for "no extra keyword arguments". Using None instead of a
    # literal {} default avoids one dict object being shared by every call.
    nce_loss_args = {} if nce_loss_args is None else nce_loss_args
    E_init_args = {} if E_init_args is None else E_init_args
    nce_W_init_args = {} if nce_W_init_args is None else nce_W_init_args
    nce_b_init_args = {} if nce_b_init_args is None else nce_b_init_args

    Layer.__init__(self, name=name)
    self.inputs = inputs
    print(" [TL] Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))

    # A row of `embeddings` is the vector representation of one word.
    # Slicing rows with embedding_lookup is faster than converting ids to
    # one-hot vectors and multiplying by the matrix. `embed` is the output
    # of the embedding layer, with `embedding_size` values per id.
    with tf.variable_scope(name):
        embeddings = tf.get_variable(
            name='embeddings',
            shape=(vocabulary_size, embedding_size),
            initializer=E_init,
            **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)

        # Variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(
            name='nce_weights',
            shape=(vocabulary_size, embedding_size),
            initializer=nce_W_init,
            **nce_W_init_args)
        nce_biases = tf.get_variable(
            name='nce_biases',
            shape=(vocabulary_size,),  # real 1-tuple, not a parenthesised int
            initializer=nce_b_init,
            **nce_b_init_args)

    # Average NCE loss for the batch; nce_loss draws a new sample of
    # negative labels each time the loss is evaluated.
    # NOTE(review): placed after the variable scope; the original
    # indentation was lost - confirm.
    self.nce_cost = tf.reduce_mean(
        tf.nn.nce_loss(
            weights=nce_weights,
            biases=nce_biases,
            inputs=embed,
            labels=train_labels,
            num_sampled=num_sampled,
            num_classes=vocabulary_size,
            **nce_loss_args))

    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)

    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}
def __init__(
        self,
        inputs=None,
        train_labels=None,
        vocabulary_size=80000,
        embedding_size=200,
        num_sampled=64,
        nce_loss_args=None,
        E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
        E_init_args=None,
        nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
        nce_W_init_args=None,
        nce_b_init=tf.constant_initializer(value=0.0),
        nce_b_init_args=None,
        name='word2vec_layer',
):
    """Create the embedding lookup and the NCE training cost.

    All three variables are created with ``dtype=D_TYPE`` (a constant
    defined elsewhere in the module).

    Args:
        inputs: Word ids looked up in the embedding matrix.
        train_labels: Target ids passed to ``tf.nn.nce_loss`` as ``labels``.
        vocabulary_size: Number of rows of the embedding and NCE weight
            matrices; also ``num_classes`` for the NCE loss.
        embedding_size: Number of columns of both matrices.
        num_sampled: Number of negative classes sampled by the NCE loss.
        nce_loss_args: Extra keyword arguments for ``tf.nn.nce_loss``;
            ``None`` (default) means none.
        E_init: Initializer for the embedding matrix.
        E_init_args: Extra keyword arguments for the ``tf.get_variable``
            call creating the embedding matrix; ``None`` means none.
        nce_W_init: Initializer for the NCE weights.
        nce_W_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE weights; ``None`` means none.
        nce_b_init: Initializer for the NCE biases.
        nce_b_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE biases; ``None`` means none.
        name: Layer name, also used as the variable scope.
    """
    # None stands for "no extra keyword arguments". Using None instead of a
    # literal {} default avoids one dict object being shared by every call.
    nce_loss_args = {} if nce_loss_args is None else nce_loss_args
    E_init_args = {} if E_init_args is None else E_init_args
    nce_W_init_args = {} if nce_W_init_args is None else nce_W_init_args
    nce_b_init_args = {} if nce_b_init_args is None else nce_b_init_args

    Layer.__init__(self, name=name)
    self.inputs = inputs
    print(" [TL] Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))

    # A row of `embeddings` is the vector representation of one word.
    # Slicing rows with embedding_lookup is faster than converting ids to
    # one-hot vectors and multiplying by the matrix. `embed` is the output
    # of the embedding layer, with `embedding_size` values per id.
    with tf.variable_scope(name):
        embeddings = tf.get_variable(
            name='embeddings',
            shape=(vocabulary_size, embedding_size),
            initializer=E_init,
            dtype=D_TYPE,
            **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)

        # Variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(
            name='nce_weights',
            shape=(vocabulary_size, embedding_size),
            initializer=nce_W_init,
            dtype=D_TYPE,
            **nce_W_init_args)
        nce_biases = tf.get_variable(
            name='nce_biases',
            shape=(vocabulary_size,),  # real 1-tuple, not a parenthesised int
            initializer=nce_b_init,
            dtype=D_TYPE,
            **nce_b_init_args)

    # Average NCE loss for the batch; nce_loss draws a new sample of
    # negative labels each time the loss is evaluated.
    # NOTE(review): placed after the variable scope; the original
    # indentation was lost - confirm.
    self.nce_cost = tf.reduce_mean(
        tf.nn.nce_loss(
            weights=nce_weights,
            biases=nce_biases,
            inputs=embed,
            labels=train_labels,
            num_sampled=num_sampled,
            num_classes=vocabulary_size,
            **nce_loss_args))

    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)

    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}
def __init__(
        self,
        inputs=None,
        train_labels=None,
        vocabulary_size=80000,
        embedding_size=200,
        num_sampled=64,
        nce_loss_args=None,
        E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
        E_init_args=None,
        nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
        nce_W_init_args=None,
        nce_b_init=tf.constant_initializer(value=0.0),
        nce_b_init_args=None,
        name='word2vec_layer',
):
    """Create the embedding lookup and the NCE training cost.

    Also records ``embedding_size`` as ``self.n_units``.

    Args:
        inputs: Word ids looked up in the embedding matrix.
        train_labels: Target ids passed to ``tf.nn.nce_loss`` as ``labels``.
        vocabulary_size: Number of rows of the embedding and NCE weight
            matrices; also ``num_classes`` for the NCE loss.
        embedding_size: Number of columns of both matrices.
        num_sampled: Number of negative classes sampled by the NCE loss.
        nce_loss_args: Extra keyword arguments for ``tf.nn.nce_loss``;
            ``None`` (default) means none.
        E_init: Initializer for the embedding matrix.
        E_init_args: Extra keyword arguments for the ``tf.get_variable``
            call creating the embedding matrix; ``None`` means none.
        nce_W_init: Initializer for the NCE weights.
        nce_W_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE weights; ``None`` means none.
        nce_b_init: Initializer for the NCE biases.
        nce_b_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE biases; ``None`` means none.
        name: Layer name, also used as the variable scope.
    """
    # None stands for "no extra keyword arguments". Using None instead of a
    # literal {} default avoids one dict object being shared by every call.
    nce_loss_args = {} if nce_loss_args is None else nce_loss_args
    E_init_args = {} if E_init_args is None else E_init_args
    nce_W_init_args = {} if nce_W_init_args is None else nce_W_init_args
    nce_b_init_args = {} if nce_b_init_args is None else nce_b_init_args

    Layer.__init__(self, name=name)
    self.inputs = inputs
    self.n_units = embedding_size
    print(" tensorlayer:Instantiate Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))

    # A row of `embeddings` is the vector representation of one word.
    # Slicing rows with embedding_lookup is faster than converting ids to
    # one-hot vectors and multiplying by the matrix. `embed` is the output
    # of the embedding layer, with `embedding_size` values per id.
    with tf.variable_scope(name):
        embeddings = tf.get_variable(
            name='embeddings',
            shape=(vocabulary_size, embedding_size),
            initializer=E_init,
            **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)

        # Variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(
            name='nce_weights',
            shape=(vocabulary_size, embedding_size),
            initializer=nce_W_init,
            **nce_W_init_args)
        nce_biases = tf.get_variable(
            name='nce_biases',
            shape=(vocabulary_size,),  # real 1-tuple, not a parenthesised int
            initializer=nce_b_init,
            **nce_b_init_args)

    # Average NCE loss for the batch; nce_loss draws a new sample of
    # negative labels each time the loss is evaluated.
    # NOTE(review): placed after the variable scope; the original
    # indentation was lost - confirm.
    self.nce_cost = tf.reduce_mean(
        tf.nn.nce_loss(
            weights=nce_weights,
            biases=nce_biases,
            inputs=embed,
            labels=train_labels,
            num_sampled=num_sampled,
            num_classes=vocabulary_size,
            **nce_loss_args))

    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)

    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}
def __init__(
        self,
        inputs=None,
        train_labels=None,
        vocabulary_size=80000,
        embedding_size=200,
        num_sampled=64,
        nce_loss_args=None,
        E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
        E_init_args=None,
        nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
        nce_W_init_args=None,
        nce_b_init=tf.constant_initializer(value=0.0),
        nce_b_init_args=None,
        name='word2vec_layer',
):
    """Create the embedding lookup and the NCE training cost.

    Args:
        inputs: Word ids looked up in the embedding matrix.
        train_labels: Target ids passed to ``tf.nn.nce_loss`` as ``labels``.
        vocabulary_size: Number of rows of the embedding and NCE weight
            matrices; also ``num_classes`` for the NCE loss.
        embedding_size: Number of columns of both matrices.
        num_sampled: Number of negative classes sampled by the NCE loss.
        nce_loss_args: Extra keyword arguments for ``tf.nn.nce_loss``;
            ``None`` (default) means none.
        E_init: Initializer for the embedding matrix.
        E_init_args: Extra keyword arguments for the ``tf.get_variable``
            call creating the embedding matrix; ``None`` means none.
        nce_W_init: Initializer for the NCE weights.
        nce_W_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE weights; ``None`` means none.
        nce_b_init: Initializer for the NCE biases.
        nce_b_init_args: Extra ``tf.get_variable`` keyword arguments for
            the NCE biases; ``None`` means none.
        name: Layer name, also used as the variable scope.
    """
    # None stands for "no extra keyword arguments". Using None instead of a
    # literal {} default avoids one dict object being shared by every call.
    nce_loss_args = {} if nce_loss_args is None else nce_loss_args
    E_init_args = {} if E_init_args is None else E_init_args
    nce_W_init_args = {} if nce_W_init_args is None else nce_W_init_args
    nce_b_init_args = {} if nce_b_init_args is None else nce_b_init_args

    Layer.__init__(self, name=name)
    self.inputs = inputs
    print(" tensorlayer:Instantiate Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))

    # A row of `embeddings` is the vector representation of one word.
    # Slicing rows with embedding_lookup is faster than converting ids to
    # one-hot vectors and multiplying by the matrix. `embed` is the output
    # of the embedding layer, with `embedding_size` values per id.
    with tf.variable_scope(name):
        embeddings = tf.get_variable(
            name='embeddings',
            shape=(vocabulary_size, embedding_size),
            initializer=E_init,
            **E_init_args)
        embed = tf.nn.embedding_lookup(embeddings, self.inputs)

        # Variables for the NCE loss (i.e. negative sampling).
        nce_weights = tf.get_variable(
            name='nce_weights',
            shape=(vocabulary_size, embedding_size),
            initializer=nce_W_init,
            **nce_W_init_args)
        nce_biases = tf.get_variable(
            name='nce_biases',
            shape=(vocabulary_size,),  # real 1-tuple, not a parenthesised int
            initializer=nce_b_init,
            **nce_b_init_args)

    # Average NCE loss for the batch; nce_loss draws a new sample of
    # negative labels each time the loss is evaluated.
    # NOTE(review): placed after the variable scope; the original
    # indentation was lost - confirm.
    self.nce_cost = tf.reduce_mean(
        tf.nn.nce_loss(
            weights=nce_weights,
            biases=nce_biases,
            inputs=embed,
            labels=train_labels,
            num_sampled=num_sampled,
            num_classes=vocabulary_size,
            **nce_loss_args))

    self.outputs = embed
    self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)

    self.all_layers = [self.outputs]
    self.all_params = [embeddings, nce_weights, nce_biases]
    self.all_drop = {}