Python util module: softmax() example source code

We have collected the following 8 code examples from open-source Python projects to illustrate how util.softmax() is used.
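
All of the examples call a project-local util.softmax() helper whose definition is not shown on this page. For reference only, a minimal numerically stable implementation might look like the sketch below; the signature and axis handling are assumptions, not any project's actual code.

import numpy as np

def softmax(x, axis=-1):
    """Minimal reference sketch of a numerically stable softmax (assumed)."""
    # Shift by the per-axis max so exp() cannot overflow; the result is unchanged.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=axis, keepdims=True)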

Project: RNTN    Author: munikarmanish    | project source | file source
def predict(self, tree):
        if tr.isleaf(tree):
            # output = word vector
            try:
                tree.vector = self.L[:, self.word_map[tree[0]]]
            except KeyError:
                # unknown word: fall back to the UNK vector
                tree.vector = self.L[:, self.word_map[tr.UNK]]
        else:
            # calculate output of child nodes
            self.predict(tree[0])
            self.predict(tree[1])

            # compute output
            lr = np.hstack([tree[0].vector, tree[1].vector])
            tree.vector = np.tanh(
                np.tensordot(self.V, np.outer(lr, lr), axes=([1, 2], [0, 1])) +
                np.dot(self.W, lr) + self.b)

        # softmax
        import util
        tree.output = util.softmax(np.dot(self.Ws, tree.vector) + self.bs)
        label = np.argmax(tree.output)
        tree.set_label(str(label))
        return tree
Project: reddit-iambic-pentameter    Author: pmichel31415    | project source | file source
def sample_noun(self, vector):
        """Sample a noun at random.
        The probability of word :math:`w` is
        .. math::
            \log(p(w))\propto w^Tv`

        where :math:`p` is the poem vector and :math:`w` the word vector"""
        p = util.softmax(self.noun_vectors.dot(vector) / self.tau)
        return npr.choice(self.nouns, p=p)
Project: reddit-iambic-pentameter    Author: pmichel31415    | project source | file source
def sample_adjective(self, vector):
        """Sample an adjective at random (same method as sample_noun)"""
        p = util.softmax(self.adj_vectors.dot(vector) / self.tau)
        return npr.choice(self.adjs, p=p)
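
Both samplers divide the similarity scores by a temperature self.tau before the softmax: a small tau sharpens the distribution toward the best-matching words, a large tau flattens it toward uniform. A self-contained sketch of the same pattern (the function name is illustrative, not from the project):

import numpy as np
import numpy.random as npr

def sample_with_temperature(scores, items, tau=1.0):
    # Temperature-scaled softmax sampling: tau -> 0 approaches argmax,
    # a large tau approaches uniform sampling.
    logits = scores / tau
    logits = logits - np.max(logits)   # numerical stability
    p = np.exp(logits)
    p /= p.sum()
    return npr.choice(items, p=p)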
Project: dict_based_learning    Author: tombosc    | project source | file source
def proba(self, features, params):
        """
        return a categorical probability distribution over the vocabulary
        """
        product = np.dot(features, params)
        return softmax(product, self.T)
Project: RNTN    Author: munikarmanish    | project source | file source
def forward_prop(self, tree):
        cost = 0.0
        result = np.zeros((5,5))

        if tr.isleaf(tree):
            # output = word vector
            try:
                tree.vector = self.L[:, self.word_map[tree[0]]]
            except KeyError:
                # unknown word: fall back to the UNK vector
                tree.vector = self.L[:, self.word_map[tr.UNK]]
            tree.fprop = True
        else:
            # calculate output of child nodes
            lcost, lresult = self.forward_prop(tree[0])
            rcost, rresult = self.forward_prop(tree[1])
            cost += lcost + rcost
            result += lresult + rresult

            # compute output
            lr = np.hstack([tree[0].vector, tree[1].vector])
            tree.vector = np.tanh(
                np.tensordot(self.V, np.outer(lr, lr), axes=([1, 2], [0, 1])) +
                np.dot(self.W, lr) + self.b)

        # softmax
        tree.output = np.dot(self.Ws, tree.vector) + self.bs
        tree.output -= np.max(tree.output)
        tree.output = np.exp(tree.output)
        tree.output /= np.sum(tree.output)

        tree.fprop = True

        # cost and confusion-matrix entry
        true_label = int(tree.label())
        predicted_label = np.argmax(tree.output)
        cost -= np.log(tree.output[true_label])
        result[true_label, predicted_label] += 1

        return cost, result
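
The three assignments that build tree.output above are an inline, numerically stable softmax. Assuming util.softmax performs the same shift-exponentiate-normalize computation (as in the reference sketch at the top of this page), that block is equivalent to:

# assumes util.softmax is a numerically stable softmax over a 1-D array,
# as used in predict() above
tree.output = util.softmax(np.dot(self.Ws, tree.vector) + self.bs)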
Project: RNTN    Author: munikarmanish    | project source | file source
def back_prop(self, tree, error=None):
        # clear nodes
        tree.fprop = False

        # softmax grad
        deltas = tree.output
        deltas[int(tree.label())] -= 1.0
        self.dWs += np.outer(deltas, tree.vector)
        self.dbs += deltas
        deltas = np.dot(self.Ws.T, deltas)
        if error is not None:
            deltas += error
        deltas *= (1 - tree.vector**2)

        # leaf node => update word vectors
        if tr.isleaf(tree):
            try:
                index = self.word_map[tree[0]]
            except KeyError:
                index = self.word_map[tr.UNK]
            self.dL[index] += deltas
            return

        # Hidden gradients
        else:
            lr = np.hstack([tree[0].vector, tree[1].vector])
            outer = np.outer(deltas, lr)
            self.dV += (np.outer(lr, lr)[..., None] * deltas).T
            self.dW += outer
            self.db += deltas

            # Compute error for children
            deltas = np.dot(self.W.T, deltas)
            deltas += np.tensordot(self.V.transpose((0,2,1)) + self.V, outer.T,
                                   axes=([1,0], [0,1]))

            self.back_prop(tree[0], deltas[:self.dim])
            self.back_prop(tree[1], deltas[self.dim:])
Project: dict_based_learning    Author: tombosc    | project source | file source
def __init__(self, n_primes, n_composed, features_size, markov_order, 
                 temperature=1.0, min_len_definitions=2, max_len_definitions=4):
        """
        markov_order: integer at least 1 such that
            p(x_t|x_t-1:x_1) = p(x_t|x_t-1:x_t-markov_order)
        temperature: temperature for softmax
        """
        self.mo = markov_order
        self.np = n_primes
        self.nc = n_composed
        self.V = self.np + self.nc
        self.T = temperature
        self.min_len_def = min_len_definitions
        self.max_len_def = max_len_definitions
        self.features_size = features_size

        # tokens are composed of a..z letters 
        alphabet = ''.join([chr(c) for c in range(97, 97+26)]) # str(a..z)
        # tokens all have the same size tok_len
        self.tok_len = int(np.log(self.V) / np.log(len(alphabet)) + 1)
        # enumerate all the tokens
        self.vocabulary = []
        for i, tok in zip(range(self.V),
                          itertools.product(alphabet, repeat=self.tok_len)):
            self.vocabulary.append(''.join(tok))

        self.params = uniform(0,1,(self.mo * features_size, self.V))
        self.features = uniform(0,1,(self.V,features_size))
        self.dictionary = {}
        for i in range(self.np, self.np+self.nc):
            # sample len of def, sample def, store in dictionary
            # then compute the features as a rescaled mean of the features
            len_diff = self.max_len_def - self.min_len_def
            len_def = np.random.choice(len_diff) + self.min_len_def
            definition = np.random.choice(self.np, size=len_def, replace=False)
            tok = self.vocabulary[i]
            self.dictionary[tok] = [self.vocabulary[e] for e in definition]
            #factor = np.random.beta(a=3, b=2.5) # closer to 1 than 0
            #factor = np.random.beta(a=1, b=3) # closer to 0 than 1
            factor = 1  # 1/(8*self.nc)
            f = factor * np.mean([self.features[e] for e in definition], axis=0)
            self.features[i] = f

        self.initial_features = uniform(0,1,(self.mo, features_size))
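
A hypothetical usage sketch tying this constructor to the proba() method shown earlier from the same project; the class name FakeTextGenerator and the flattening of initial_features are assumptions made for illustration only.

import numpy as np

# Hypothetical usage (names assumed): draw one token from the toy language model.
gen = FakeTextGenerator(n_primes=50, n_composed=20, features_size=10, markov_order=2)
context = gen.initial_features.flatten()   # shape (markov_order * features_size,)
p = gen.proba(context, gen.params)         # categorical distribution over the vocabulary
token = np.random.choice(gen.vocabulary, p=p)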
Project: attention-over-attention-tf-QA    Author: lc222    | project source | file source
def inference(documents, doc_mask, query, query_mask):

  embedding = tf.get_variable('embedding',
              [FLAGS.vocab_size, FLAGS.embedding_size],
              initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))

  regularizer = tf.nn.l2_loss(embedding)

  doc_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, documents), FLAGS.dropout_keep_prob)
  doc_emb.set_shape([None, None, FLAGS.embedding_size])

  query_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, query), FLAGS.dropout_keep_prob)
  query_emb.set_shape([None, None, FLAGS.embedding_size])

  with tf.variable_scope('document', initializer=orthogonal_initializer()):
    fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
    back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

    doc_len = tf.reduce_sum(doc_mask, reduction_indices=1)
    h, _ = tf.nn.bidirectional_dynamic_rnn(
        fwd_cell, back_cell, doc_emb, sequence_length=tf.to_int64(doc_len), dtype=tf.float32)
    #h_doc = tf.nn.dropout(tf.concat(2, h), FLAGS.dropout_keep_prob)
    h_doc = tf.concat(h, 2)

  with tf.variable_scope('query', initializer=orthogonal_initializer()):
    fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
    back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

    query_len = tf.reduce_sum(query_mask, reduction_indices=1)
    h, _ = tf.nn.bidirectional_dynamic_rnn(
        fwd_cell, back_cell, query_emb, sequence_length=tf.to_int64(query_len), dtype=tf.float32)
    #h_query = tf.nn.dropout(tf.concat(2, h), FLAGS.dropout_keep_prob)
    h_query = tf.concat(h, 2)

  M = tf.matmul(h_doc, h_query, adjoint_b=True)
  M_mask = tf.to_float(tf.matmul(tf.expand_dims(doc_mask, -1), tf.expand_dims(query_mask, 1)))

  alpha = softmax(M, 1, M_mask)
  beta = softmax(M, 2, M_mask)

  #query_importance = tf.expand_dims(tf.reduce_mean(beta, reduction_indices=1), -1)
  query_importance = tf.expand_dims(tf.reduce_sum(beta, 1) / tf.to_float(tf.expand_dims(doc_len, -1)), -1)

  s = tf.squeeze(tf.matmul(alpha, query_importance), [2])

  unpacked_s = zip(tf.unstack(s, FLAGS.batch_size), tf.unstack(documents, FLAGS.batch_size))
  y_hat = tf.stack([tf.unsorted_segment_sum(attentions, sentence_ids, FLAGS.vocab_size) for (attentions, sentence_ids) in unpacked_s])

  return y_hat, regularizer
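
Note that softmax(M, axis, mask) in this last example is the attention project's own masked softmax over a given axis, not util.softmax from the earlier examples. A minimal TF 1.x-style sketch of what such a helper might do (an assumption about the actual implementation):

import tensorflow as tf

def masked_softmax(logits, axis, mask, epsilon=1e-12):
    # Stable softmax along `axis` that ignores masked-out (mask == 0) positions.
    shifted = logits - tf.reduce_max(logits, axis, keep_dims=True)
    exp = tf.exp(shifted) * mask
    return exp / (tf.reduce_sum(exp, axis, keep_dims=True) + epsilon)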