Python scipy.special module: psi() example source code

We have extracted the following 36 code examples from open-source Python projects to illustrate how to use scipy.special.psi().

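For orientation, a minimal sketch (not taken from any of the projects below): scipy.special.psi is the digamma function, the logarithmic derivative of the gamma function, and it broadcasts elementwise over NumPy arrays.

import numpy as np
from scipy.special import psi

print(psi(1.0))                          # -0.57721566... (negative Euler-Mascheroni constant)
print(psi(np.array([1.0, 2.0, 3.0])))    # elementwise digamma: [-0.5772  0.4228  0.9228]
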
Project: paragraph2vec    Author: thunlp    | Project source | File source
def update_expectations(self):
        """
        Since we're doing lazy updates on lambda, at any given moment
        the current state of lambda may not be accurate. This function
        updates all of the elements of lambda and Elogbeta
        so that if (for example) we want to print out the
        topics we've learned we'll get the correct behavior.
        """
        for w in xrange(self.m_W):
            self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                          self.m_r[self.m_timestamp[w]])
        self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        self.m_timestamp[:] = self.m_updatect
        self.m_status_up_to_date = True
Project: CSB    Author: csb-toolbox    | Project source | File source
def inv_digamma_minus_log(y, tol=1e-10, n_iter=100):
    """
    Solve y = psi(alpha) - log(alpha) for alpha by fixed point
    integration.
    """
    if y >= -log(6.):
        x = 1 / (2 * (1 - exp(y)))
    else:
        x = 1.e-10
    for _i in range(n_iter):
        z = approx_psi(x) - log(x) - y
        if abs(z) < tol:
            break
        x -= x * z / (x * d_approx_psi(x) - 1)
        x = abs(x)
    return x
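A quick self-contained check of the same Newton/fixed-point idea (a sketch, not CSB code; it substitutes scipy.special.psi and polygamma(1, .) for the project's approx_psi and d_approx_psi helpers):

import numpy as np
from scipy.special import psi, polygamma

def inv_psi_minus_log(y, tol=1e-10, n_iter=100):
    """Solve y = psi(x) - log(x) for x > 0 by Newton iteration (y must be negative)."""
    x = 1.0 / (2.0 * (1.0 - np.exp(y))) if y >= -np.log(6.0) else 1e-10
    for _ in range(n_iter):
        z = psi(x) - np.log(x) - y               # residual f(x)
        if abs(z) < tol:
            break
        x -= z / (polygamma(1, x) - 1.0 / x)     # Newton step: z / f'(x)
        x = abs(x)                               # keep the iterate positive
    return x

x = inv_psi_minus_log(-0.3)
print(psi(x) - np.log(x))   # approximately -0.3
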
Project: CSB    Author: csb-toolbox    | Project source | File source
def initial_values(self, tol=1e-10):
        """
        Generate initial values by doing fixed point
        iterations to solve for alpha
        """
        n, a, b = self.n, self.a, self.b

        if abs(b) < 1e-10:
            alpha = inv_psi(a / n)
        else:
            alpha = 1.

        z = tol + 1.

        while abs(z) > tol:

            z = n * psi(alpha) - \
                b / numpy.clip(alpha, 1e-300, 1e300) - a

            alpha -= z / (n * d_approx_psi(alpha) - b
                          / (alpha ** 2 + 1e-300))
            alpha = numpy.clip(alpha, 1e-100, 1e300)

        return numpy.clip(alpha - 1 / (n + 1e-300), 1e-100, 1e300), \
               alpha + 1 / (n + 1e-300), alpha
Project: CSB    Author: csb-toolbox    | Project source | File source
def estimate(self, context, data):
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        a = alpha + 0.5 * d * len(data.shape)
        b = beta + 0.5 * data.sum(-1) ** 2

        s = a / b
        log_s = psi(a) - log(b)

        pdf.scales = s
        context.prior.estimate([s, log_s])
        pdf.prior = context.prior

        return pdf
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def update_expectations(self):
        """
        Since we're doing lazy updates on lambda, at any given moment
        the current state of lambda may not be accurate. This function
        updates all of the elements of lambda and Elogbeta
        so that if (for example) we want to print out the
        topics we've learned we'll get the correct behavior.
        """
        for w in xrange(self.m_W):
            self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                          self.m_r[self.m_timestamp[w]])
        self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        self.m_timestamp[:] = self.m_updatect
        self.m_status_up_to_date = True
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def update_expectations(self):
        """
        Since we're doing lazy updates on lambda, at any given moment
        the current state of lambda may not be accurate. This function
        updates all of the elements of lambda and Elogbeta
        so that if (for example) we want to print out the
        topics we've learned we'll get the correct behavior.
        """
        for w in xrange(self.m_W):
            self.m_lambda[:, w] *= np.exp(self.m_r[-1] -
                                          self.m_r[self.m_timestamp[w]])
        self.m_Elogbeta = sp.psi(self.m_eta + self.m_lambda) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        self.m_timestamp[:] = self.m_updatect
        self.m_status_up_to_date = True
Project: SEVDL_MGP    Author: AMLab-Amsterdam    | Project source | File source
def loglike(nn, sample_preds, y):
    """Return the Avg. Test Log-Likelihood
    """
    if y.shape[1] == 1:
        y = y.ravel()
    sample_ll = np.zeros((sample_preds.shape[1], sample_preds.shape[0]))
    a, b = np.exp(nn.extra_inf['a1'].get_value()), np.exp(nn.extra_inf['b1'].get_value())
    etau, elogtau = (a / b).astype(np.float32), (psi(a) - np.log(b)).astype(np.float32)
    for sample in xrange(sample_preds.shape[0]):
        ypred = sample_preds[sample].astype(np.float32)
        if len(y.shape) > 1:
            sll = -.5 * np.sum(etau * (y - ypred)**2, axis=1)
        else:
            sll = -.5 * etau * (y - ypred)**2
        sample_ll[:, sample] = sll

    return np.mean(logsumexp(sample_ll, axis=1) - np.log(sample_preds.shape[0]) - .5 * np.log(2*np.pi) + .5 * elogtau.sum())
Project: paragraph2vec    Author: thunlp    | Project source | File source
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), compute E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(sp.psi(alpha) - sp.psi(np.sum(alpha)))
    return(sp.psi(alpha) - sp.psi(np.sum(alpha, 1))[:, np.newaxis])
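A hypothetical usage sketch for the excerpt above (the variable names and values are illustrative, and the aliases match the project's imports):

import numpy as np
from scipy import special as sp   # aliases assumed by dirichlet_expectation above

alpha = np.array([0.5, 1.0, 2.0])
print(dirichlet_expectation(alpha))            # 1-D case: E[log theta_k] for theta ~ Dir(alpha)
print(dirichlet_expectation(np.ones((2, 3))))  # 2-D case: one expectation per row of parameters
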
Project: paragraph2vec    Author: thunlp    | Project source | File source
def expect_log_sticks(sticks):
    """
    For stick-breaking hdp, return the E[log(sticks)]
    """
    dig_sum = sp.psi(np.sum(sticks, 0))
    ElogW = sp.psi(sticks[0]) - dig_sum
    Elog1_W = sp.psi(sticks[1]) - dig_sum

    n = len(sticks[0]) + 1
    Elogsticks = np.zeros(n)
    Elogsticks[0: n - 1] = ElogW
    Elogsticks[1:] = Elogsticks[1:] + np.cumsum(Elog1_W)
    return Elogsticks
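A hypothetical usage sketch (an assumption based on the docstring, not project code): sticks is a (2, T-1) array whose rows hold the Beta variational parameters a_i and b_i of the T-1 stick-breaking proportions, and the function returns the T values E[log pi_i].

import numpy as np
from scipy import special as sp   # alias assumed by expect_log_sticks above

sticks = np.vstack([np.ones(4),          # a_i
                    np.full(4, 2.0)])    # b_i
print(expect_log_sticks(sticks))         # 5 entries: E[log pi_0] ... E[log pi_4]
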
Project: paragraph2vec    Author: thunlp    | Project source | File source
def update_chunk(self, chunk, update=True, opt_o=True):
        # Find the unique words in this chunk...
        unique_words = dict()
        word_list = []
        for doc in chunk:
            for word_id, _ in doc:
                if word_id not in unique_words:
                    unique_words[word_id] = len(unique_words)
                    word_list.append(word_id)

        Wt = len(word_list) # length of words in these documents

        # ...and do the lazy updates on the necessary columns of lambda
        rw = np.array([self.m_r[t] for t in self.m_timestamp[word_list]])
        self.m_lambda[:, word_list] *= np.exp(self.m_r[-1] - rw)
        self.m_Elogbeta[:, word_list] = \
            sp.psi(self.m_eta + self.m_lambda[:, word_list]) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        ss = SuffStats(self.m_T, Wt, len(chunk))

        Elogsticks_1st = expect_log_sticks(self.m_var_sticks) # global sticks

        # run variational inference on some new docs
        score = 0.0
        count = 0
        for doc in chunk:
            if len(doc) > 0:
                doc_word_ids, doc_word_counts = zip(*doc)
                doc_score = self.doc_e_step(doc, ss, Elogsticks_1st,
                    word_list, unique_words, doc_word_ids,
                    doc_word_counts, self.m_var_converge)
                count += sum(doc_word_counts)
                score += doc_score

        if update:
            self.update_lambda(ss, word_list, opt_o)

        return (score, count)
Project: CSB    Author: csb-toolbox    | Project source | File source
def __call__(self, x):

        from scipy.special import gammaln

        return self.a * x - \
               self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) + \
               self.b * log(x), \
               self.a - self.n * psi(x) + self.b / x
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), compute E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(sp.psi(alpha) - sp.psi(np.sum(alpha)))
    return(sp.psi(alpha) - sp.psi(np.sum(alpha, 1))[:, np.newaxis])
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def expect_log_sticks(sticks):
    """
    For stick-breaking hdp, return the E[log(sticks)]
    """
    dig_sum = sp.psi(np.sum(sticks, 0))
    ElogW = sp.psi(sticks[0]) - dig_sum
    Elog1_W = sp.psi(sticks[1]) - dig_sum

    n = len(sticks[0]) + 1
    Elogsticks = np.zeros(n)
    Elogsticks[0: n - 1] = ElogW
    Elogsticks[1:] = Elogsticks[1:] + np.cumsum(Elog1_W)
    return Elogsticks
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def update_chunk(self, chunk, update=True, opt_o=True):
        # Find the unique words in this chunk...
        unique_words = dict()
        word_list = []
        for doc in chunk:
            for word_id, _ in doc:
                if word_id not in unique_words:
                    unique_words[word_id] = len(unique_words)
                    word_list.append(word_id)

        Wt = len(word_list) # length of words in these documents

        # ...and do the lazy updates on the necessary columns of lambda
        rw = np.array([self.m_r[t] for t in self.m_timestamp[word_list]])
        self.m_lambda[:, word_list] *= np.exp(self.m_r[-1] - rw)
        self.m_Elogbeta[:, word_list] = \
            sp.psi(self.m_eta + self.m_lambda[:, word_list]) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        ss = SuffStats(self.m_T, Wt, len(chunk))

        Elogsticks_1st = expect_log_sticks(self.m_var_sticks) # global sticks

        # run variational inference on some new docs
        score = 0.0
        count = 0
        for doc in chunk:
            if len(doc) > 0:
                doc_word_ids, doc_word_counts = zip(*doc)
                doc_score = self.doc_e_step(doc, ss, Elogsticks_1st,
                    word_list, unique_words, doc_word_ids,
                    doc_word_counts, self.m_var_converge)
                count += sum(doc_word_counts)
                score += doc_score

        if update:
            self.update_lambda(ss, word_list, opt_o)

        return (score, count)
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), compute E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(sp.psi(alpha) - sp.psi(np.sum(alpha)))
    return(sp.psi(alpha) - sp.psi(np.sum(alpha, 1))[:, np.newaxis])
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def update_chunk(self, chunk, update=True, opt_o=True):
        # Find the unique words in this chunk...
        unique_words = dict()
        word_list = []
        for doc in chunk:
            for word_id, _ in doc:
                if word_id not in unique_words:
                    unique_words[word_id] = len(unique_words)
                    word_list.append(word_id)

        Wt = len(word_list) # length of words in these documents

        # ...and do the lazy updates on the necessary columns of lambda
        rw = np.array([self.m_r[t] for t in self.m_timestamp[word_list]])
        self.m_lambda[:, word_list] *= np.exp(self.m_r[-1] - rw)
        self.m_Elogbeta[:, word_list] = \
            sp.psi(self.m_eta + self.m_lambda[:, word_list]) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        ss = SuffStats(self.m_T, Wt, len(chunk))

        Elogsticks_1st = expect_log_sticks(self.m_var_sticks) # global sticks

        # run variational inference on some new docs
        score = 0.0
        count = 0
        for doc in chunk:
            if len(doc) > 0:
                doc_word_ids, doc_word_counts = zip(*doc)
                doc_score = self.doc_e_step(doc, ss, Elogsticks_1st,
                    word_list, unique_words, doc_word_ids,
                    doc_word_counts, self.m_var_converge)
                count += sum(doc_word_counts)
                score += doc_score

        if update:
            self.update_lambda(ss, word_list, opt_o)

        return (score, count)
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), compute E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(sp.psi(alpha) - sp.psi(np.sum(alpha)))
    return(sp.psi(alpha) - sp.psi(np.sum(alpha, 1))[:, np.newaxis])
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def expect_log_sticks(sticks):
    """
    For stick-breaking hdp, return the E[log(sticks)]
    """
    dig_sum = sp.psi(np.sum(sticks, 0))
    ElogW = sp.psi(sticks[0]) - dig_sum
    Elog1_W = sp.psi(sticks[1]) - dig_sum

    n = len(sticks[0]) + 1
    Elogsticks = np.zeros(n)
    Elogsticks[0: n - 1] = ElogW
    Elogsticks[1:] = Elogsticks[1:] + np.cumsum(Elog1_W)
    return Elogsticks
Project: topical_word_embeddings    Author: thunlp    | Project source | File source
def update_chunk(self, chunk, update=True, opt_o=True):
        # Find the unique words in this chunk...
        unique_words = dict()
        word_list = []
        for doc in chunk:
            for word_id, _ in doc:
                if word_id not in unique_words:
                    unique_words[word_id] = len(unique_words)
                    word_list.append(word_id)

        Wt = len(word_list) # length of words in these documents

        # ...and do the lazy updates on the necessary columns of lambda
        rw = np.array([self.m_r[t] for t in self.m_timestamp[word_list]])
        self.m_lambda[:, word_list] *= np.exp(self.m_r[-1] - rw)
        self.m_Elogbeta[:, word_list] = \
            sp.psi(self.m_eta + self.m_lambda[:, word_list]) - \
            sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

        ss = SuffStats(self.m_T, Wt, len(chunk))

        Elogsticks_1st = expect_log_sticks(self.m_var_sticks) # global sticks

        # run variational inference on some new docs
        score = 0.0
        count = 0
        for doc in chunk:
            if len(doc) > 0:
                doc_word_ids, doc_word_counts = zip(*doc)
                doc_score = self.doc_e_step(doc, ss, Elogsticks_1st,
                    word_list, unique_words, doc_word_ids,
                    doc_word_counts, self.m_var_converge)
                count += sum(doc_word_counts)
                score += doc_score

        if update:
            self.update_lambda(ss, word_list, opt_o)

        return (score, count)
Project: DataScience-And-MachineLearning-Handbook-For-Coders    Author: wxyyxc1992    | Project source | File source
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(psi(alpha) - psi(n.sum(alpha)))
    return(psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis])
Project: adversarial-variational-bayes    Author: gdikov    | Project source | File source
def estimation(self, y1, y2):
        """ Estimate cross-entropy.

        Parameters
        ----------
        y1 : (number of samples1, dimension)-ndarray
             One row of y1 corresponds to one sample.
        y2 : (number of samples2, dimension)-ndarray
             One row of y2 corresponds to one sample.

        Returns
        -------
        c : float
            Estimated cross-entropy.

        References
        ----------
        Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of
        Renyi information estimators for multidimensional densities.
        Annals of Statistics, 36(5):2153-2182, 2008.

        Examples
        --------
        c = co.estimation(y1,y2)  

        """

        # verification:
        self.verification_equal_d_subspaces(y1, y2)

        num_of_samples2, dim = y2.shape  # number of samples, dimension

        # computation:
        v = volume_of_the_unit_ball(dim)
        distances_y2y1 = knn_distances(y2, y1, False, self.knn_method,
                                       self.k, self.eps, 2)[0]
        c = log(v) + log(num_of_samples2) - psi(self.k) + \
            dim * mean(log(distances_y2y1[:, -1]))

        return c
Project: GraphicalModelForRecommendation    Author: AlgorithmFan    | Project source | File source
def _compute_expectations(gamma, rho):
    return (gamma/rho, special.psi(gamma) - np.log(rho))
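A hypothetical usage sketch (an assumption, not project code): for x ~ Gamma(shape=gamma, rate=rho), the returned pair is (E[x], E[log x]) = (gamma/rho, psi(gamma) - log(rho)).

import numpy as np
from scipy import special   # alias assumed by _compute_expectations above

e_x, e_logx = _compute_expectations(np.array([2.0, 5.0]), np.array([1.0, 2.0]))
print(e_x)      # [2.  2.5]
print(e_logx)   # [psi(2) - log(1), psi(5) - log(2)] ~ [0.4228, 0.8130]
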
Project: Apply-LDA    Author: xiegonghai    | Project source | File source
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(psi(alpha) - psi(n.sum(alpha)))
    return(psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis])
Project: Apply-LDA    Author: xiegonghai    | Project source | File source
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(psi(alpha) - psi(n.sum(alpha)))
    return(psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis])
Project: pymake    Author: dtrckd    | Project source | File source
def run_e_step(self):
        """ compute variational expectations
        """
        ll = 0.

        for p in range(self.N):
            for q in range(self.N):
                new_phi = np.zeros(self.K)

                for g in range(self.K):
                    new_phi[g] = np.exp(psi(self.gamma[p,g])-psi(np.sum(self.gamma[p,:]))) * np.prod(( (self.B[g,:]**self.Y[p,q])
                        * ((1.-self.B[g,:])**(1.-self.Y[p,q])) )
                        ** self.phi[q,p,:] )
                self.phi[p,q,:] = new_phi/np.sum(new_phi)

                new_phi = np.zeros(self.K)
                for h in range(self.K):
                    new_phi[h] = np.exp(psi(self.gamma[q,h])-psi(np.sum(self.gamma[q,:]))) * np.prod(( (self.B[:,h]**self.Y[p,q])
                        * ((1.-self.B[:,h])**(1.-self.Y[p,q])) )
                        ** self.phi[p,q,:] )
                self.phi[q,p,:] = new_phi/np.sum(new_phi)

                for k in range(self.K):
                    self.gamma[p,k] = self.alpha[k] + np.sum(self.phi[p,:,k]) + np.sum(self.phi[:,p,k])
                    self.gamma[q,k] = self.alpha[k] + np.sum(self.phi[q,:,k]) + np.sum(self.phi[:,q,k])

        return ll
Project: stochasticLDA    Author: qlai    | Project source | File source
def dirichlet_expectation(alpha):
    '''see onlineldavb.py by Blei et al'''
    if (len(alpha.shape) == 1):
        return (psi(alpha) - psi(n.sum(alpha)))
    return (psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis])
Project: stochasticLDA    Author: qlai    | Project source | File source
def beta_expectation(a, b, k):
    mysum = psi(a + b)
    Elog_a = psi(a) - mysum
    Elog_b = psi(b) - mysum
    Elog_beta = n.zeros(k)
    Elog_beta[0] = Elog_a[0]
    # print Elog_beta
    for i in range(1, k):
        Elog_beta[i] = Elog_a[i] + n.sum(Elog_b[0:i])
        # print Elog_beta
    # print Elog_beta
    return Elog_beta
Project: LDA-Variational-EM    Author: laserwave    | Project source | File source
def variationalInference(docs, d, gamma, phi):
    phisum = 0
    oldphi = np.zeros([K])
    digamma_gamma = np.zeros([K])

    for z in range(0, K):
        gamma[d][z] = alpha + docs[d].wordCount * 1.0 / K
        digamma_gamma[z] = psi(gamma[d][z])
        for w in range(0, len(docs[d].itemIdList)):
            phi[w, z] = 1.0 / K

    for iteration in range(0, iterInference):
        for w in range(0, len(docs[d].itemIdList)):
            phisum = 0
            for z in range(0, K):
                oldphi[z] = phi[w, z]
                phi[w, z] = digamma_gamma[z] + varphi[z, docs[d].itemIdList[w]]
                if z > 0:
                    phisum = math.log(math.exp(phisum) + math.exp(phi[w, z]))
                else:
                    phisum = phi[w, z]
            for z in range(0, K):
                phi[w, z] = math.exp(phi[w, z] - phisum)
                gamma[d][z] =  gamma[d][z] + docs[d].itemCountList[w] * (phi[w, z] - oldphi[z])
                digamma_gamma[z] = psi(gamma[d][z])


# calculate the gamma parameter of new document
Project: megamix    Author: 14thibea    | Project source | File source
def _step_E(self, points):
        """
        In this step the algorithm evaluates the responsibilities of each points in each cluster

        Parameters
        ----------
        points : an array (n_points,dim)

        Returns
        -------
        log_resp: an array (n_points,n_components)
            an array containing the logarithm of the responsibilities.
        log_prob_norm : an array (n_points,)
            logarithm of the probability of each sample in points

        """

        n_points,dim = points.shape

        log_gaussian = _log_normal_matrix(points,self.means,self.cov,'full',self.n_jobs)
        log_gaussian -= 0.5 * dim * np.log(self.nu)
        digamma_sum = np.sum(psi(.5 * (self.nu - np.arange(0, dim)[:,np.newaxis])),0)
        log_lambda = digamma_sum + dim * np.log(2)

        log_prob = self.log_weights + log_gaussian + 0.5 * (log_lambda - dim / self.beta)

        log_prob_norm = logsumexp(log_prob, axis=1)
        log_resp = log_prob - log_prob_norm[:,np.newaxis]

        return log_prob_norm,log_resp
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_dirichlet_expectation():
    """Test Cython version of Dirichlet expectation calculation."""
    x = np.logspace(-100, 10, 10000)
    expectation = np.empty_like(x)
    _dirichlet_expectation_1d(x, 0, expectation)
    assert_allclose(expectation, np.exp(psi(x) - psi(np.sum(x))),
                    atol=1e-19)

    x = x.reshape(100, 100)
    assert_allclose(_dirichlet_expectation_2d(x),
                    psi(x) - psi(np.sum(x, axis=1)[:, np.newaxis]),
                    rtol=1e-11, atol=3e-9)
Project: SEVDL_MGP    Author: AMLab-Amsterdam    | Project source | File source
def perform(self, node, inputs, output_storage):
        x = inputs[0]
        z = output_storage[0]
        z[0] = sp.psi(x)
Project: NAIBR    Author: raphael-group    | Project source | File source
def mleFun(self, par, data, sm):
        p = par[0]
        r = par[1]
        n = len(data)
        f0 = sm/(r+sm)-p
        f1 = np.sum(special.psi(data+r)) - n*special.psi(r) + n*np.log(r/(r+sm))
        return np.array([f0, f1])
Project: mifs    Author: danielhomola    | Project source | File source
def _mi_dc(x, y, k):
    """
    Calculates the mututal information between a continuous vector x and a
    disrete class vector y.

    This implementation can calculate the MI between the joint distribution of
    one or more continuous variables (X[:, 1:3]) with a discrete variable (y).

    Thanks to Adam Pocock, the author of the FEAST package for the idea.

    Brian C. Ross, 2014, PLOS ONE
    Mutual Information between Discrete and Continuous Data Sets
    """

    y = y.flatten()
    n = x.shape[0]
    classes = np.unique(y)
    knn = NearestNeighbors(n_neighbors=k)
    # distance to kth in-class neighbour
    d2k = np.empty(n)
    # number of points within each point's class
    Nx = []
    for yi in y:
        Nx.append(np.sum(y == yi))

    # find the distance of the kth in-class point
    for c in classes:
        mask = np.where(y == c)[0]
        knn.fit(x[mask, :])
        d2k[mask] = knn.kneighbors()[0][:, -1]

    # find the number of points within the distance of the kth in-class point
    knn.fit(x)
    m = knn.radius_neighbors(radius=d2k, return_distance=False)
    m = [i.shape[0] for i in m]

    # calculate MI based on Equation 2 in Ross 2014
    MI = psi(n) - np.mean(psi(Nx)) + psi(k) - np.mean(psi(m))
    return MI
Project: mifs    Author: danielhomola    | Project source | File source
def _entropy(X, k=1):
    """
    Returns the entropy of the X.

    Written by Gael Varoquaux:
    https://gist.github.com/GaelVaroquaux/ead9898bd3c973c40429

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed
    k : int, optional
        number of nearest neighbors for density estimation

    References
    ----------
    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and:

    Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.

    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.
    return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)

    """

    # Distance to kth nearest neighbor
    r = _nearest_distances(X, k)
    n, d = X.shape
    volume_unit_ball = (np.pi ** (.5 * d)) / gamma(.5 * d + 1)
    return (d * np.mean(np.log(r + np.finfo(X.dtype).eps)) +
            np.log(volume_unit_ball) + psi(n) - psi(k))
Project: adversarial-variational-bayes    Author: gdikov    | Project source | File source
def estimation(self, y):
        """ Estimate Shannon entropy.

        Parameters
        ----------
        y : (number of samples, dimension)-ndarray
            One row of y corresponds to one sample.

        Returns
        -------
        h : float
            Estimated Shannon entropy.

        References
        ----------
        M. N. Goria, Nikolai N. Leonenko, V. V. Mergel, and P. L. Novi 
        Inverardi. A new class of random vector entropy estimators and its 
        applications in testing statistical hypotheses. Journal of 
        Nonparametric Statistics, 17: 277-297, 2005. (S={k})

        Harshinder Singh, Neeraj Misra, Vladimir Hnizdo, Adam Fedorowicz
        and Eugene Demchuk. Nearest neighbor estimates of entropy.
        American Journal of Mathematical and Management Sciences, 23,
        301-321, 2003. (S={k})

        L. F. Kozachenko and Nikolai N. Leonenko. A statistical estimate
        for the entropy of a random vector. Problems of Information
        Transmission, 23:9-16, 1987. (S={1})

        Examples
        --------
        h = co.estimation(y)

        """

        num_of_samples, dim = y.shape
        distances_yy = knn_distances(y, y, True, self.knn_method, self.k,
                                     self.eps, 2)[0]
        v = volume_of_the_unit_ball(dim)
        distances_yy[:, self.k - 1][distances_yy[:, self.k-1] == 0] = 1e-6
        h = log(num_of_samples - 1) - psi(self.k) + log(v) + \
            dim * sum(log(distances_yy[:, self.k-1])) / num_of_samples

        return h
Project: megamix    Author: 14thibea    | Project source | File source
def _step_M(self,points,log_resp):
        """
        In this step the algorithm updates the values of the parameters (means, covariances,
        alpha, beta, nu).

        Parameters
        ----------
        points : an array (n_points,dim)

        log_resp: an array (n_points,n_components)
            an array containing the logarithm of the responsibilities.

        """

        n_points,dim = points.shape

        resp = np.exp(log_resp)

        # Convenient statistics
        N = np.sum(resp,axis=0) + 10*np.finfo(resp.dtype).eps            #Array (n_components,)
        X_barre = 1/N[:,np.newaxis] * np.dot(resp.T,points)              #Array (n_components,dim)
        S = _full_covariance_matrices(points,X_barre,N,resp,self.reg_covar,self.n_jobs)

        #Parameters update
        self.alpha = np.asarray([1.0 + N,
                                  self.alpha_0 + np.hstack((np.cumsum(N[::-1])[-2::-1], 0))]).T
        self.alpha += np.asarray([-self.pypcoeff * np.ones(self.n_components),
                                      self.pypcoeff * np.arange(self.n_components)]).T
        self.beta = self.beta_0 + N
        self.nu = self.nu_0 + N

        # Weights update
        for i in range(self.n_components):
            if i==0:
                self.log_weights[i] = psi(self.alpha[i][0]) - psi(np.sum(self.alpha[i]))
            else:
                self.log_weights[i] = psi(self.alpha[i][0]) - psi(np.sum(self.alpha[i]))
                self.log_weights[i] += self.log_weights[i-1] + psi(self.alpha[i-1][1]) - psi(self.alpha[i-1][0])

        # Means update
        means = self.beta_0 * self._means_prior + N[:,np.newaxis] * X_barre
        self.means = means * np.reciprocal(self.beta)[:,np.newaxis]
        self.means_estimated = self.means

        # Covariance update
        for i in range(self.n_components):
            diff = X_barre[i] - self._means_prior
            product = self.beta_0 * N[i]/self.beta[i] * np.outer(diff,diff)
            self._inv_prec[i] = self._inv_prec_prior + N[i] * S[i] + product

            det_inv_prec = np.linalg.det(self._inv_prec[i])
            self._log_det_inv_prec[i] = np.log(det_inv_prec)
            self.cov[i] = self._inv_prec[i] / self.nu[i]