Python sklearn.decomposition 模块，LatentDirichletAllocation() 实例源码

我们从Python开源项目中，提取了以下36个代码示例，用于说明如何使用sklearn.decomposition.LatentDirichletAllocation()。

项目：Trendster 作者：rawanhassunah | 项目源码 | 文件源码

def lda(X, n_topics=None):
    """Fit a LatentDirichletAllocation model on ``X`` and return its outputs.

    Parameters
    ----------
    X : input accepted by ``LatentDirichletAllocation.fit_transform``.
    n_topics : int or None, optional
        Number of topics, passed positionally to the estimator. When None,
        the estimator is built without arguments so that its own default
        applies.

    Returns
    -------
    (X_new, components) : the result of ``fit_transform(X)`` and the fitted
        model's ``components_`` attribute.
    """
    # Forwarding None would replace the estimator's default topic count with
    # None, which the estimator cannot use; only pass the value when given.
    if n_topics is None:
        model = LatentDirichletAllocation()
    else:
        model = LatentDirichletAllocation(n_topics)
    X_new = model.fit_transform(X)
    return X_new, model.components_

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_lda_preplexity_mismatch():
    """Check that `perplexity` rejects a doc-topic matrix of the wrong shape.

    Fits a small model, then calls ``lda.perplexity`` once with a matrix
    that has one row too many and once with a matrix that has one column too
    many; each call is expected to raise ``ValueError``.
    """
    rng = np.random.RandomState(0)
    n_topics = rng.randint(3, 6)
    n_samples = rng.randint(6, 10)
    # Draw the data from the seeded generator rather than the global NumPy
    # RNG so the fitted input is the same on every run.
    X = rng.randint(4, size=(n_samples, 10))
    lda = LatentDirichletAllocation(n_topics=n_topics, learning_offset=5.,
                                    total_samples=20, random_state=rng)
    lda.fit(X)
    # invalid samples: one more row than X has
    invalid_n_samples = rng.randint(4, size=(n_samples + 1, n_topics))
    assert_raises_regexp(ValueError, r'Number of samples', lda.perplexity, X,
                         invalid_n_samples)
    # invalid topic number: one more column than the model has topics
    invalid_n_topics = rng.randint(4, size=(n_samples, n_topics + 1))
    assert_raises_regexp(ValueError, r'Number of topics', lda.perplexity, X,
                         invalid_n_topics)

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_lda_perplexity():
    """Perplexity must not go up when LDA is trained for more iterations.

    For each learning method ('online' and 'batch') a model fitted with
    ``max_iter=1`` is compared against one fitted with ``max_iter=10``,
    first with ``sub_sampling=False`` and then with ``sub_sampling=True``.
    """
    n_topics, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        # Everything except the iteration budget is shared by both models.
        shared = dict(n_topics=n_topics, learning_method=method,
                      total_samples=100, random_state=0)
        short_run = LatentDirichletAllocation(max_iter=1, **shared)
        long_run = LatentDirichletAllocation(max_iter=10, **shared)

        short_distr = short_run.fit_transform(X)
        short_perp = short_run.perplexity(X, short_distr, sub_sampling=False)

        long_distr = long_run.fit_transform(X)
        long_perp = long_run.perplexity(X, long_distr, sub_sampling=False)
        assert_greater_equal(short_perp, long_perp)

        # Repeat the comparison with sub-sampling switched on.
        short_perp_sub = short_run.perplexity(X, short_distr,
                                              sub_sampling=True)
        long_perp_sub = long_run.perplexity(X, long_distr,
                                            sub_sampling=True)
        assert_greater_equal(short_perp_sub, long_perp_sub)

项目：Parallel-SGD 作者：angadgill | 项目源码 | 文件源码

def test_lda_score():
    """Score must not go down when LDA is trained for more iterations.

    For each learning method ('online' and 'batch') a model fitted with
    ``max_iter=1`` is compared against one fitted with ``max_iter=10``.
    """
    n_topics, X = _build_sparse_mtx()
    for method in ('online', 'batch'):
        # Build both models up front, differing only in iteration budget.
        models = [
            LatentDirichletAllocation(n_topics=n_topics, max_iter=n_iter,
                                      learning_method=method,
                                      total_samples=100, random_state=0)
            for n_iter in (1, 10)
        ]

        scores = []
        for model in models:
            model.fit_transform(X)
            scores.append(model.score(X))

        few_iter_score, many_iter_score = scores
        assert_greater_equal(many_iter_score, few_iter_score)

项目：hh-page-classifier 作者：TeamHG-Memex | 项目源码 | 文件源码

def LDAPageVctorizer(*,
                     n_topics: int,
                     min_df: int,
                     max_features: int,
                     max_iter: int,
                     ngram_range: Tuple[int, int],
                     vocabulary=None,
                     batch_size: int=4096,
                     verbose=1):
    """Build a two-step pipeline: a vectorizer followed by an online LDA.

    All arguments are keyword-only. ``min_df``, ``max_features``,
    ``ngram_range`` and ``vocabulary`` are forwarded to ``_vectorizer``;
    ``n_topics``, ``batch_size``, ``verbose`` and ``max_iter`` are forwarded
    to ``LatentDirichletAllocation``.

    Returns the object produced by ``make_pipeline``.
    """
    # LDA settings: the learning method, evaluation interval and job count
    # are fixed here; the rest come from the caller.
    lda_params = dict(
        learning_method='online',
        n_topics=n_topics,
        batch_size=batch_size,
        evaluate_every=2,
        verbose=verbose,
        max_iter=max_iter,
        n_jobs=1,
    )
    return make_pipeline(
        _vectorizer(min_df=min_df, max_features=max_features,
                    ngram_range=ngram_range, vocabulary=vocabulary),
        LatentDirichletAllocation(**lda_params),
    )

项目：AND4NMF 作者：PrincetonML | 项目源码 | 文件源码

def train(self):
        D = self.A_true.shape[1]
        for i in range(20):
            self.show_error()

            start = time.time()
            prior = self.sparsity / np.float(self.A_true.shape[1])
            lda = LDA(n_topics=D, random_state=0, doc_topic_prior = prior, max_iter=i)
            lda.fit(self.Y.transpose())
            end = time.time()
            self.time = end - start
            self.A = np.asmatrix(lda.components_.transpose())