Python sklearn.cluster module, FeatureAgglomeration() example source code

The following 6 code examples, extracted from open-source Python projects, show how to use sklearn.cluster.FeatureAgglomeration().
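For orientation, here is a minimal, self-contained sketch of the basic API (synthetic data; the cluster count of 10 is arbitrary). FeatureAgglomeration clusters the columns of X and pools each cluster, by default with the mean:

import numpy as np
from sklearn.cluster import FeatureAgglomeration

X = np.random.RandomState(0).randn(20, 100)  # 20 samples, 100 features
agglo = FeatureAgglomeration(n_clusters=10)  # merge the 100 features into 10 groups
X_reduced = agglo.fit_transform(X)           # pools each feature cluster (mean by default)
print(X_reduced.shape)                       # (20, 10)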

Project: Parallel-SGD    Author: angadgill    | project source | file source
# Imports as in sklearn's test suite of that era (paths may differ in this vendored copy):
import numpy as np
from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration
from sklearn.cluster.hierarchical import linkage_tree
from sklearn.metrics.pairwise import cosine_distances, manhattan_distances
from sklearn.utils.testing import assert_raises, assert_array_equal


def test_linkage_misc():
    # Misc tests on linkage
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # Test hierarchical clustering on a precomputed distance matrix
    dis = cosine_distances(X)

    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # Test hierarchical clustering with a callable affinity
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
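The ValueError assertions above can be reproduced outside the test harness; a minimal sketch:

import numpy as np
from sklearn.cluster import AgglomerativeClustering

X = np.random.RandomState(42).normal(size=(5, 5))
try:
    AgglomerativeClustering(linkage='foo').fit(X)
except ValueError as err:
    print(err)  # reports the unknown linkage type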
Project: Parallel-SGD    Author: angadgill    | project source | file source
# Imports as in sklearn's test suite of that era:
import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction.image import grid_to_graph
from sklearn.utils.testing import (assert_true, assert_raises,
                                   assert_array_almost_equal)


def test_ward_agglomeration():
    # Check that we obtain the correct solution in a simplistic case
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=bool)  # np.bool is deprecated; plain bool works
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
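The inverse_transform assertion holds because each original feature simply receives the pooled value of its cluster, so a row of X_full contains only n_clusters distinct values. A standalone round-trip sketch, without the grid connectivity:

import numpy as np
from sklearn.cluster import FeatureAgglomeration

rng = np.random.RandomState(0)
X = rng.randn(50, 100)
agglo = FeatureAgglomeration(n_clusters=5).fit(X)
X_red = agglo.transform(X)               # (50, 5): one column per feature cluster
X_full = agglo.inverse_transform(X_red)  # (50, 100): each feature repeats its cluster value
assert np.unique(X_full[0]).size == 5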
Project: PortfolioTimeSeriesAnalysis    Author: MizioAnd    | project source | file source
def feature_agglomeration(df, number_of_clusters=None):
    # A default argument cannot reference df, so the default is computed inside.
    df = df.copy()
    if number_of_clusters is None:
        # Todo: find optimal number of clusters for the feature clustering
        # number_of_clusters = int(df.shape[1]/2)
        number_of_clusters = int(df.shape[1] / 1.2)

    agglomerated_features = FeatureAgglomeration(n_clusters=number_of_clusters)
    if any(tuple(df.columns == 'Call Outcome')):
        # y is accepted but ignored by FeatureAgglomeration's fit_transform
        res = agglomerated_features.fit_transform(df.dropna().values,
                                                  y=df['Call Outcome'].values)
    else:
        res = agglomerated_features.fit_transform(df.values)
    df = pd.DataFrame(data=res)
    return df
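A hypothetical call for illustration (df_calls and its columns are made up here, not taken from the project); note that the 'Call Outcome' column itself stays in the matrix being clustered, as in the original code:

import numpy as np
import pandas as pd

df_calls = pd.DataFrame(np.random.randn(100, 12))
df_calls['Call Outcome'] = np.random.randint(0, 2, size=100)
df_reduced = feature_agglomeration(df_calls)  # int(13 / 1.2) = 10 clustered features
print(df_reduced.shape)                       # (100, 10)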
Project: PortfolioTimeSeriesAnalysis    Author: MizioAnd    | project source | file source
def dendrogram(df, number_of_clusters=None):
    # Create dendrogram
    if number_of_clusters is None:
        # A default argument cannot reference df, so the default is computed inside.
        number_of_clusters = int(df.shape[1] / 1.2)
    agglomerated_features = FeatureAgglomeration(n_clusters=number_of_clusters)  # instantiated but not used below
    used_networks = np.arange(0, number_of_clusters, dtype=int)

    # Create a custom palette to identify the networks
    network_pal = sns.cubehelix_palette(len(used_networks),
                                        light=.9, dark=.1, reverse=True,
                                        start=1, rot=-2)
    network_lut = dict(zip(map(str, df.columns), network_pal))

    # Convert the palette to vectors that will be drawn on the side of the matrix
    networks = df.columns.get_level_values(None)
    network_colors = pd.Series(networks, index=df.columns).map(network_lut)
    sns.set(font="monospace")
    # Create custom colormap
    cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
    cg = sns.clustermap(df.astype(float).corr(), cmap=cmap, linewidths=.5,
                        row_colors=network_colors, col_colors=network_colors)
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    plt.show()
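A hypothetical invocation (the DataFrame below is illustrative; any numeric frame with string column names works, since the palette is keyed on str(column)):

import numpy as np
import pandas as pd

df_demo = pd.DataFrame(np.random.randn(200, 8),
                       columns=['feat_%d' % i for i in range(8)])
dendrogram(df_demo, number_of_clusters=6)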
Project: HousePrices    Author: MizioAnd    | project source | file source
def feature_agglomeration(df):
    df = df.copy()
    # Todo: find optimal number of clusters for the feature clustering
    # number_of_clusters = int(df.shape[1]/2)
    number_of_clusters = int(df.shape[1] / 1.2)
    from sklearn.cluster import FeatureAgglomeration
    agglomerated_features = FeatureAgglomeration(n_clusters=number_of_clusters)
    # mask = ~df[features].isnull()
    # mask_index = mask[mask == 1].index
    if any(tuple(df.columns == 'SalePrice')):
        # y is accepted but ignored by FeatureAgglomeration's fit_transform
        res = agglomerated_features.fit_transform(df.dropna().values,
                                                  y=df.SalePrice.values)
    else:
        res = agglomerated_features.fit_transform(df.values)

    # Todo: in case of adding values using df.loc[], remember mask is only
    # possible for a single feature at a time.
    print(''.join(['labels:', str(agglomerated_features.labels_)]))
    print(''.join(['Children:', str(agglomerated_features.children_)]))
    print(''.join(['number of leaves in the hierarchical tree:',
                   str(agglomerated_features.n_leaves_)]))
    HousePrices.dendrogram(df, number_of_clusters, agglomerated_features.labels_)
    df = pd.DataFrame(data=res)
    return df
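Beyond printing labels_, the fitted attributes can be used to list which original columns were merged together; a small sketch, with agglo standing in for any fitted FeatureAgglomeration:

import numpy as np
from sklearn.cluster import FeatureAgglomeration

agglo = FeatureAgglomeration(n_clusters=3).fit(np.random.randn(30, 9))
for cluster in range(3):
    members = np.where(agglo.labels_ == cluster)[0]
    print(cluster, members)  # original column indices in this feature cluster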
Project: PyBASC    Author: AkiNikolaidis    | project source | file source
def data_compression(fmri_masked, mask_img, mask_np, output_size):
    """
    fmri_masked : array_like
        A matrix of shape (N, V) with N timepoints and V voxels;
        the functional dataset that needs to be reduced
    mask_img : Nifti image of the mask
    mask_np : a numpy array of the mask
    output_size : integer
        The number of elements that the data should be reduced to
    """
    import time
    from nilearn import input_data
    from sklearn.feature_extraction import image
    from sklearn.cluster import FeatureAgglomeration

    datacompressiontime = time.time()

    # Set up a NiftiMasker; the input is already masked, so this object is
    # not used below (kept from the original source)
    nifti_masker = input_data.NiftiMasker(mask_img=mask_img,
                                          memory='nilearn_cache',
                                          mask_strategy='background',
                                          memory_level=1, standardize=False)

    # Build a voxel-adjacency graph so Ward only merges spatial neighbours
    shape = mask_np.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask_np)

    # Perform Ward clustering of voxels (features) into output_size clusters
    start = time.time()
    ward = FeatureAgglomeration(n_clusters=output_size,
                                connectivity=connectivity, linkage='ward')
    ward.fit(fmri_masked)
    # print("Ward agglomeration compressing voxels into clusters: %.2fs" % (time.time() - start))

    labels = ward.labels_

    # Extract the reduced-dimension data
    data_reduced = ward.transform(fmri_masked)
    fmri_masked = []  # release the reference to the large input array
    # print('Data compression took ', (time.time() - datacompressiontime), ' seconds')
    return {'data': data_reduced, 'labels': labels}
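A hedged follow-up sketch: the compression can be undone for inspection by expanding each cluster signal back to its member voxels and then un-masking. This reuses the ward and nifti_masker objects from the function above and assumes the masker has been fitted first (it is only constructed, never fitted, in the original snippet):

data_expanded = ward.inverse_transform(data_reduced)   # back to one value per voxel
nifti_masker.fit()                                     # must be fitted before inverse_transform
compressed_img = nifti_masker.inverse_transform(data_expanded)  # 4D Nifti image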