Python seaborn 模块，clustermap() 实例源码

我们从Python开源项目中，提取了以下13个代码示例，用于说明如何使用seaborn.clustermap()。

项目：word2vec_pipeline 作者：NIHOPA | 项目源码 | 文件源码

def plot_heatmap():
    """Draw a clustered heatmap of the loaded dispersion matrix.

    The linkage found under the ``"linkage"`` key of the loaded data is
    used for both the rows and the columns, and the colour scale is
    clipped to the range 0.50-1.00.
    """
    dispersion_data = load_dispersion_data()
    shared_linkage = dispersion_data["linkage"]

    sns.set_context("notebook", font_scale=1.25)

    clustermap_options = {
        "data": dispersion_data["dispersion"],
        "row_linkage": shared_linkage,
        "col_linkage": shared_linkage,
        "vmin": 0.50,
        "vmax": 1.00,
        "cmap": cmap_clustermap,
        "figsize": (12, 10),
    }
    grid = sns.clustermap(**clustermap_options)

    # Sanity check: the column order seaborn plotted has to agree with
    # the dendrogram order stored in the loaded data.
    plotted_labels = grid.data2d.columns
    assert (plotted_labels == dispersion_data["dendrogram_order"]).all()

项目：scikit-discovery 作者：MITHaystack | 项目源码 | 文件源码

def process(self, obj_data):
        '''
        Draw a cluster map and a dendrogram, then store the linkage.

        The data is looked up in the results of obj_data under
        self.obj_name, and the computed linkage is added back to obj_data
        under self.str_description.

        @param obj_data: Data wrapper
        '''

        import seaborn as sns

        results = obj_data.getResults()
        matrix = results[self.obj_name]

        # One average-linkage clustering, shared by rows and columns.
        tree = sp.cluster.hierarchy.linkage(matrix, method='average')

        plt.figure()

        grid = sns.clustermap(matrix, row_linkage=tree, col_linkage=tree)
        # Keep the row tick labels horizontal.
        plt.setp(grid.ax_heatmap.get_yticklabels(), rotation=0)

        # The plain dendrogram goes on a figure of its own.
        plt.figure()
        sp.cluster.hierarchy.dendrogram(tree)

        obj_data.addResult(self.str_description, tree)

项目：VASC 作者：wang-research | 项目源码 | 文件源码

def print_heatmap(points, label, id_map):
    '''
    Draw a row-clustered heatmap of the samples and return its figure.

    Rows are named through id_map and annotated with a colour taken from
    the module-level current_palette; columns are not clustered and no
    tick labels are drawn.

    points: N_samples * N_features
    label: (int) N_samples
    id_map: map label id to its name
    '''
    frame = DataFrame(
        points,
        index=[id_map[sample_label] for sample_label in label],
        columns=list(range(points.shape[1])),
    )
    side_colors = [current_palette[sample_label] for sample_label in label]

    heat_cmap = sns.cubehelix_palette(as_cmap=True, rot=-.3, light=1)
    grid = sns.clustermap(
        frame,
        cmap=heat_cmap,
        row_colors=side_colors,
        col_cluster=False,
        xticklabels=False,
        yticklabels=False,
    )

    return grid.fig

项目：PyMaid 作者：schlegelp | 项目源码 | 文件源码

def plot_matrix2(self, labels=None, **kwargs):
        """ Plot distance matrix and dendrogram using seaborn. This package
        needs to be installed manually.

        The matrix in ``self.mat`` is plotted with ``self.linkage`` used for
        both the row and the column dendrogram.

        Parameters
        ----------
        labels      optional
                    Accepted for interface compatibility; not used by this
                    method.
        kwargs      dict
                    Keyword arguments to be passed to seaborn.clustermap. See 
                    http://seaborn.pydata.org/generated/seaborn.clustermap.html


        Returns
        -------
        seaborn.clustermap

        Raises
        ------
        ImportError
                    If seaborn cannot be imported.
        """

        # Only a failed import should be reported as "seaborn missing";
        # anything else (e.g. KeyboardInterrupt) must propagate untouched.
        try:
            import seaborn as sns
        except ImportError:
            raise ImportError('Need seaborn package installed.')

        cg = sns.clustermap(
            self.mat, row_linkage=self.linkage, col_linkage=self.linkage, **kwargs)

        x_labels = cg.ax_heatmap.xaxis.get_majorticklabels()
        y_labels = cg.ax_heatmap.yaxis.get_majorticklabels()

        # Rotate labels
        plt.setp(x_labels, rotation=90)
        plt.setp(y_labels, rotation=0)

        # Make labels smaller
        plt.setp(x_labels, fontsize=4)
        plt.setp(y_labels, fontsize=4)

        # Increase padding
        cg.fig.subplots_adjust(right=.8, top=.95, bottom=.2)

        module_logger.info(
            'Use matplotlib.pyplot.show() to render figure.')

        return cg

项目：IgDiscover 作者：NBISweden | 项目源码 | 文件源码

def plot_clustermap(sequences, title, plotpath, size=300, dpi=200):
    """
    Cluster the given sequences and save a clustermap of them to plotpath.

    size -- Downsample to this many sequences
    title -- plot title; skipped when None
    plotpath -- where the figure is saved
    dpi -- resolution passed to savefig

    Return the number of clusters.
    """
    logger.info('Clustering %d sequences (downsampled to at most %d)', len(sequences), size)
    sequences = downsampled(sequences, size)
    df, linkage, clusters = cluster_sequences(sequences)

    # Palette entry 0 is a dark grey; the following entries are taken
    # from 'Spectral', max(clusters) of them, so that a cluster id can be
    # used directly as an index.
    palette = sns.color_palette([(0.15, 0.15, 0.15)])
    palette += sns.color_palette('Spectral', n_colors=max(clusters), desat=0.9)
    row_colors = [palette[cluster_id] for cluster_id in clusters]

    side = 210/25.4  # 210 mm expressed in inches
    plot_options = dict(
        row_linkage=linkage,
        col_linkage=linkage,
        row_colors=row_colors,
        linewidths=None,
        linecolor='none',
        figsize=(side, side),
        cmap='Blues',
        xticklabels=False,
        yticklabels=False,
    )
    grid = sns.clustermap(df, **plot_options)
    if title is not None:
        grid.fig.suptitle(title)
    grid.savefig(plotpath, dpi=dpi)

    # free the memory used by the plot
    import matplotlib.pyplot as plt
    plt.close('all')

    n_clusters = len(set(clusters))
    return n_clusters

项目：PortfolioTimeSeriesAnalysis 作者：MizioAnd | 项目源码 | 文件源码

def dendrogram(df, number_of_clusters=None):
        """Show a clustered heatmap of the column correlations of ``df``.

        Parameters
        ----------
        df : DataFrame
            Data whose columns are correlated (after a cast to float).
        number_of_clusters : int, optional
            Number of colours generated for the side-colour palette.
            Defaults to ``int(df.shape[1] / 1.2)``.

        The figure is displayed with ``plt.show()``; nothing is returned.
        """
        # The default depends on ``df`` and therefore has to be resolved at
        # call time; a default expression in the signature would be
        # evaluated when the function is defined, before any ``df`` exists.
        if number_of_clusters is None:
            number_of_clusters = int(df.shape[1] / 1.2)

        used_networks = np.arange(0, number_of_clusters, dtype=int)

        # Create a custom palette to identify the networks
        network_pal = sns.cubehelix_palette(len(used_networks),
                                            light=.9, dark=.1, reverse=True,
                                            start=1, rot=-2)
        # zip() stops at the shorter input, so only as many columns as
        # there are palette colours receive a colour.
        network_lut = dict(zip(map(str, df.columns), network_pal))

        # Convert the palette to vectors that will be drawn on the side of the matrix
        networks = df.columns.get_level_values(None)
        network_colors = pd.Series(networks, index=df.columns).map(network_lut)
        sns.set(font="monospace")
        # Create custom colormap
        cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
        cg = sns.clustermap(df.astype(float).corr(), cmap=cmap, linewidths=.5, row_colors=network_colors,
                            col_colors=network_colors)
        plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
        plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
        plt.show()

项目：guesswhat 作者：GuessWhatGame | 项目源码 | 文件源码

def __init__(self, path, games, logger, suffix):
        """Plot a clustered co-occurrence heatmap of the most common words.

        Every question of every game is split into lower-cased words. The
        most frequent words (at most NO_WORDS_TO_DISPLAY) are kept, and for
        each question every ordered pair of distinct kept words increments
        the corresponding matrix cell.

        path, suffix -- forwarded to the parent constructor
        games -- iterable of objects exposing a ``questions`` list of strings
        logger -- not used by this constructor
        """
        super(WordCoocurence, self).__init__(path, self.__class__.__name__, suffix)

        NO_WORDS_TO_DISPLAY = 50

        # Tokenise each question exactly once and keep the tokens: the
        # co-occurrence pass below must walk over words, not over the
        # characters of the raw question string.
        tokenized_questions = []
        word_counter = collections.Counter()

        for game in games:
            for q in game.questions:
                q = re.sub('[?]', '', q)
                words = [w.lower() for w in re.findall(r'\w+', q)]
                tokenized_questions.append(words)
                word_counter.update(words)

        # compute word co-coocurrence
        common_words = [word for word, _ in word_counter.most_common(NO_WORDS_TO_DISPLAY)]
        # Constant-time word -> row/column lookup.
        word_index = {word: position for position, word in enumerate(common_words)}

        # Size the matrix by the words actually found so that a vocabulary
        # smaller than NO_WORDS_TO_DISPLAY still matches the labels below.
        n_words = len(common_words)
        corrmat = np.zeros((n_words, n_words))

        # compute the correlation matrices
        for words in tokenized_questions:
            positions = [word_index[w] for w in words if w in word_index]
            for row in positions:
                for col in positions:
                    if row != col:
                        corrmat[row][col] += 1.

        # Display the cor matrix
        df = pd.DataFrame(data=corrmat, index=common_words, columns=common_words)
        f = sns.clustermap(df, standard_scale=0, col_cluster=False, row_cluster=True, cbar_kws={"label": "co-occurence"})
        f.ax_heatmap.xaxis.tick_top()

        plt.setp(f.ax_heatmap.get_xticklabels(), rotation=90)
        plt.setp(f.ax_heatmap.get_yticklabels(), rotation=0)

项目：HousePrices 作者：MizioAnd | 项目源码 | 文件源码

def dendrogram(df, number_of_clusters, agglomerated_feature_labels):
        """Show a clustered heatmap of the column correlations of ``df``.

        df -- data whose columns are correlated (after a cast to float)
        number_of_clusters -- not used by this function
        agglomerated_feature_labels -- labels whose number of distinct
            values sets the size of the side-colour palette

        The figure is displayed with ``plt.show()``; nothing is returned.
        """
        import seaborn as sns

        # One palette colour per distinct agglomerated label.
        used_networks = np.unique(agglomerated_feature_labels)
        palette_size = len(used_networks)
        network_pal = sns.cubehelix_palette(
            palette_size,
            light=.9, dark=.1, reverse=True,
            start=1, rot=-2,
        )

        # Pair the stringified column names with the palette colours.
        column_names = map(str, df.columns)
        network_lut = dict(zip(column_names, network_pal))

        # Side-colour vector, indexed like the columns of df.
        networks = df.columns.get_level_values(None)
        network_colors = pd.Series(networks, index=df.columns).map(network_lut)

        sns.set(font="monospace")
        # Custom diverging colormap for the correlation values.
        cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)

        correlations = df.astype(float).corr()
        cg = sns.clustermap(
            correlations,
            cmap=cmap,
            linewidths=.5,
            row_colors=network_colors,
            col_colors=network_colors,
        )

        # Horizontal row labels, vertical column labels.
        heatmap_axes = cg.ax_heatmap
        plt.setp(heatmap_axes.yaxis.get_majorticklabels(), rotation=0)
        plt.setp(heatmap_axes.xaxis.get_majorticklabels(), rotation=90)
        plt.show()

项目：mriqc 作者：poldracklab | 项目源码 | 文件源码

def plot_corrmat(in_csv, out_file=None):
    """Plot a clustered correlation matrix of the columns of a CSV file.

    The columns 'subject_id', 'site' and 'modality' are left out when
    present. Rows and columns of the correlation matrix that are entirely
    NaN are dropped before plotting.

    in_csv -- path of the CSV file to read
    out_file -- output path; defaults to 'corr_matrix.svg'. When the
        extension is not one of pdf, svg or png, it is replaced by '.svg'.

    Return the seaborn grid object that was saved.
    """
    import seaborn as sn
    sn.set(style="whitegrid")

    dataframe = pd.read_csv(in_csv, index_col=False, na_values='n/a', na_filter=False)

    excluded = ('subject_id', 'site', 'modality')
    colnames = [col for col in dataframe.columns.tolist() if col not in excluded]

    # Correlation matrix
    corr = dataframe[colnames].corr()
    # Drop all-NaN rows first, then all-NaN columns. Keyword arguments
    # and one axis per call are used because current pandas no longer
    # accepts a tuple of axes or positional arguments here.
    corr = corr.dropna(axis=0, how='all').dropna(axis=1, how='all')

    # Generate a custom diverging colormap
    cmap = sn.diverging_palette(220, 10, as_cmap=True)

    # Draw the clustered heatmap, centred on zero correlation
    corrplot = sn.clustermap(corr, cmap=cmap, center=0., method='average', square=True, linewidths=.5)
    plt.setp(corrplot.ax_heatmap.yaxis.get_ticklabels(), rotation='horizontal')

    if out_file is None:
        out_file = 'corr_matrix.svg'

    # Fall back to SVG when the requested extension is not supported here.
    fname, ext = op.splitext(out_file)
    if ext[1:] not in ['pdf', 'svg', 'png']:
        ext = '.svg'
        out_file = fname + '.svg'

    corrplot.savefig(out_file, format=ext[1:], bbox_inches='tight', pad_inches=0, dpi=100)
    return corrplot

项目：cgpm 作者：probcomp | 项目源码 | 文件源码

def plot_clustermap(D, xticklabels=None, yticklabels=None):
    """Draw a clustermap of the matrix ``D`` and return the seaborn grid.

    D -- 2-D matrix to plot (its ``shape`` attribute is read)
    xticklabels -- labels for the columns; defaults to 0..n_columns-1
    yticklabels -- labels for the rows; defaults to 0..n_rows-1
    """
    import seaborn as sns
    # x tick labels annotate columns (shape[1]) and y tick labels annotate
    # rows (shape[0]); sizing them the other way round only works for
    # square matrices.
    if xticklabels is None:
        xticklabels = range(D.shape[1])
    if yticklabels is None:
        yticklabels = range(D.shape[0])
    zmat = sns.clustermap(
        D, yticklabels=yticklabels, xticklabels=xticklabels,
        linewidths=0.2, cmap='BuGn')
    # Horizontal row labels, vertical column labels.
    plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
    plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
    return zmat

项目：intervene 作者：asntech | 项目源码 | 文件源码

def heatmap_dendrogram(dataframe, outfile, options):
    '''
    Create a full clustered heatmap using Seaborn and save it to outfile.

    dataframe -- data to cluster and plot
    outfile -- path the figure is saved to
    options -- object providing ``corr`` (when true, the linkage method
        'complete' and the metric 'euclidean' are requested explicitly),
        ``hlabel`` (figure title) and ``dpi`` (output resolution)
    '''
    if options.corr:
        sns_plot = sns.clustermap(dataframe, cmap="RdBu", linewidths=.3, method='complete', metric='euclidean')
    else:
        sns_plot = sns.clustermap(dataframe, cmap="RdBu", linewidths=.3)

    # Work on the grid's own axes and figure instead of going through
    # ``sns.plt``, which recent seaborn releases no longer expose.
    for tick_label in sns_plot.ax_heatmap.yaxis.get_majorticklabels():
        tick_label.set_rotation(0)
    sns_plot.fig.suptitle(options.hlabel)

    sns_plot.savefig(outfile, bbox_inches='tight', dpi=options.dpi)

项目：sentisignal 作者：jonathanmanfield | 项目源码 | 文件源码

def plot_clustermap(df):
    """Draw a clustermap of the pairwise column correlations of ``df``.

    The row tick labels of the heatmap are set horizontal. Nothing is
    returned and the figure is not shown by this function.
    """
    correlations = df.corr()
    grid = sns.clustermap(correlations)

    row_tick_labels = grid.ax_heatmap.yaxis.get_majorticklabels()
    plt.setp(row_tick_labels, rotation=0)

项目：crop-seq 作者：epigen | 项目源码 | 文件源码

def _save_bulk_clustermap(matrix, filename, **clustermap_kwargs):
    """Draw a fully clustered, fully labelled clustermap and save it under results/bulk."""
    g = sns.clustermap(
        matrix,
        row_cluster=True, col_cluster=True,
        xticklabels=True, yticklabels=True,
        figsize=(15, 15),
        **clustermap_kwargs)
    for item in g.ax_heatmap.get_yticklabels():
        item.set_rotation(0)
    for item in g.ax_heatmap.get_xticklabels():
        item.set_rotation(90)
    g.fig.savefig(os.path.join("results", "bulk", filename), dpi=300, bbox_inches="tight")


def inspect_bulk(df, df_bulk, de_genes, de_genes_bulk):
    """
    Write QC plots for the bulk expression matrix to results/bulk.

    For the single quantification type "bitseq" (``df_bulk``) this saves
    an expression boxplot, and for each of the two gene sets a row
    z-scored clustermap of the selected genes plus a clustermap of the
    correlation between the columns over those genes.

    df -- not used by this function
    df_bulk -- expression matrix indexed by gene
    de_genes -- gene labels saved under the name "de_genes"
    de_genes_bulk -- gene labels saved under the name "de_genes_bulk"
    """
    quant_types = [("bitseq", df_bulk)]

    # Explicit name -> genes mapping; the names end up in the file names.
    genesets = [("de_genes", de_genes), ("de_genes_bulk", de_genes_bulk)]

    for quant_type, exp_matrix in quant_types:
        print(quant_type)

        # Boxplots of expression
        fig, axis = plt.subplots(1)
        sns.boxplot(data=pd.melt(exp_matrix), x="grna", y="value", hue="condition", ax=axis)
        fig.savefig(os.path.join("results", "bulk", "bulk_samples.qc.{}.expression_boxplots.png".format(quant_type)), dpi=300, bbox_inches="tight")

        # Heatmap and correlation on signature genes
        for geneset_name, genes in genesets:
            # reindex() yields all-NaN rows for genes missing from the
            # matrix, which the dropna() then discards; a plain .loc
            # lookup would raise on missing labels instead.
            # NOTE(review): reindex() requires a unique gene index - confirm.
            signature = exp_matrix.reindex(genes).dropna()

            _save_bulk_clustermap(
                signature,
                "bulk_samples.qc.{}.{}.png".format(quant_type, geneset_name),
                z_score=0)

            _save_bulk_clustermap(
                signature.corr(),
                "bulk_samples.qc.{}.{}.correlation.png".format(quant_type, geneset_name))