我们从Python开源项目中，提取了以下15个代码示例，用于说明如何使用seaborn.factorplot()。
def plot_count_fig(tasks):
    """
    Create count plot, as a 2-row x 3-col bar plot of data points for each k
    in each covar.

    Parameters
    ----------
    tasks: list(dict)
        Task records. Only the 'k', 'covar_type' and 'covar_tied' entries are
        used here.

    Returns
    -------
    Matplotlib Figure object.
    """
    sns.set(context='talk', style='whitegrid')
    columns = ['k', 'covar_type', 'covar_tied']
    df = pd.DataFrame(filter_dict_list_by_keys(tasks, columns))
    # Select only the columns that were requested from the filter. The count
    # plot never reads 'bic'/'aic', and asking .loc for labels that are absent
    # from the frame raises KeyError on pandas >= 1.0.
    df = df.loc[:, columns]
    df['covar_type'] = [x.capitalize() for x in df['covar_type']]
    # covar_tied is used as an index (0/1 or False/True) into the label pair.
    df['covar_tied'] = [['Untied', 'Tied'][x] for x in df['covar_tied']]
    f = sns.factorplot(x='k', kind='count', col='covar_type', row='covar_tied',
                       data=df, row_order=['Tied', 'Untied'],
                       col_order=['Full', 'Diag', 'Spher'], legend=True,
                       legend_out=True, palette='Blues_d')
    f.set_titles("{col_name}-{row_name}")
    f.set_xlabels("Num. of Clusters (K)")
    return f.fig
def plot_grid_scores(model, x, y=None, hue=None, row=None, col=None,
                     col_wrap=None, **kwargs):
    """
    Plot grid-search scores through seaborn.factorplot.

    Parameters
    ----------
    model : Pipeline or Estimator
        Passed to ``unpack_grid_scores`` to obtain the data to plot.
    x, hue, row, col : str
        parameters grid searched over
    y : str
        the target of interest; any falsy value falls back to ``'mean_'``
    col_wrap : int, optional
        Forwarded to seaborn.factorplot.
    **kwargs
        Any further keyword arguments are forwarded to seaborn.factorplot.

    Returns
    -------
    g : seaborn.FacetGrid
    """
    if not y:
        y = 'mean_'
    grid_data = unpack_grid_scores(model)
    facet_args = dict(x=x, y=y, hue=hue, row=row, col=col,
                      data=grid_data, col_wrap=col_wrap)
    return sns.factorplot(**facet_args, **kwargs)
def denovo_plot(consensus_data, ordered_genomes, denovo_tgt):
    """
    Draw bar plots of de-novo prediction counts into a PDF.

    :param consensus_data: input handed to json_biotype_nested_counter_to_df
        together with the 'denovo' key
    :param ordered_genomes: genomes in the order they should appear on the
        x axis
    :param denovo_tgt: target object whose ``open('w')`` yields the handle the
        PDF is written to
    """
    with denovo_tgt.open('w') as outf, PdfPages(outf) as pdf:
        df = json_biotype_nested_counter_to_df(consensus_data, 'denovo')
        # fix column names because json_biotype_nested_counter_to_df makes assumptions
        df.columns = ['Result', 'Number of transcripts', 'Augustus mode', 'genome']
        plot_kwargs = {'data': df, 'y': 'Number of transcripts', 'kind': 'bar'}
        # Two distinct Augustus modes present -> split the bars by mode.
        if len(set(df['Augustus mode'])) == 2:
            plot_kwargs['hue'] = 'Augustus mode'
        if len(set(df.genome)) > 1:  # if we ran in PB only, we may not have multiple genomes
            # `row_order` is ignored by seaborn when no `row` variable is given,
            # so the requested genome order has to go through `order` (the x
            # axis). Only genomes present in the data are listed, and genomes
            # missing from ordered_genomes are appended so no bar is dropped.
            present = list(df['genome'].unique())
            genome_order = [g for g in ordered_genomes if g in present]
            genome_order += [g for g in present if g not in ordered_genomes]
            plot_kwargs.update(x='genome', col='Result', col_wrap=2,
                               order=genome_order, sharex=True, sharey=False)
        else:
            plot_kwargs['x'] = 'Result'
        ax = sns.factorplot(**plot_kwargs)
        ax.set_xticklabels(rotation=90)
        ax.fig.suptitle('Incorporation of de-novo predictions')
        # leave headroom for the suptitle
        ax.fig.subplots_adjust(top=0.9)
        multipage_close(pdf, tight_layout=False)
def _plotWeekdayStats(stats, columns, groupBy=True):
    """
    Point-plot the given stat columns against the weekday, one facet per stat.

    :param stats: data to plot; with ``groupBy`` it needs a datetime 'date'
        column, otherwise it must already contain a 'weekday' column
    :param columns: columns from stats to plot
    :param groupBy: when true, average the data per weekday before plotting
    :return: the grid returned by sns.factorplot
    """
    dataToPlot = stats.copy()
    # Group by weekday and rename date column
    if groupBy:
        dataToPlot = dataToPlot.groupby(stats['date'].dt.weekday).mean()
        dataToPlot = dataToPlot.reset_index().rename(columns={'date':'weekday'})

    # change stats from columns to row attribute
    dataToPlot = pd.melt(dataToPlot, id_vars=['weekday'], value_vars=columns,
                         var_name='stats', value_name='val')
    # Rename stats and weekdays.
    # Assign the result back instead of calling replace(inplace=True) on a
    # column selection: the chained in-place form does not update the frame
    # under pandas copy-on-write.
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    dataToPlot['weekday'] = dataToPlot['weekday'].replace(dayOfWeek)
    # Plot
    g = sns.factorplot(data=dataToPlot, x="weekday", y="val", col="stats",
                       order=dayOfWeekOrder, kind="point", sharey=False, col_wrap=3)
    g.set_xticklabels(rotation=45)
    g.set(xlabel='')
    return g
    #sns.plt.show()
def plotYearAndMonthStatsSleep(stats, columns=None):
    """
    Plot aggregated (mean) stats by year and month.

    :param stats: data to plot
    :param columns: columns from stats to plot; defaults to
        ['sleep_efficiency', 'sleep_hours'] when empty or None
    :return: the grid returned by sns.factorplot
    """
    if not columns:
        columns = ['sleep_efficiency', 'sleep_hours']
    dataToPlot = _prepareYearAndMonthStats(stats, columns)
    # Plot
    g = sns.factorplot(data=dataToPlot, x="date", y="val", row="stats",
                       kind="point", sharey=False)
    g.set_xticklabels(rotation=45)
    for ax in g.axes.flat:
        # Pass the flag positionally: Matplotlib renamed the `b` keyword to
        # `visible`, so `grid(b=True)` fails on current releases while the
        # positional form works on old and new versions alike.
        ax.grid(True)
    return g
    #sns.plt.show()
def plot_aic_bic_fig(tasks):
    """
    Creates AIC-BIC plot, as a 2-row x 3-col grid of point plots with 95%
    confidence intervals.

    Parameters
    ----------
    tasks: list(dict)
        Task records providing 'k', 'covar_type', 'covar_tied', 'bic' and
        'aic' entries.

    Returns
    -------
    Matplotlib Figure object
    """
    sns.set(context='talk', style='whitegrid')

    id_columns = ['k', 'covar_type', 'covar_tied']
    # Keep only the needed keys so the DataFrame stays small.
    records = filter_dict_list_by_keys(tasks, id_columns + ['bic', 'aic'])
    frame = pd.DataFrame(records)

    frame['covar_type'] = [name.capitalize() for name in frame['covar_type']]
    tied_labels = ['Untied', 'Tied']
    frame['covar_tied'] = [tied_labels[flag] for flag in frame['covar_tied']]
    for metric in ('aic', 'bic'):
        frame[metric] = frame[metric].astype('float')

    # Long format: one row per (k, covar_type, covar_tied, metric) value.
    long_frame = pd.melt(frame, id_vars=id_columns, value_vars=['aic', 'bic'],
                         var_name='metric')

    grid = sns.factorplot(
        x='k',
        y='value',
        col='covar_type',
        row='covar_tied',
        hue='metric',
        data=long_frame,
        row_order=['Tied', 'Untied'],
        col_order=['Full', 'Diag', 'Spher'],
        legend=True,
        legend_out=True,
        ci=95,
        n_boot=100,
    )
    grid.set_titles("{col_name}-{row_name}")
    grid.set_xlabels("Num. of Clusters (K)")
    return grid.fig
def mem_svg(self, table, column, outfile):
    """
    Save a bar chart of memory use per dataset to ``outfile``.

    ``column`` of ``table`` is renamed to 'Memory', reduced to its maximum per
    (Dataset, Threads, Program) group and divided by 1000000 to give the
    plotted 'MemoryMB' value. With a single thread count the bars are
    programs; otherwise they are thread counts coloured by program.
    """
    import matplotlib
    # Select the non-interactive backend before pyplot is imported.
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import seaborn as sb
    sb.set(style="whitegrid")

    renamed = table.rename(columns={column: 'Memory'})
    grouped = renamed.groupby(['Dataset', 'Threads', 'Program'])
    svgdat = grouped.agg({'Memory': max}).reset_index()
    svgdat = svgdat.assign(MemoryMB=svgdat['Memory'] / 1000000)

    single_thread = len(svgdat.Threads.unique()) == 1
    shared_args = dict(y='MemoryMB', col="Dataset", data=svgdat,
                       kind="bar", ci=None, sharey=True)
    if single_thread:
        grid = sb.factorplot(x='Program', **shared_args)
        grid = grid.set_titles('')
    else:
        grid = sb.factorplot(x='Threads', hue="Program", **shared_args)

    # NOTE(review): in the single-thread layout the x axis shows programs but
    # is still labelled 'Threads' - confirm whether that is intended.
    grid = grid.set_xlabels('Threads')
    grid = grid.set_ylabels('Memory (MB)')
    grid = grid.set_xticklabels(rotation=90)
    grid.fig.subplots_adjust(wspace=0.35)
    grid.savefig(outfile)
def consensus_support_plot(consensus_data, ordered_genomes, biotypes, modes, title, tgt):
    """
    grouped violin plots of original intron / intron annotation / exon annotation support

    One page is drawn for all data, then one page per biotype for which
    ``biotype_filter`` returns data.

    :param consensus_data: input handed to json_to_df_with_biotype
    :param ordered_genomes: genomes in the order they should appear on the x axis
    :param biotypes: biotypes to draw individual pages for
    :param modes: support columns to plot; each is loaded separately and they
        are melted into 'variable'/'value'
    :param title: title of the first page; per-biotype pages append ' for <biotype>'
    :param tgt: target object whose ``open('w')`` yields the handle the PDF is
        written to
    """
    def adjust_plot(g, this_title):
        g.set_xticklabels(rotation=90)
        g.fig.suptitle(this_title)
        # leave headroom for the suptitle
        g.fig.subplots_adjust(top=0.9)
        for ax in g.axes.flat:
            ax.set_ylabel('Percent supported')
            ax.set_ylim(-1, 101)

    def support_grid(data):
        """Draw the violin grid for ``data`` using the genome-count-dependent layout."""
        if len(ordered_genomes) > 1:
            # `row_order` is ignored by seaborn when no `row` variable is
            # given, so the genome order is applied through `order` (x axis).
            # Genomes absent from ordered_genomes are appended so that no
            # data is dropped.
            present = list(data['genome'].unique())
            genome_order = [g for g in ordered_genomes if g in present]
            genome_order += [g for g in present if g not in ordered_genomes]
            return sns.factorplot(data=data, y='value', x='genome', col='variable',
                                  col_wrap=2, kind='violin', sharex=True, sharey=True,
                                  order=genome_order, cut=0)
        return sns.factorplot(data=data, y='value', x='variable', kind='violin',
                              sharex=True, sharey=True, cut=0)

    dfs = []
    for i, mode in enumerate(modes):
        df = json_to_df_with_biotype(consensus_data, mode)
        if i > 0:
            # later frames contribute only their own mode column
            df = df[mode]
        dfs.append(df)
    df = pd.concat(dfs, axis=1)
    df = pd.melt(df, value_vars=modes, id_vars=['genome', 'biotype'])
    with tgt.open('w') as outf, PdfPages(outf) as pdf:
        g = support_grid(df)
        adjust_plot(g, title)
        multipage_close(pdf, tight_layout=False)
        title += ' for {}'
        for biotype in biotypes:
            this_title = title.format(biotype)
            biotype_df = biotype_filter(df, biotype)
            if biotype_df is not None:
                # Plot the biotype subset in both layouts; the single-genome
                # branch used to plot the unfiltered frame.
                g = support_grid(biotype_df)
                adjust_plot(g, this_title)
                multipage_close(pdf, tight_layout=False)
def tm_gene_family_plot(tm_data, ordered_genomes, biotypes, gene_family_tgt):
    """
    transMap gene family collapse plots.

    Writes one bar-plot page for the totals over all biotypes, followed by one
    page per biotype for which ``biotype_filter`` returns data. If the input
    cannot be converted (ValueError) an empty target file is created instead.
    """
    collapse_col = 'Gene Family Collapse'

    def draw_page(pdf, data, heading):
        """Draw one per-genome bar grid and close it into the PDF."""
        grid = sns.factorplot(y='count', col='genome', x=collapse_col, data=data,
                              kind='bar', col_order=ordered_genomes, col_wrap=4)
        grid.fig.suptitle(heading)
        grid.set_xlabels('Number of genes collapsed to one locus')
        grid.set_ylabels('Number of genes')
        multipage_close(pdf)

    try:
        df = json_biotype_nested_counter_to_df(tm_data, collapse_col)
    except ValueError:
        # no gene family collapse. probably the test set.
        with gene_family_tgt.open('w'):
            pass
        return

    df[collapse_col] = pd.to_numeric(df[collapse_col])
    per_genome = df[[collapse_col, 'genome', 'count']]
    totals = per_genome.groupby(['genome', collapse_col]).aggregate(sum)
    totals = totals.reset_index().sort_values(collapse_col)

    with gene_family_tgt.open('w') as outf, PdfPages(outf) as pdf:
        draw_page(pdf, totals, 'Number of genes collapsed during gene family collapse')
        for biotype in biotypes:
            subset = biotype_filter(df, biotype)
            if subset is not None:
                heading = 'Number of genes collapsed during gene family collapse for {}'.format(biotype)
                draw_page(pdf, subset.sort_values(collapse_col), heading)
def pb_support_plot(consensus_data, ordered_genomes, pb_genomes, pb_support_tgt):
    """
    Bar plot of IsoSeq transcript validation counts per genome, written to a PDF.

    :param consensus_data: input handed to json_biotype_counter_to_df together
        with the 'IsoSeq Transcript Validation' key
    :param ordered_genomes: all genomes, in display order
    :param pb_genomes: genomes to order on the x axis; re-sorted here to follow
        ordered_genomes
    :param pb_support_tgt: target object whose ``open('w')`` yields the handle
        the PDF is written to
    """
    with pb_support_tgt.open('w') as outf, PdfPages(outf) as pdf:
        pb_genomes = [x for x in ordered_genomes if x in pb_genomes]  # fix order
        df = json_biotype_counter_to_df(consensus_data, 'IsoSeq Transcript Validation')
        df.columns = ['IsoSeq Transcript Validation', 'Number of transcripts', 'genome']
        # The fixed order must be passed as `order` (x axis); `row_order` is
        # ignored by seaborn when no `row` variable is given. Genomes in the
        # data but missing from pb_genomes are appended so no bar is dropped.
        present = list(df['genome'].unique())
        genome_order = [g for g in pb_genomes if g in present]
        genome_order += [g for g in present if g not in pb_genomes]
        ax = sns.factorplot(data=df, x='genome', y='Number of transcripts',
                            hue='IsoSeq Transcript Validation', kind='bar',
                            order=genome_order)
        ax.set_xticklabels(rotation=90)
        ax.fig.suptitle('Isoforms validated by at least one IsoSeq read')
        multipage_close(pdf, tight_layout=False)
def indel_plot(consensus_data, ordered_genomes, indel_plot_tgt):
    """
    Bar plots of coding indel values per genome for the transMap and Consensus
    transcript sets, written to a PDF.

    :param consensus_data: mapping genome -> dict holding the 'transMap Indels'
        and 'Consensus Indels' entries
    :param ordered_genomes: genomes in the order they should appear on the x axis
    :param indel_plot_tgt: target object whose ``open('w')`` yields the handle
        the PDF is written to
    """
    def indel_frame(key, label):
        """Build a one-row-per-genome frame for ``key``, tagged with ``label``."""
        frame = pd.concat([pd.DataFrame.from_dict(consensus_data[genome][key], orient='index').T
                           for genome in ordered_genomes])
        frame['genome'] = ordered_genomes
        frame['transcript set'] = [label] * len(frame)
        return frame

    with indel_plot_tgt.open('w') as outf, PdfPages(outf) as pdf:
        tm_df = indel_frame('transMap Indels', 'transMap')
        consensus_df = indel_frame('Consensus Indels', 'Consensus')
        df = pd.concat([consensus_df, tm_df])
        df = pd.melt(df, id_vars=['genome', 'transcript set'],
                     value_vars=['CodingDeletion', 'CodingInsertion', 'CodingMult3Indel'])
        df.columns = ['Genome', 'Transcript set', 'Type', 'Percent of transcripts']
        # The genome order belongs in `order` (x axis); `row_order` is ignored
        # by seaborn when no `row` variable is given.
        g = sns.factorplot(data=df, x='Genome', y='Percent of transcripts',
                           col='Transcript set', hue='Type', kind='bar',
                           order=list(ordered_genomes),
                           col_order=['transMap', 'Consensus'])
        g.set_xticklabels(rotation=90)
        # leave headroom for the suptitle
        g.fig.subplots_adjust(top=.8)
        g.fig.suptitle('Coding indels')
        multipage_close(pdf, tight_layout=False)


###
# shared plotting functions
###
def plot_models(accs):
    """
    Draw a bar plot of 'score' per 'model', coloured by 'pipeline'.

    ``accs`` is turned into a DataFrame with the columns
    ['model', 'pipeline', 'score']. Nothing is returned.
    """
    column_names = ['model', 'pipeline', 'score']
    scores = pd.DataFrame(accs, columns=column_names)
    sns.factorplot(data=scores, kind='bar', x='model', y='score', hue='pipeline')
def two_way_factor_plot(dataFrame, x, y, col, row):
    """
    Draw a bar factorplot of ``y`` against ``x``, faceted by ``col`` and ``row``.

    Uses margin titles, facet size 3 with aspect 0.8 and the "Set3" palette.
    Nothing is returned.
    """
    layout = dict(kind='bar', margin_titles=True, size=3, aspect=.8,
                  palette="Set3")
    sns.factorplot(data=dataFrame, x=x, y=y, col=col, row=row, **layout)
def factorplots(wine_set):
    """
    Show a strip plot of alcohol level against wine quality.

    The title names red wine when ``wine_set`` equals the module-level ``red``
    frame, and white wine otherwise.
    """
    seaborn.factorplot(data=wine_set, kind="strip", x="quality", y="alcohol")
    plt.xlabel("Quality level of wine, 0-10 scale")
    plt.ylabel("Alcohol level in wine, % ABV")
    heading = (
        "Alcohol percent in each level of red wine's quality"
        if wine_set.equals(red)
        else "Alcohol percent in each level of white wine's quality"
    )
    plt.title(heading)
    plt.show()
def _plotMonthlyStats(stats, columns, groupBy=True):
    """
    Bar-plot the given stat columns against the month, one facet per stat.

    :param stats: data to plot; with ``groupBy`` it needs a datetime 'date'
        column, otherwise it must already contain a 'month' column
    :param columns: columns from stats to plot
    :param groupBy: when true, average the data per month before plotting
    :return: the grid returned by sns.factorplot
    """
    dataToPlot = stats.copy()
    # Group by month and rename date column
    if groupBy:
        dataToPlot = dataToPlot.groupby(stats['date'].dt.month).mean()
        dataToPlot = dataToPlot.reset_index().rename(columns={'date': 'month'})

    # change stats from columns to row attribute
    dataToPlot = pd.melt(dataToPlot, id_vars=['month'], value_vars=columns,
                         var_name='stats', value_name='val')
    # Rename stats and months.
    # Assign the result back instead of calling replace(inplace=True) on a
    # column selection: the chained in-place form does not update the frame
    # under pandas copy-on-write.
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    dataToPlot['month'] = dataToPlot['month'].replace(months)
    # Only months that occur in the data, in calendar order.
    present_months = dataToPlot['month'].unique()
    order = [m for m in monthsOrder if m in present_months]
    # Plot
    g = sns.factorplot(data=dataToPlot, x="month", y="val", col="stats",
                       order=order, kind="bar", sharey=False)
    g.set_xticklabels(rotation=45)
    g.set(xlabel='')
    return g
    #sns.plt.show()

# def _plotMonthlyStats(stats, columns):
#     """
#     Plot aggregated (mean) stats by month
#     :param stats: data to plot
#     :param columns: columns from stats to plot
#     """
#     MEASURE_NAME = 'month'
#     months={1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', 7:'Jul', 8:'Aug',
#             9:'Sep', 10:'Oct', 11:'Nov', 12:'Dec'}
#     order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
#     stats[MEASURE_NAME] = stats[MEASURE_NAME].map(months)
#
#     order = [m for m in order if m in stats[MEASURE_NAME].unique()]
#
#     f, axes = getAxes(2,2)
#     for i, c in enumerate(columns):
#         if c in NAMES:
#             c = NAMES[c]
#         g = sns.barplot(x=MEASURE_NAME, y=c, data=stats, order=order, ax=axes[i])
#         g.set_xlabel('')
#     sns.plt.show()