我们从Python开源项目中，提取了以下25个代码示例，用于说明如何使用seaborn.boxplot()。
def boxplot_metrics(df, eval_dir):
    """
    Save a three-panel box-plot summary of the geometric measures.

    One panel each is drawn for the ``dice``, ``hd`` and ``assd`` columns,
    with ``struc`` on the x-axis and ``phase`` as hue.

    :param df: data frame providing the columns ``struc``, ``phase``,
        ``dice``, ``hd`` and ``assd``
    :param eval_dir: directory in which ``boxplots.eps`` is written
    :return: always 0
    """
    target_path = os.path.join(eval_dir, 'boxplots.eps')

    fig, axes = plt.subplots(3, 1)
    fig.set_figheight(14)
    fig.set_figwidth(7)

    # One stacked panel per measure, top to bottom.
    for panel, measure in zip(axes, ('dice', 'hd', 'assd')):
        sns.boxplot(x='struc', y=measure, hue='phase', data=df,
                    palette="PRGn", ax=panel)

    plt.savefig(target_path)
    plt.close()
    return 0
def plot(params_dir):
    """
    Box-plot the best validation objective of every model under ``params_dir``.

    Each sub-directory of ``params_dir`` is treated as one model. For every
    ``*.h5`` file inside it, the value stored under
    ``['best_epoch']['validate_objective']`` is collected. The resulting
    table (one column per model) is written as CSV to a file named after the
    last component of ``params_dir`` in the current working directory, and
    the box plot is saved as ``<name>_errors_box_plot.png`` below
    ``IMAGES_DIRECTORY``.

    :param params_dir: directory containing one sub-directory per model
    """
    model_dirs = [name for name in os.listdir(params_dir)
                  if os.path.isdir(os.path.join(params_dir, name))]

    errors = {}
    for model_dir in model_dirs:
        # Strip the common suffix so that the tick labels stay short.
        label = re.sub('_bin_scaled_mono_True_ratio', '', model_dir)
        pattern = os.path.join(params_dir, model_dir) + '/*.h5'
        errors[label] = [
            dd.io.load(path)['best_epoch']['validate_objective']
            for path in glob.glob(pattern)]

    # Wrapping each list in a Series lets models with different numbers of
    # runs share one frame (shorter columns are padded with NaN).
    # ``items()`` replaces the Python-2-only ``iteritems()``.
    df = pd.DataFrame({k: pd.Series(v) for k, v in errors.items()})

    run_name = os.path.basename(os.path.normpath(params_dir))
    df.to_csv(run_name)

    plt.figure(figsize=(16, 4), dpi=300)
    # Pass the wide-form frame by keyword: the first positional parameter of
    # ``sns.boxplot`` has historically been ``x``, not ``data``.
    g = sns.boxplot(data=df)
    g.set_xticklabels(df.columns, rotation=45)
    plt.tight_layout()
    plt.savefig('{}_errors_box_plot.png'.format(
        os.path.join(IMAGES_DIRECTORY, run_name)))
def plot_group(data_frame, path_output):
    """
    Save a box plot of the "DAB+ area, %" column grouped by "Group".

    The y-axis limits are set to 0..100 before plotting and the figure is
    written to ``summary_statistics.png`` inside ``path_output``.

    :param data_frame: frame with the columns "Group" and "DAB+ area, %"
    :param path_output: directory that receives the image
    """
    # optional import
    import seaborn as sns

    sns.set_style("whitegrid")
    sns.set_context("talk")

    plt.figure(num=None, figsize=(15, 7), dpi=120)
    plt.ylim(0, 100)
    plt.title('Box plot')
    sns.boxplot(x="Group", y="DAB+ area, %", data=data_frame)
    plt.tight_layout()

    plt.savefig(os.path.join(path_output, "summary_statistics.png"), dpi=300)
def boxplot(self, fig_width: Number, fig_height: Number = None):
    """
    Draw a horizontal box plot of the frame returned by ``get_data_frame()``.

    The new figure is stored in ``self._fig``.

    :param fig_width: width of the figure in cm
    :param fig_height: height of the figure in cm; when None it is derived
        from the width via ``self._height_for_width``
    """
    import seaborn as sns
    import matplotlib.pyplot as plt

    self.reset_plt()
    height = self._height_for_width(fig_width) if fig_height is None else fig_height
    size_in_inches = self._fig_size_cm_to_inch(fig_width, height)
    self._fig = plt.figure(figsize=size_in_inches)
    sns.boxplot(data=self.get_data_frame(), orient="h")
def plot_averages(df, figname, fignum, use_en_source=True, num_accs=3):
    """
    Box-plot accuracy against layer index and save the figure.

    Rows are restricted to English-source / non-English-target pairs when
    ``use_en_source`` is true, otherwise to the reverse direction. The
    accuracies come from ``get_accs_from_df(..., col_pref='acc')`` and are
    flattened; the layer labels 0..4 are generated here, in a different
    repetition pattern for each direction.

    :param df: frame with at least the columns ``source`` and ``target``
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw into
    :param use_en_source: select the en -> other direction when true
    :param num_accs: how many times the 25 layer labels are repeated
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        subset = df[(df.source == 'en') & (df.target != 'en')]
        # 0,0,0,0,0,1,1,1,1,1,...,4 repeated num_accs times
        layer_ids = np.concatenate([[layer] * 5 for layer in range(5)] * num_accs)
    else:
        subset = df[(df.source != 'en') & (df.target == 'en')]
        # 0,1,2,3,4 repeated 5 * num_accs times
        layer_ids = list(range(5)) * 5 * num_accs

    accuracies = np.concatenate(get_accs_from_df(subset, col_pref='acc'))
    frame = pd.DataFrame({'Layer': layer_ids, 'Accuracy': accuracies})

    sns.boxplot(x='Layer', y='Accuracy', data=frame)
    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
def plot_averages_by_type(df, figname, fignum, use_en_source=True, pointplot=True, layer0=True):
    """
    Plot accuracy per relation, coloured by layer, and save the figure.

    :param df: frame with the columns ``source``, ``target``, ``layer``,
        ``accuracy`` and ``relation``
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw into
    :param use_en_source: keep en -> other rows when true, other -> en rows
        otherwise
    :param pointplot: draw a point plot (without joining lines) when true,
        a box plot otherwise
    :param layer0: when false, rows whose ``layer`` equals the string '0'
        are dropped
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        subset = df[(df.source == 'en') & (df.target != 'en')]
    else:
        subset = df[(df.source != 'en') & (df.target == 'en')]
    if not layer0:
        subset = subset[subset.layer != '0']

    shared = dict(x='accuracy', y='relation', hue='layer', data=subset)
    if pointplot:
        sns.pointplot(join=False, **shared)
    else:
        sns.boxplot(**shared)

    plt.xlabel('Accuracy')
    plt.ylabel('')
    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
def cross_section_cndl(data, factor_name):
    '''Draw one box per ``Date`` for the values of a single factor.

    NOTE(review): the original docstring was mis-encoded; this summary is
    reconstructed from the code only.
    ------------------------------
    data:DataFrame(index:[Date,IDs],factor1,factor2,...)
    factor_name:str

    Returns the object returned by ``sns.boxplot``.
    '''
    # Turn the index levels into ordinary columns so 'Date' can serve as x.
    flat = data.reset_index()
    sns.set(style='ticks')
    axes = sns.boxplot(x='Date', y=factor_name, data=flat, palette='PRGn')
    sns.despine(offset=10, trim=True)
    return axes


# NOTE(review): a mis-encoded (unreadable) section comment followed here in
# the original source.
def plot_author_contributions(commit_frame):
    """
    Show a vertical box plot of ``stats_total_lines`` per ``author``.

    :param commit_frame: frame with the columns ``author`` and
        ``stats_total_lines``
    """
    sns.boxplot(
        x='author',
        y='stats_total_lines',
        data=commit_frame,
        orient='v',
    )

    # Axis decoration; the author tick labels are rotated by 70 degrees.
    plt.xticks(rotation=70)
    plt.xlabel('Author')
    plt.ylabel('Total Lines Committed')
    plt.title('Code Contributions by Authors')

    plt.show()
def plot_fnc(self, *args, **kwargs):
    """
    Draw a box plot by forwarding every argument unchanged to ``sns.boxplot``.

    The value returned by ``sns.boxplot`` is discarded.
    """
    sns.boxplot(*args, **kwargs)
def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]:
    """
    Calculates the upper and the lower whisker for a boxplot.

    I.e. the minimum and the maximum value of the data set that lie in the
    range [Q1 - whis * IQR, Q3 + whis * IQR], IQR being the interquartile
    distance, Q1 the lower and Q3 the upper quartile. If no value
    qualifies on one side (or the qualifying extreme lies inside the box),
    the corresponding quartile is returned instead.

    Adapted from http://stackoverflow.com/a/20096945

    :param whis: multiple of the IQR that the whiskers may extend beyond
        the quartiles
    :return: tuple (lower whisker, upper whisker)
    """
    q1, _, q3 = self.quartiles()
    iqr = self.iqr()

    # get high extreme: the fence is measured from the UPPER quartile
    # (the previous code measured it from Q1, which cut the whisker short)
    hi_val = q3 + whis * iqr
    whisk_hi = np.compress(self.array <= hi_val, self.array)
    if len(whisk_hi) == 0 or np.max(whisk_hi) < q3:
        whisk_hi = q3
    else:
        whisk_hi = np.max(whisk_hi)

    # get low extreme
    lo_val = q1 - whis * iqr
    whisk_lo = np.compress(self.array >= lo_val, self.array)
    if len(whisk_lo) == 0 or np.min(whisk_lo) > q1:
        whisk_lo = q1
    else:
        whisk_lo = np.min(whisk_lo)

    return whisk_lo, whisk_hi
def plot_averages_by_distance(df, figname, fignum, use_en_source=True, num_accs=24, pointplot=True, hue='Distance'):
    """
    Plot accuracy by layer and distance bucket and save the figure.

    The accuracies come from ``get_accs_from_df(..., col_pref='dist')``.
    Layer labels (0..4) and distance labels (the first eight entries of
    ``pretty_dist_names_list``, each repeated 75 times) are generated here.

    :param df: frame with at least the columns ``source`` and ``target``
    :param figname: path the figure is saved to
    :param fignum: matplotlib figure number to draw into
    :param use_en_source: keep en -> other rows when true, other -> en rows
        otherwise
    :param num_accs: how many times the 25 layer labels are repeated
    :param pointplot: draw a point plot when true, a box plot otherwise
    :param hue: 'Distance' puts layers on the x-axis and colours by
        distance; any other value swaps the two roles
    :return: ``fignum + 1``
    """
    plt.figure(fignum)

    if use_en_source:
        subset = df[(df.source == 'en') & (df.target != 'en')]
        layer_ids = np.concatenate([[layer] * 5 for layer in range(5)] * num_accs)
    else:
        subset = df[(df.source != 'en') & (df.target == 'en')]
        layer_ids = list(range(5)) * 5 * num_accs

    accuracies = np.concatenate(get_accs_from_df(subset, col_pref='dist'))
    distance_labels = np.concatenate(
        [[pretty_dist_names_list[k]] * 75 for k in range(8)])
    frame = pd.DataFrame({
        'Layer': layer_ids,
        'Accuracy': accuracies,
        'Distance': distance_labels,
    })

    draw = sns.pointplot if pointplot else sns.boxplot
    if hue == 'Distance':
        draw(x='Layer', y='Accuracy', data=frame, hue='Distance')
    else:
        draw(x='Distance', y='Accuracy', data=frame, hue='Layer')
        # NOTE(review): the eight distance tick labels are applied only when
        # the distance is on the x-axis - confirm against the upstream layout.
        plt.xticks(range(8), pretty_dist_names_list)

    plt.tight_layout()
    plt.savefig(figname)
    return fignum + 1
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Produces univariate plots of a given set of columns. Horizontal bar plots
    are used for categorical columns while histograms (with fitted density
    functions) are used for numerical columns.

    If use_target is true, each column is instead handed to
    ``explore_features_covariation`` together with the target ``self.y``
    (e.g., 2d scatter plots, boxplots, etc).

    Parameters
    ----------
    col : a string of a column name, or a list of many column names or None
        (default). If col is None, all columns will be used.
    use_target : bool, default False
        Whether to use the target column in the plots.
    **kwargs : additional arguments to be passed to seaborn's distplot, to
        pandas's plotting utilities, or to ``explore_features_covariation``.

    Raises
    ------
    TypeError
        If a column is reported as neither numeric nor categorical.
    '''
    self._validate_params(params_list={'col': col},
                          expected_types={'col': [str, list, type(None)]})

    # isinstance (rather than an exact type check) so that a str subclass is
    # wrapped as ONE column name instead of being iterated character-wise.
    if isinstance(col, str):
        col = [col]
    elif col is None:
        col = self._get_all_features()

    if use_target:
        # use target variable
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y, **kwargs)
        return

    for column in col:
        series = self.df[column]
        if self.is_numeric(series):
            plt.figure(column)
            sns.distplot(series, color="m", **kwargs)
            plt.title(column)
            plt.tight_layout()
        elif self.is_categorical(series):
            plt.figure(column)
            counts = series.value_counts()
            if len(series.unique()) > 30:
                # Too many levels to read: keep only the 20 most frequent.
                counts = counts[:20]
            # Reverse so that the most frequent level ends up on top.
            counts[::-1].plot.barh(**kwargs)
            plt.title(column)
            plt.tight_layout()
        else:
            raise TypeError('TYPE IS NOT SUPPORTED')
def calculate_word_count_stats(articles: pd.DataFrame):
    """Calculate aggregate word count statistics on each source's articles.

    Prints the per-source count / mean / standard deviation of
    ``word_count`` (sorted by count, descending) via ``print_full``, then
    shows two plots for the ten sources with the most articles: a bar plot
    of the article counts and a box plot of the word counts.

    :param articles: frame with the columns ``base_url`` and ``word_count``
    """
    # The ``sns.plt`` alias is no longer exposed by seaborn, so pyplot is
    # imported directly.
    import matplotlib.pyplot as plt

    by_source = articles.groupby(['base_url'])['word_count']
    by_source = by_source.agg(['count', 'mean', 'std'])
    by_source.sort_values('count', ascending=False, inplace=True)
    print_full(by_source)

    top_sources = by_source.head(10).index
    # The mask is built from the (still indexed) stats frame and applied
    # positionally to its reset-index copy, which has the same row order.
    top_counts = by_source.reset_index()[by_source.index.isin(top_sources)]
    sns.barplot(x='base_url', y='count', data=top_counts)
    plt.show()

    sns.boxplot(x='base_url', y='word_count',
                data=articles[articles['base_url'].isin(top_sources)])
    plt.show()
def show_articles_by_source(articles: pd.DataFrame):
    """Show boxplot comparing articles by source for fake and true news.

    Articles are counted per (``base_url``, ``labels``) pair; only pairs
    with more than 100 articles are kept, and the distribution of those
    counts is box-plotted per label.

    :param articles: frame with the columns ``base_url`` and ``labels``
    """
    # The ``sns.plt`` alias is no longer exposed by seaborn, so pyplot is
    # imported directly.
    import matplotlib.pyplot as plt

    by_source = (articles.groupby(['base_url', 'labels'])
                 .size()
                 .reset_index(name='count'))
    by_source = by_source[by_source['count'] > 100]
    sns.boxplot(x='labels', y='count', data=by_source)
    plt.show()
def plot_box(self, fname_out=None):
    """
    Box-plot the ``r2`` column of ``self.df_best_expand`` per ``Method``.

    The figure is saved to ``fname_out`` when given. Otherwise, if
    ``self.fname`` is set, the target is ``self.fname`` with its last four
    characters replaced by ``_box.eps`` (and the chosen name is printed).
    When neither is available nothing is saved.
    """
    sns.boxplot(x="Method", y="r2", data=self.df_best_expand, palette="PRGn")
    sns.despine(offset=10, trim=True)
    plt.ylabel(r"$r^2$")
    plt.xlabel("Methods")

    if fname_out is None:
        if self.fname is None:
            return
        # Original author's note: index should be stored.
        fname_out = self.fname[:-4] + '_box.eps'
        print('Default: the figure of self.df_best_expand is saved to', fname_out)

    plt.savefig(fname_out)
def boxplot_expension(pdr, method_l, x="Group", y="RP", hue="Method"):
    """
    Expand ``pdr`` with ``expension_4_boxplot`` and box-plot the result.

    :param pdr: input frame handed to ``expension_4_boxplot``
    :param method_l: list of methods handed to ``expension_4_boxplot``,
        e.g. ['No_Regression', 'Mean_Compensation', 'Linear', 'Exp']
    :param x: forwarded to ``expension_4_boxplot``
    :param y: forwarded to ``expension_4_boxplot`` and used as the plotted
        value column
    :param hue: forwarded to ``expension_4_boxplot``
    """
    expanded = expension_4_boxplot(pdr, method_l, x=x, y=y, hue=hue)

    # NOTE(review): the plot always uses the literal column names "Group"
    # and "Method" rather than the ``x`` / ``hue`` arguments - confirm
    # whether that is intended.
    sns.boxplot(x="Group", y=y, hue="Method", data=expanded, palette="PRGn")
    sns.despine(offset=10, trim=True)
def plot_multi(names, models, angles, runs=1000):
    """
    Box-plot the per-model ``matched`` values over random test samples.

    Up to ``runs`` indices are drawn without replacement from ``X_test``;
    for each one ``compare(idx, angles, models)`` is evaluated and its
    second return value collected. Columns are ordered by ascending mean
    and labelled with ``names``.

    :param names: one label per column of the collected results
    :param models: passed through to ``compare``
    :param angles: passed through to ``compare``
    :param runs: maximum number of test samples to evaluate
    """
    from collections import OrderedDict

    indices = np.random.permutation(len(X_test))[:runs]

    matched_all = []
    for i, idx in enumerate(indices):
        print("Processing {}/{}".format(i, len(indices)))
        # Only the second return value is needed here.
        _, matched = compare(idx, angles, models)
        matched_all.append(matched)
    matched_all = np.array(matched_all)

    order = np.argsort(np.mean(matched_all, axis=0))
    # ``DataFrame.from_items`` was removed from pandas; an ordered mapping
    # keeps the same column order.
    df = pd.DataFrame(
        OrderedDict((names[i], matched_all[:, i]) for i in order))

    sb.boxplot(data=df)
    plt.show()
def stripboxplot(x, y, data, ax=None, significant=None, **kwargs):
    """
    Overlay a stripplot on top of a boxplot.

    :param x: column of ``data`` for the x-axis
    :param y: column of ``data`` for the y-axis
    :param data: frame holding both columns
    :param ax: axes to draw on; forwarded to seaborn
    :param significant: when not None, passed to
        ``add_significance_indicator``
    :param kwargs: forwarded to both seaborn calls. ``jitter`` (default
        0.05) is used by the stripplot only; ``color`` defaults to "0.3"
        for the stripplot when not given.
    :return: the object returned by ``sb.stripplot``
    """
    # ``jitter`` must be removed BEFORE the boxplot call: it is a
    # stripplot-only option and would otherwise be forwarded to the boxplot.
    jitter = kwargs.pop("jitter", 0.05)

    ax = sb.boxplot(
        x=x, y=y, data=data, ax=ax, fliersize=0, **kwargs
    )

    # An explicit ``color`` has already reached the boxplot above (as
    # before); the strip points fall back to dark grey without one.
    strip_color = kwargs.pop("color", "0.3")
    plot = sb.stripplot(
        x=x, y=y, data=data, ax=ax,
        jitter=jitter, color=strip_color, **kwargs
    )

    if data[y].min() >= 0:
        hide_negative_y_ticks(plot)
    if significant is not None:
        add_significance_indicator(plot=plot, significant=significant)
    return plot
def visualize_feature_boxplot(X, y, selected_feature, features):
    """
    Draw and save the box plot of one feature, grouped by the 'Y' column.

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    selected_feature -- Name of the feature column to plot
    features -- Column names for X and y stacked side by side; the plot
                reads a column called 'Y' from them
    """
    # Features and target side by side in one frame.
    frame = pd.DataFrame(data=np.column_stack((X, y)), columns=features)

    sea.boxplot(data=frame, x='Y', y=selected_feature, hue="Y", palette="husl")
    plt.title('BoxPlot Distribution of ' + selected_feature)

    # save fig
    target_dir = "img"
    save_fig(target_dir, '{}/{}_boxplot.png'.format(target_dir, selected_feature))
def inspect_bulk(df, df_bulk, de_genes, de_genes_bulk):
    """
    Write QC plots for the bulk expression matrix below ``results/bulk``.

    For the single quantification type ("bitseq", taken from ``df_bulk``)
    this saves a box plot of the melted expression values and, for each of
    the two gene sets, a z-scored clustered heatmap plus a clustered
    heatmap of the sample correlation matrix.

    :param df: not used by this function
    :param df_bulk: bulk expression matrix; presumably genes as rows -
        TODO confirm
    :param de_genes: row labels of the first signature gene set
    :param de_genes_bulk: row labels of the second signature gene set
    """
    # Explicit (name, labels) pairs replace the former ``eval(name)`` lookup.
    genesets = [("de_genes", de_genes), ("de_genes_bulk", de_genes_bulk)]
    quant_types = [("bitseq", df_bulk)]

    for quant_type, exp_matrix in quant_types:
        print(quant_type)

        # Boxplots of expression
        fig, axis = plt.subplots(1)
        sns.boxplot(data=pd.melt(exp_matrix), x="grna", y="value", hue="condition", ax=axis)
        fig.savefig(
            os.path.join("results", "bulk", "bulk_samples.qc.{}.expression_boxplots.png".format(quant_type)),
            dpi=300, bbox_inches="tight")

        # Heatmap and correlation on signature genes
        for geneset_name, genes in genesets:
            # ``.ix`` was removed from pandas. ``reindex`` keeps its lenient
            # behaviour for labels that are absent (all-NaN rows, dropped
            # right after).
            # NOTE(review): assumes ``genes`` is a list of row labels and the
            # index has no duplicates - confirm.
            signature = exp_matrix.reindex(genes).dropna()

            g = sns.clustermap(
                signature, z_score=0,
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _rotate_heatmap_ticklabels(g)
            g.fig.savefig(
                os.path.join("results", "bulk", "bulk_samples.qc.{}.{}.png".format(quant_type, geneset_name)),
                dpi=300, bbox_inches="tight")

            g = sns.clustermap(
                signature.corr(),
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _rotate_heatmap_ticklabels(g)
            g.fig.savefig(
                os.path.join("results", "bulk", "bulk_samples.qc.{}.{}.correlation.png".format(quant_type, geneset_name)),
                dpi=300, bbox_inches="tight")


def _rotate_heatmap_ticklabels(grid):
    """Set heatmap y tick labels to 0 degrees and x tick labels to 90."""
    for item in grid.ax_heatmap.get_yticklabels():
        item.set_rotation(0)
    for item in grid.ax_heatmap.get_xticklabels():
        item.set_rotation(90)
def plot_box(df, x, y, hue, tag='eda', directory=None):
    r"""Generate a box plot and hand its figure to ``write_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the ``x`` and ``y`` features.
    x : str
        Variable name in ``df`` to display along the x-axis.
    y : str
        Variable name in ``df`` to display along the y-axis.
    hue : str
        Variable name to be used as hue, i.e., another data dimension.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.boxplot.html

    """
    logger.info("Generating Box Plot")

    # Draw first, then strip the spines, then fetch the owning figure.
    axes = sns.boxplot(x=x, y=y, hue=hue, data=df)
    sns.despine(offset=10, trim=True)

    write_plot('seaborn', axes.get_figure(), 'box_plot', tag, directory)


#
# Function plot_swarm
#
def plot_alphadf(alphasdf, col_order, labeldict, metric='alpha'):
    """
    Plot faceted alpha diversity.

    One facet per study (six per row), each showing a box plot with the
    individual points overlaid.

    Parameters
    ----------
    alphasdf : pandas DataFrame
        columns ['study', 'alpha', 'DiseaseState']
    col_order : list
        dataset IDs in the order they should be plotted
    labeldict : dict
        dictionary with {dataset: label}
    metric : str
        alpha diversity metric, to use in labeling y axis

    Returns
    -------
    fig : Figure
    """
    sns.set_style('white')
    g = sns.FacetGrid(alphasdf, col='study', col_wrap=6,
                      col_order=col_order, sharex=False, sharey=False)
    g = g.map(sns.boxplot, "DiseaseState", "alpha")
    g = g.map(sns.stripplot, "DiseaseState", "alpha", split=True,
              jitter=True, size=5, linewidth=0.6)

    fig = plt.gcf()
    fig.set_size_inches(14.2, 9)

    # Fix y-axis gridlines
    for position, ax in enumerate(g.axes):
        ticks = ax.get_yticks()
        lowest = ticks[0]
        # If bottom limit is between 0 and 1 (i.e. not simpson)
        if not 0 < lowest < 1:
            ax.set_ylim(floor(lowest), floor(ticks[-1]))
        if lowest < 0:
            ax.set_ylim(0, floor(ticks[-1]))

        # Keep every other tick; skip the first one when it lies in (0, 1).
        ticks = ax.get_yticks()
        start = 1 if 0 < ticks[0] < 1 else 0
        ax.set_yticks(ticks[start::2])

        # Need some space on the y-axis for p-values
        bottom, top = ax.get_ylim()
        ax.set_ylim(bottom, 1.2 * top)

        # Update title: look up the label for the text after '='.
        dataset = ax.get_title().split('=')[1].strip()
        ax.set_title(labeldict[dataset])

        # Update y label (first facet of each row of six only)
        if position % 6 == 0:
            ax.set_ylabel(metric)

    plt.tight_layout()
    return fig
def plot_ubiq_abun_boxplot(tidy, metric, calculation):
    """
    Plot boxplot where x-axis is 'overall_significance' of genus, and values
    are either ubiquity or abundance in tidy (with the respective metric and
    calculation type). Only rows whose ``patient`` is "total" are used, and
    abundance values are shown as log10.

    Parameters
    ----------
    tidy : pandas dataframe
        has columns overall_significance, value, patient, metric, and
        calculation
    metric : str
        'abundance' or 'ubiquity'
    calculation: str
        'from_pooled_mean' or 'mean_of_datasets'

    Returns
    -------
    fig : Figure object
    ax : Axis object
    """
    fig, ax = plt.subplots(figsize=(5.5, 4))

    # Work on an explicit copy so the log transform below neither triggers
    # a chained-assignment warning nor can leak back into ``tidy``.
    tmp = tidy.query('metric == @metric')\
              .query('calculation == @calculation')\
              .query('patient == "total"')\
              .copy()

    boxprops = {'edgecolor': 'k', 'facecolor': 'w'}
    lineprops = {'color': 'k'}
    category_order = ['health', 'disease', 'mixed', 'not_sig']

    # Plot log10(abundance)
    if metric == 'abundance':
        tmp['value'] = np.log10(tmp['value'])

    sns.boxplot(data=tmp, x='overall_significance', y='value',
                fliersize=0, ax=ax, color='w',
                order=category_order,
                boxprops=boxprops,
                medianprops=lineprops,
                whiskerprops=lineprops,
                capprops=lineprops)
    sns.stripplot(data=tmp, x='overall_significance', y='value',
                  jitter=True, linewidth=0.6, split=True, ax=ax,
                  order=category_order,
                  color='w')

    return fig, ax
def visualize_boxplots(X, y):
    """
    Visualize the boxplots of the features

    Six horizontal box-plot figures are shown one after another, each
    covering a contiguous group of columns of ``X``.

    Keyword arguments:
    X -- The feature vectors (2-D array, indexed here up to column 22)
    y -- The target vector (not used by this function)
    """
    # (column slice of X, column labels, figure title)
    panels = (
        (slice(0, 1), ["Credit"],
         'BoxPlot Distribution of Credit'),
        (slice(1, 4), ["Gender", "Education", "Marital Status"],
         'BoxPlot Distribution of Features: Gender, Education and Marital Status'),
        (slice(4, 5), ["Age"],
         'BoxPlot Distribution of Age'),
        (slice(5, 11), ["X6", "X7", "X8", "X9", "X10", "X11"],
         'BoxPlot Distribution of Features: History of Payment'),
        (slice(11, 17), ["X12", "X13", "X14", "X15", "X16", "X17"],
         'BoxPlot Distribution of Features: Amount of Bill Statements'),
        # This panel previously re-plotted columns 11..16 under the
        # X18..X23 labels; it now uses columns 17..22.
        (slice(17, 23), ["X18", "X19", "X20", "X21", "X22", "X23"],
         'BoxPlot Distribution of Features: Amount of Previous Payments'),
    )

    for column_slice, labels, title in panels:
        df = pd.DataFrame(data=X[:, column_slice], columns=labels)
        sea.boxplot(data=df, orient="h", palette="husl")
        plt.title(title)
        plt.show()