我们从 Python 开源项目中提取了以下 31 个代码示例，用于说明如何使用 seaborn.barplot()。
def factor_plot(dataFrame, factors, prediction, color="Set3"):
    """Draw a count plot and a prediction bar plot for every factor.

    One subplot row is created per entry of ``factors``: the left column is a
    ``sns.countplot`` of the factor, the right column is a ``sns.barplot`` of
    ``prediction`` grouped by that factor (so in a conversion example, how
    many people converted, revenue per country, etc.).

    :param dataFrame: Data passed as ``data=`` to both seaborn calls.
    :param factors: Sequence of column names; one subplot row each.
    :param prediction: Column name plotted on the y axis of the bar plots.
    :param color: Palette used for the count plots (defaults to "Set3").
    """
    sns.set(style="whitegrid")

    # TODO: Set the width based on the max number of unique
    # values for the factors.
    # squeeze=False keeps the axes array two-dimensional even when only one
    # factor is given, so the [row][column] indexing below always works.
    _, axes = plt.subplots(len(factors), 2, figsize=(8, 12), squeeze=False)

    for row, factor in enumerate(factors):
        # Left column: how often each level of the factor occurs.
        sns.countplot(x=factor, palette=color, data=dataFrame,
                      ax=axes[row][0])
        # Right column: the prediction variable aggregated per level.
        sns.barplot(x=factor, y=prediction, data=dataFrame,
                    ax=axes[row][1])

    plt.tight_layout()  # Need this or else plots will crash into each other
def plot_bars_sns(data_map, title, xlab, ylab, plotter):
    """Render a dictionary as a bar plot through seaborn and save the page.

    :param data_map: A dictionary of labels and values.
    :param title: Plot title.
    :param xlab: X axis label.
    :param ylab: Y axis label.
    :param plotter: Object exposing ``pages.savefig()`` and ``plt.clf()``
        (documented upstream as a wub.vis.report.Report instance).
    """
    labels = list(data_map.keys())
    frame = pd.DataFrame({
        'Value': list(data_map.values()),
        'Label': labels,
        'x': np.arange(len(data_map)),
    })

    # Each label gets its own x position and its own hue.
    axes = sns.barplot(x="x", y="Value", hue="Label", data=frame,
                       hue_order=labels)
    axes.set_title(title)
    axes.set_xlabel(xlab)
    axes.set_ylabel(ylab)
    axes.set_xticks([])  # the x positions themselves carry no meaning

    plotter.pages.savefig()
    plotter.plt.clf()
def plot_pred_vs_image(img, preds_df, out_name):
    """Plot an image above a bar chart of its predictions and save as PNG.

    :param img: Image data handed to ``Axes.imshow``.
    :param preds_df: Data with 'Score' and 'Species' columns.
    :param out_name: Output path without extension; '.png' is appended.
    """
    _, axarr = plt.subplots(2, 1)
    plt.suptitle("ResNet50- PreTrained on ImageNet")
    axarr[0].imshow(img)
    sns.set_style("whitegrid")

    # Draw the bars once, explicitly on the lower axes (the original drew the
    # same bar plot twice on the current axes).
    sns.barplot(data=preds_df, x='Score', y='Species', ax=axarr[1])

    axarr[0].autoscale(enable=False)
    axarr[0].get_xaxis().set_ticks([])
    axarr[0].get_yaxis().set_ticks([])
    axarr[1].autoscale(enable=False)

    plt.tight_layout()
    plt.savefig(out_name + '.png')

#########################
# Models
#########################
# load model
def plotPrediction(pred):
    """
    Plots the prediction, then encodes it to base64

    :param pred: prediction accuracies, one per label
    :return: base64 encoded JPEG image (the result of ``base64.b64encode``)
    """
    # Local imports: io.BytesIO replaces the Python-2-only cStringIO, and
    # pyplot is used directly because recent seaborn releases do not expose
    # the ``sns.plt`` alias.
    import io
    import matplotlib.pyplot as plt

    labels = ['setosa', 'versicolor', 'virginica']
    sns.set_context(rc={"figure.figsize": (5, 5)})
    with sns.color_palette("RdBu_r", 3):
        ax = sns.barplot(x=labels, y=pred)
    ax.set(ylim=(0, 1))

    # Render the figure into an in-memory binary buffer, then base64 encode it
    image_buffer = io.BytesIO()
    plt.savefig(image_buffer, format='jpg')
    plt.show()
    return base64.b64encode(image_buffer.getvalue())
def image(path, costs):
    """Render a grey bar chart of counts per cost bucket and save it.

    :param path: Destination passed to ``Figure.savefig``.
    :param costs: Mapping from bucket label ('0'..'6', '7+', 'X') to a count;
        missing buckets count as 0.
    :return: ``path``, unchanged.
    """
    buckets = ['0', '1', '2', '3', '4', '5', '6', '7+', 'X']
    counts = [costs.get(bucket, 0) for bucket in buckets]

    sns.set_style('white')
    sns.set(font='Concourse C3', font_scale=3)
    # x/y are passed by keyword: newer seaborn versions no longer accept them
    # positionally.
    g = sns.barplot(x=buckets, y=counts, palette=['grey'] * len(buckets))
    g.axes.yaxis.set_ticklabels([])

    sns.set(font='Concourse C3', font_scale=2)
    # Write each non-zero count just above its bar.
    for rect, count in zip(g.patches, counts):
        if count == 0:
            continue
        g.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 0.5,
               count, ha='center', va='bottom')

    g.margins(y=0, x=0)
    sns.despine(left=True, bottom=True)
    g.get_figure().savefig(path, transparent=True, pad_inches=0,
                           bbox_inches='tight')
    plt.clf()  # Clear all data from matplotlib so it does not persist across requests.
    return path
def xgboost_feature_importance(model, train, return_df=False):
    """Plot normalised feature f-scores of a model as a horizontal bar chart.

    :param model: Object exposing ``get_fscore(fmap=...)``.
    :param train: Object whose ``columns`` are the feature names.
    :param return_df: When exactly ``True``, return the importance frame.
    :return: DataFrame with 'feature' and 'fscore' columns if ``return_df``
        is ``True``; otherwise ``None``.
    """
    features = train.columns
    # presumably writes the 'xgb.fmap' file read below -- verify against
    # create_feature_map
    create_feature_map(features)

    importance = model.get_fscore(fmap='xgb.fmap')
    importance = sorted(importance.items(), key=operator.itemgetter(1))

    df = pd.DataFrame(importance, columns=['feature', 'fscore'])
    df['fscore'] = df['fscore'] / df['fscore'].sum()  # scores sum to 1

    sns.barplot(x="fscore", y="feature", data=df)
    plt.title('Feature Importances')
    # NOTE(review): the importance values are on the x axis; this labels the
    # feature axis -- confirm whether plt.xlabel was intended.
    plt.ylabel('Relative Importance')

    # Function-call form works on both Python 2 and Python 3.
    print(df)

    if return_df is True:
        return df
def get_graph_features_mean(col_list):
    """Bar-plot the per-type mean scores of one feature or a list of features.

    Reads the module-level ``analyse_mean`` frame (indexed here by its
    'features' column), reshapes it to long form and plots 'score' per
    feature, split by 'type' in the order 'error', 'good'.

    :param col_list: A list of feature names, or a single feature name.
    """
    am = analyse_mean.set_index('features')
    am = am.stack().reset_index()
    am.columns = ['features', 'type', 'score']

    plt.figure(figsize=(18, 6))

    # Decide on the argument itself (the original tested an unrelated global,
    # ``bench_features_bool``, so the branch did not depend on col_list).
    if isinstance(col_list, list):
        print("List")
        selected = am[am.features.isin(col_list)]
    else:
        selected = am[am.features == col_list]

    # x/y by keyword: newer seaborn versions reject positional x/y.
    sns.barplot(x='score', y='features', data=selected, hue='type',
                hue_order=['error', 'good'])
    # plt.xticks(rotation=40)
    plt.legend()

# base :
# 0.9412 ***
# without : u'is_mister_word_1b', u'is_mister_word_2b', u'is_mister_word_1a', u'is_mister_word_2a'
# 0.9344
# without : word_encoded'
# 0.9268
def target_plot(self):
    """Plot the target column according to its declared column type.

    For non-time-series runs, a binary target is drawn as a single bar of
    ``sum / row count`` on a 0..1 axis, and a numeric or ordinal target is
    drawn as a histogram. Other target types produce no plot. Time-series
    runs delegate to ``self.time_series_target_plot()``.
    """
    target_type = self.input_data.metadata.loc[self.target].type
    target_data = self.input_data.df[self.target]
    sns.set(style="white", color_codes=True)

    if self.run_time_config['is_time_series']:
        self.time_series_target_plot()
        return

    if target_type == ColType.BINARY:
        plt.figure(figsize=(6, 1))
        rate = target_data.sum() / target_data.shape[0]
        # Passed by keyword as a one-element vector: newer seaborn treats the
        # first positional argument as ``data`` rather than ``x``.
        sns.barplot(x=[rate])
        plt.xlim([0, 1])
        plt.title(target_data.name + ' rate')
    elif target_type == ColType.NUMERIC or target_type == ColType.ORDINAL:
        plt.figure(figsize=(6, 2))
        ax = sns.distplot(target_data, hist_kws=dict(edgecolor='black'))
        ax.set_xlim(target_data.min(), target_data.max())
        plt.title(target_data.name + ' histogram')
def plot_changed_files_by_type(self):
    """Bar-plot the 'Files' column of the file-type frame against its index."""
    sns.barplot(
        x="Files",
        y=self.changed_file_type_frame.index,
        data=self.changed_file_type_frame,
    )
def plot_changed_files_by_category(self):
    """Bar-plot the 'Files' column of the file-category frame against its index."""
    sns.barplot(
        x="Files",
        y=self.changed_file_category_frame.index,
        data=self.changed_file_category_frame,
    )
def plot_facet(self, **kwargs):
    """Draw one bar-plot facet and record the legend information.

    The keyword arguments are turned into ``sns.barplot`` parameters by
    ``self.get_parameters``. When both ``self._x`` and ``self._y`` are truthy,
    the hue name and hue order are stored as legend title and levels.
    """
    params = self.get_parameters(**kwargs)
    sns.barplot(**params)

    if not (self._x and self._y):
        return
    self.legend_title = params["hue"]
    self.legend_levels = params["hue_order"]
def plot_br_chart(self, column):
    """Show a count bar chart with a bad-rate line for one column.

    Reads ``self.woe_dicts[column]``, a mapping whose values are indexable;
    element ``[1]`` is plotted as the count and element ``[2]`` as the bad
    rate. String keys are ordered with ``self.sort_dict``, other keys by
    their natural order.

    :param column: Key into ``self.woe_dicts``; also used as the plot title.
    """
    woe_dict = self.woe_dicts[column]
    # Peek at the first key without indexing .items(), which is a view (not a
    # list) on Python 3.
    first_key = next(iter(woe_dict), None)
    if isinstance(first_key, str):
        woe_lists = sorted(woe_dict.items(), key=self.sort_dict)
    else:
        woe_lists = sorted(woe_dict.items(), key=lambda item: item[0])

    sns.set_style(rc={"axes.facecolor": "#EAEAF2",
                      "axes.edgecolor": "#EAEAF2",
                      "axes.linewidth": 1,
                      "grid.color": "white",})

    tick_label = [i[0] for i in woe_lists]
    counts = [i[1][1] for i in woe_lists]
    br_data = [i[1][2] for i in woe_lists]
    x = list(range(len(counts)))

    fig, ax1 = plt.subplots(figsize=(12, 8))
    # x/y by keyword: newer seaborn versions reject positional x/y.
    sns.barplot(x=x, y=counts, ax=ax1,
                palette=sns.husl_palette(n_colors=20, l=.7))
    plt.xticks(x, tick_label, rotation=30, fontsize=12)
    plt.title(column, fontsize=18)
    ax1.set_ylabel('count', fontsize=15)
    ax1.tick_params('y', direction='in', length=6, width=0.5, labelsize=12)

    # Second y axis sharing the same x positions for the bad-rate line.
    ax2 = ax1.twinx()
    ax2.plot(x, br_data, color='black')
    ax2.set_ylabel('bad rate', fontsize=15)
    ax2.tick_params('y', direction='in', length=6, width=0.5, labelsize=12)

    # Widen the x range slightly and leave 10% headroom above the bars.
    plot_margin = 0.25
    x0, x1, y0, y1 = ax1.axis()
    ax1.axis((x0 - plot_margin, x1 + plot_margin, y0 - 0, y1 * 1.1))
    plt.show()
def save_br_chart(self, column, path):
    """Save a count bar chart with a bad-rate line for one column.

    Reads ``self.woe_dicts[column]``, a mapping whose values are indexable;
    element ``[1]`` is plotted as the count and element ``[2]`` as the bad
    rate. String keys are ordered with ``self.sort_dict``, other keys by
    their natural order.

    :param column: Key into ``self.woe_dicts``; also used as the plot title.
    :param path: Destination passed to ``plt.savefig``.
    """
    woe_dict = self.woe_dicts[column]
    # Peek at the first key without indexing .items(), which is a view (not a
    # list) on Python 3.
    first_key = next(iter(woe_dict), None)
    if isinstance(first_key, str):
        woe_lists = sorted(woe_dict.items(), key=self.sort_dict)
    else:
        woe_lists = sorted(woe_dict.items(), key=lambda item: item[0])

    tick_label = [i[0] for i in woe_lists]
    counts = [i[1][1] for i in woe_lists]
    br_data = [i[1][2] for i in woe_lists]
    x = list(range(len(counts)))

    fig, ax1 = plt.subplots(figsize=(12, 8))
    # x/y by keyword: newer seaborn versions reject positional x/y.
    sns.barplot(x=x, y=counts, ax=ax1,
                palette=sns.husl_palette(n_colors=20, l=.7))
    plt.xticks(x, tick_label, rotation=30, fontsize=12)
    plt.title(column, fontsize=18)
    ax1.set_ylabel('count', fontsize=15)
    ax1.tick_params('y', labelsize=12)

    # Second y axis sharing the same x positions for the bad-rate line.
    ax2 = ax1.twinx()
    ax2.plot(x, br_data, color='black')
    ax2.set_ylabel('bad rate', fontsize=15)
    ax2.tick_params('y', labelsize=12)

    # Widen the x range slightly and leave 10% headroom above the bars.
    plot_margin = 0.25
    x0, x1, y0, y1 = ax1.axis()
    ax1.axis((x0 - plot_margin, x1 + plot_margin, y0 - 0, y1 * 1.1))
    plt.savefig(path)
def plot_stats(stats):
    """Plot every metric of ``stats`` as horizontal bars, one facet each.

    The frame is ordered by 'frac_obs' (descending), melted to long form with
    'output' as the identifier, and drawn on a ``sns.FacetGrid`` with one
    column per metric and independent x axes.

    :param stats: DataFrame with an 'output' column and a 'frac_obs' column.
    :return: The populated ``sns.FacetGrid``.
    """
    ordered = stats.sort_values('frac_obs', ascending=False)
    long_form = pd.melt(ordered, id_vars=['output'], var_name='metric')

    grid = sns.FacetGrid(data=long_form, col='metric', sharex=False)
    grid.map(sns.barplot, 'value', 'output')

    # Strip the axis labels from every facet.
    for facet_axes in grid.axes.ravel():
        facet_axes.set(xlabel='', ylabel='')
    return grid
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Produces univariate plots of a given set of columns. Barplots are used
    for categorical columns while histograms (with fitted density functions)
    are used for numerical columns.

    If use_target is true, then the variation of the given set of columns
    with respect to the response variable is plotted instead, by delegating
    each column to ``explore_features_covariation``.

    Parameters
    ----------
    col : a string of a column name, or a list of many columns names or
        None (default). If col is None, all columns will be used.
    use_target : bool, default False
        Whether to use the target column in the plots.
    **kwargs: additional arguments to be passed to seaborn's distplot or to
        pandas's plotting utilities.

    Raises
    ------
    TypeError
        If a column is neither numeric nor categorical according to
        ``self.is_numeric`` / ``self.is_categorical``.
    '''
    self._validate_params(params_list={'col': col},
                          expected_types={'col': [str, list, type(None)]})
    if isinstance(col, str):
        col = [col]
    elif col is None:
        col = self._get_all_features()

    if use_target:
        # Plot every column against the target variable.
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y,
                                              **kwargs)
        return

    for column in col:
        series = self.df[column]
        if self.is_numeric(series):
            plt.figure(column)
            sns.distplot(series, color="m", **kwargs)
        elif self.is_categorical(series):
            plt.figure(column)
            counts = series.value_counts()
            if len(series.unique()) > 30:
                # Too many levels to read: keep only the first 20 counts.
                counts = counts[:20]
            # Reversed so the first entry ends up at the top of the chart.
            counts[::-1].plot.barh(**kwargs)
        else:
            raise TypeError('TYPE IS NOT SUPPORTED')
        plt.title(column)
        plt.tight_layout()
def save_bar_graph(x, y, file_name):
    """Draw a bar plot of ``y`` over ``x`` and save it to ``file_name``.

    The current figure is cleared first and the x tick labels are rotated by
    15 degrees.
    """
    plt.clf()
    sns.set_style("whitegrid")
    axes = sns.barplot(x=x, y=y)
    plt.setp(axes.get_xticklabels(), rotation=15)
    plt.savefig(file_name)
def save_graph_with_icon(x, y, images, file_name):
    """Save a bar plot whose x tick labels are replaced by images.

    :param x: Bar positions; each value is also used as the horizontal centre
        of the corresponding image, in data coordinates.
    :param y: Bar heights.
    :param images: Indexable collection; ``images[i]`` is drawn for ``x[i]``.
    :param file_name: Destination passed to ``plt.savefig``.
    """
    tick_pos = -0.25
    size_in_tick = 1

    plt.clf()
    sns.set_style("whitegrid")
    ax = sns.barplot(x=x, y=y, ci=None)

    # Erase the tick labels; the font size only reserves room for the icons.
    ax.get_xaxis().set_ticklabels([], fontsize=45)

    to_display = ax.transData.transform
    # Display-space size of one data unit along each axis.
    unit = to_display((1, 1)) - to_display((0, 0))
    x_scale = unit[0] / unit[1]

    bottom = tick_pos - size_in_tick
    for index, centre in enumerate(x):
        # Box around the tick position, in data coordinates.
        left = centre - (size_in_tick / x_scale / 2)
        right = centre + (size_in_tick / x_scale / 2)

        lower_left = to_display((left, bottom))
        upper_right = to_display((right, tick_pos))

        icon = BboxImage(Bbox([lower_left, upper_right]),
                         norm=None,
                         origin=None,
                         clip_on=False)
        icon.set_data(images[index])
        ax.add_artist(icon)

    plt.savefig(file_name)
def calculate_word_count_stats(articles: pd.DataFrame):
    """Calculate aggregate word count statistics on each source's articles.

    Prints count/mean/std of 'word_count' per 'base_url', then shows a bar
    plot of article counts and a box plot of word counts for the ten sources
    with the most articles.

    :param articles: DataFrame with 'base_url' and 'word_count' columns.
    """
    # pyplot is imported directly: recent seaborn releases do not expose the
    # ``sns.plt`` alias.
    import matplotlib.pyplot as plt

    by_source = articles.groupby(['base_url'])['word_count']
    by_source = by_source.agg(['count', 'mean', 'std'])
    by_source.sort_values('count', ascending=False, inplace=True)
    print_full(by_source)

    top_sources = by_source.head(10).index
    # The boolean mask is positional, so it lines up with the reset frame.
    top_counts = by_source.reset_index()[by_source.index.isin(top_sources)]

    sns.barplot(x='base_url', y='count', data=top_counts)
    plt.show()

    sns.boxplot(x='base_url', y='word_count',
                data=articles[articles['base_url'].isin(top_sources)])
    plt.show()
def plot_counts(count_series, x, y, order, y_label, title, out_path):
    """Bar-plot a count series and save the figure.

    :param count_series: Input converted by ``series_to_plot_dict``.
    :param x: Name of the x variable in the converted data.
    :param y: Name of the y variable in the converted data.
    :param order: Order of the x categories, passed to ``sns.barplot``.
    :param y_label: Label of the y axis.
    :param title: Plot title.
    :param out_path: Destination passed to ``savefig``.
    """
    # pyplot is imported directly: recent seaborn releases do not expose the
    # ``sns.plt`` alias.
    import matplotlib.pyplot as plt

    df = series_to_plot_dict(count_series, x, y)
    fig, ax = plt.subplots()
    # Work on the freshly created axes explicitly instead of relying on
    # pyplot's implicit "current axes".
    sns.barplot(data=df, x=x, y=y, order=order, ax=ax)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    fig.savefig(out_path)
def plot_binned_response_rate(lift: pd.DataFrame):
    """Show the share of correct predictions per row of ``lift`` as bars.

    :param lift: DataFrame with 'NumCorrectPredictions' and 'NumCases'
        columns; its index supplies the x positions.
    """
    import seaborn as sns

    response_rate = lift['NumCorrectPredictions'] / lift['NumCases']
    bin_labels = lift.index.tolist()

    plt.figure()
    sns.barplot(x=bin_labels, y=response_rate, color='salmon', saturation=0.5)
    plt.show()
def plot_bins(self, attr):
    """
    Plot barplot of binned values and show it.

    Input:
    attr (str) name of the attribute on ``self`` holding the values to plot

    Output:
    None; the figure is displayed with ``plt.show()``
    """
    values = getattr(self, attr)
    # x/y by keyword: newer seaborn versions reject positional x/y.
    sns.barplot(x=list(range(len(values))), y=values)
    plt.xlabel(attr)
    plt.show()
def _prepareWeekdayByMonthStats(stats):
    """Average ``stats`` per (weekday, month) and label both with names.

    The input frame is not modified. Weekday and month numbers are replaced
    using the module-level ``dayOfWeek`` and ``months`` mappings.

    :param stats: DataFrame with a datetime 'date' column.
    :return: DataFrame with 'day' and 'month' columns plus the group means.
    """
    # Add day and month columns, and groupby
    stats = stats.copy()
    stats['day'] = stats['date'].dt.weekday
    stats['month'] = stats['date'].dt.month
    dataToPlot = stats.groupby(['day', 'month']).mean()
    dataToPlot = dataToPlot.reset_index()

    # Assign the result back: an in-place replace on a selected column does
    # not update the frame when pandas Copy-on-Write is active.
    dataToPlot['day'] = dataToPlot['day'].replace(dayOfWeek)
    dataToPlot['month'] = dataToPlot['month'].replace(months)
    return dataToPlot
def plot(data, columns, measureName, nrows, ncols, order=None):
    """Show one bar plot per column on an ``nrows`` x ``ncols`` grid.

    :param data: Data passed as ``data=`` to ``sns.barplot``.
    :param columns: Names of the y variables; one subplot each, filled in
        row-major order.
    :param measureName: Name of the x variable shared by all subplots.
    :param nrows: Number of subplot rows.
    :param ncols: Number of subplot columns.
    :param order: Optional order of the x categories.
    """
    # squeeze=False always yields a 2-D array, so flattening also works for a
    # 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    f, axes = plt.subplots(nrows=nrows, ncols=ncols, squeeze=False)
    axes = axes.reshape(-1)
    for i, c in enumerate(columns):
        sns.barplot(x=measureName, y=c, data=data, order=order, ax=axes[i])
    # Use the same pyplot handle as above rather than the sns.plt alias.
    plt.show()
def plot_correlation(self, on, x_col=None, plot_type="jointplot", stat_func=pearsonr, show_stat_func=True, plot_kwargs=None, **kwargs):
    """Plot the correlation between two variables.

    Parameters
    ----------
    on : list or dict of functions or strings
        See `cohort.load.as_dataframe`
    x_col : str, optional
        If `on` is a dict, this guarantees we have the expected ordering.
    plot_type : str, optional
        Specify "jointplot", "regplot", "boxplot", or "barplot".
    stat_func : function, optional.
        Specify which function to use for the statistical test.
    show_stat_func : bool, optional
        Whether or not to show the stat_func result in the plot itself
        (only used for "jointplot").
    plot_kwargs : dict, optional
        kwargs to pass through to plotting functions.
    **kwargs
        Passed through to ``self.as_dataframe``.

    Returns
    -------
    CorrelationResults
        Holds the coefficient, p-value, both series and the plot object.

    Raises
    ------
    ValueError
        If `plot_type` is not supported or `on` does not yield exactly two
        columns.
    """
    if plot_type not in ["boxplot", "barplot", "jointplot", "regplot"]:
        raise ValueError("Invalid plot_type %s" % plot_type)

    # None instead of a shared mutable default dictionary.
    if plot_kwargs is None:
        plot_kwargs = {}

    plot_cols, df = self.as_dataframe(on, return_cols=True, **kwargs)
    if len(plot_cols) != 2:
        raise ValueError("Must be comparing two columns, but there are %d columns" % len(plot_cols))

    for plot_col in plot_cols:
        df = filter_not_null(df, plot_col)

    # The y column is whichever of the two columns is not the x column.
    if x_col is None:
        x_col, y_col = plot_cols[0], plot_cols[1]
    elif x_col == plot_cols[0]:
        y_col = plot_cols[1]
    else:
        y_col = plot_cols[0]

    series_x = df[x_col]
    series_y = df[y_col]
    coeff, p_value = stat_func(series_x, series_y)

    if plot_type == "jointplot":
        plot = sb.jointplot(data=df, x=x_col, y=y_col,
                            stat_func=stat_func if show_stat_func else None,
                            **plot_kwargs)
    elif plot_type == "regplot":
        plot = sb.regplot(data=df, x=x_col, y=y_col, **plot_kwargs)
    elif plot_type == "boxplot":
        plot = stripboxplot(data=df, x=x_col, y=y_col, **plot_kwargs)
    else:
        plot = sb.barplot(data=df, x=x_col, y=y_col, **plot_kwargs)

    return CorrelationResults(coeff=coeff, p_value=p_value,
                              stat_func=stat_func,
                              series_x=series_x, series_y=series_y,
                              plot=plot)
def att_plot(top_labels, gt_ind, probs, fn):
    """Save a horizontal bar chart of label probabilities.

    Each bar is annotated with its label; the label at ``gt_ind`` gets the
    suffix ' (gt)'. ``top_labels`` itself is left untouched.

    :param top_labels: List of label strings.
    :param gt_ind: Index of the label to mark with ' (gt)'.
    :param probs: Values plotted on a fixed 0..1 x axis.
    :param fn: Destination passed to ``Figure.savefig``.
    """
    labels = deepcopy(top_labels)
    labels[gt_ind] += ' (gt)'
    frame = pd.DataFrame(data={'probs': probs, 'labels': labels})

    fig, ax = plt.subplots(figsize=(4, 5))
    ax.tick_params(labelsize=15)
    sns.barplot(y='labels', x='probs', ax=ax, data=frame, orient='h', ci=None)
    ax.set(xlim=(0, 1))

    # Write each label just right of the end of its bar.
    for bar, text in zip(ax.patches, labels):
        ax.text(bar.get_width() + .02,
                bar.get_y() + bar.get_height()*4/5,
                text, ha='left', va='bottom', fontsize=25)

    # Hide everything except the bars, the annotations and the x ticks.
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    ax.get_yaxis().set_visible(False)
    ax.get_xaxis().label.set_visible(False)

    fig.savefig(fn, bbox_inches='tight', transparent=True)
    plt.close('all')
def __create_bar_plot(dataframe):
    """Draw the personality results as horizontal bars, save and show them.

    The figure is written to the path returned by
    ``__get_file_path('trump-personality-results.png')``.
    """
    values, names = __get_sorted_personality_traits_and_columns(dataframe)
    pyplot.subplots(figsize=(16, 12))
    bar_colors = __create_colors_and_legend(names)

    # Use palette instead of hue in barplot because seaborn renders super thin bars when using hue (seaborn bug)
    seaborn.barplot(x=values, y=names, orient="h", palette=bar_colors)

    output_path = __get_file_path('trump-personality-results.png')
    pyplot.savefig(output_path)
    print('Bar plot created with trump personality results! View results in trump-personality-results.png')
    pyplot.show()
def enrich_signature(method="pca", percentile=99, results_dir="results", experiment="CROP-seq_Jurkat_TCR", n_genes=500):
    """Run enrichment on the most extreme genes and plot the top terms.

    Reads ``<experiment>.differential_expression.<method>.stimutation.csv``
    from ``results_dir``, keeps the genes whose absolute value exceeds the
    given percentile, sends them to ``enrichr``, writes the result as CSV and
    saves a bar plot of the best-scoring terms per gene set library as SVG.

    :param method: Name used in the input and output file names.
    :param percentile: Percentile of the absolute values used as threshold.
    :param results_dir: Directory for the input and output files.
    :param experiment: Experiment name used in the input file name.
    :param n_genes: Unused in this function.
    """
    diff_path = os.path.join(
        results_dir,
        "{}.differential_expression.{}.stimutation.csv".format(experiment, method))
    # DataFrame.squeeze replaces read_csv(squeeze=True), which newer pandas
    # versions no longer accept.
    diff = pd.read_csv(diff_path, index_col=0, header=None,
                       names=["gene_name"]).squeeze("columns")

    degs = pd.Series(diff[abs(diff) > np.percentile(abs(diff), percentile)].index)
    degs.name = "gene_name"

    enr = enrichr(degs.reset_index())
    enr.to_csv(os.path.join(results_dir, "differential_expression.{}.enrichr.csv".format(method)),
               index=False, encoding="utf8")

    # Plot top N terms of each library
    n = 8
    to_plot = [
        'GO_Biological_Process_2015',
        "KEGG_2016",
        "WikiPathways_2016",
        "Reactome_2016",
        "BioCarta_2016",
        "NCI-Nature_2016"]

    in_libraries = enr[enr['gene_set_library'].isin(to_plot)]
    # Level 1 of the nlargest index holds the original row labels of ``enr``,
    # so a label-based lookup (.loc) replaces the removed .ix indexer.
    top_rows = (in_libraries.groupby("gene_set_library")['combined_score']
                .nlargest(n).index.get_level_values(1))
    p = enr.loc[top_rows].sort_values("combined_score", ascending=False)

    fig, axis = plt.subplots(1)
    sns.barplot(data=p, y="description", x="combined_score", orient="horiz",
                hue="gene_set_library", ax=axis)
    axis.set_xlabel("Combined score")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "differential_expression.{}.enrichr.top{}_terms.svg".format(method, n)),
                bbox_inches="tight")
def model_error_by_feature(self, model_name):
    """Show a bar plot of 'error' for each categorical or ordinal column.

    :param model_name: Key into ``self.model_results``.
    """
    results = self.model_results[model_name]
    metadata = results.metadata
    holdout = results.holdout_data

    wanted_types = [ColType.CATEGORICAL, ColType.ORDINAL]
    selected = metadata[metadata.type.isin(wanted_types)]

    for col in selected.col_name:
        sns.barplot(x=col, y='error', data=holdout)
        plt.show()
def bars(data, color='black', title=''):
    """Plot the value counts of ``data`` as annotated bars (at most 20).

    The first row of the value counts is dropped before plotting.

    :param data: Object exposing ``value_counts()`` (e.g. a pandas Series).
    :param color: Colour of bars with a non-negative value; others are red.
    :param title: Plot title.
    """
    data = pd.DataFrame(data.value_counts())
    data = data.reset_index()
    data.columns = ['keyword', 'value']
    # Assigning the slice leaves the first keyword missing, so the dropna
    # below removes the first row.
    data['keyword'] = data['keyword'][1:]
    data = data.dropna()
    data = data.reset_index(drop=True)
    data = data.sort_values('value', ascending=False)

    sns.set_context("notebook", font_scale=1.2, rc={"lines.linewidth": 0})

    top = data.head(20)
    x = top['keyword'].astype(str)
    y = top['value'].astype(int)

    f, ax = plt.subplots(figsize=(16, 3))
    sns.set_style('white')

    ## change color of the bar based on value
    colors = [color if _y >= 0 else 'red' for _y in y]
    # x/y by keyword: newer seaborn versions reject positional x/y.
    sns.barplot(x=x, y=y, palette=colors, ax=ax)

    plt.title(title, fontsize=18, y=1.12, color="gray")
    ax.set_xticklabels('')
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.tick_params(axis='both', which='major', pad=30)

    for n, _y in enumerate(y):
        # The text is passed positionally: the keyword was ``s`` in older
        # matplotlib and ``text`` in newer releases.
        ax.annotate(
            '{:.1f}'.format(abs(_y)),
            xy=(n, _y),
            ha='center', va='center',
            xytext=(0, -10),
            size=12,
            textcoords='offset points',
            color="white",
            weight="bold"
        )

    ax.set_yticklabels("")
    ax.set_xticklabels(top['keyword'], rotation=25, ha="right")
    ax.tick_params(axis='both', which='major', pad=15)
    sns.despine(left=True)
def _plotMonthlyStats(stats, columns, groupBy=True):
    """Plot the selected stats per month, one bar facet per stat.

    :param stats: DataFrame; needs a datetime 'date' column when ``groupBy``
        is true, otherwise a 'month' column.
    :param columns: Columns of ``stats`` to plot.
    :param groupBy: When true, average ``stats`` per calendar month first.
    :return: The grid object returned by ``sns.factorplot``.
    """
    dataToPlot = stats.copy()

    # Group by month and rename date column
    if groupBy:
        dataToPlot = dataToPlot.groupby(stats['date'].dt.month).mean()
        dataToPlot = dataToPlot.reset_index().rename(columns={'date': 'month'})

    # change stats from columns to row attribute
    dataToPlot = pd.melt(dataToPlot, id_vars=['month'], value_vars=columns,
                         var_name='stats', value_name='val')

    # Rename stats and months. The result is assigned back: an in-place
    # replace on a selected column does not update the frame when pandas
    # Copy-on-Write is active.
    dataToPlot['stats'] = dataToPlot['stats'].replace(NAMES)
    dataToPlot['month'] = dataToPlot['month'].replace(months)

    # Keep the order of monthsOrder, restricted to the months present.
    present = set(dataToPlot['month'].unique())
    order = [m for m in monthsOrder if m in present]

    # Plot
    g = sns.factorplot(data=dataToPlot, x="month", y="val", col="stats",
                       order=order, kind="bar", sharey=False)
    g.set_xticklabels(rotation=45)
    g.set(xlabel='')
    return g
def fishers_exact_plot(data, condition1, condition2, ax=None, condition1_value=None, alternative="two-sided", **kwargs):
    """
    Perform a Fisher's exact test to compare two binary columns

    Parameters
    ----------
    data: Pandas dataframe
        Dataframe to retrieve information from
    condition1: str
        First binary column to compare (and used for test sidedness)
    condition2: str
        Second binary column to compare
    ax : Axes, default None
        Axes to plot on
    condition1_value:
        If `condition1` is not a binary column, split on =/!= to
        condition1_value
    alternative:
        Specify the sidedness of the test: "two-sided", "less" or "greater".
        Only "two-sided" is currently accepted.
    **kwargs
        Passed through to the bar plot.

    Returns
    -------
    FishersExactResults
        Odds ratio, p-value, sidedness, the `condition2` series with and
        without `condition1`, and the plot.

    Raises
    ------
    ValueError
        If `alternative` is not "two-sided".
    """
    # Fail fast, before anything is plotted or printed.
    if alternative != "two-sided":
        raise ValueError("We need to better understand the one-sided Fisher's Exact test")
    sided_str = "two-sided"

    plot = sb.barplot(
        x=condition1,
        y=condition2,
        ax=ax,
        data=data,
        **kwargs
    )
    plot.set_ylabel("Percent %s" % condition2)

    condition1_mask = get_condition_mask(data, condition1, condition1_value)
    count_table = pd.crosstab(data[condition1], data[condition2])
    print(count_table)

    oddsratio, p_value = fisher_exact(count_table, alternative=alternative)
    add_significance_indicator(plot=plot, significant=p_value <= 0.05)
    only_percentage_ticks(plot)

    print("Fisher's Exact Test: OR: {}, p-value={} ({})".format(oddsratio, p_value, sided_str))
    return FishersExactResults(oddsratio=oddsratio,
                               p_value=p_value,
                               sided_str=sided_str,
                               with_condition1_series=data[condition1_mask][condition2],
                               without_condition1_series=data[~condition1_mask][condition2],
                               plot=plot)