def visualize_results(self):
    """Plot the fitted logistic curve of conversion against cumulative pageviews.

    Reads ``self.df`` and regresses its ``is_conversion`` column on its
    ``pageviews_cumsum`` column with a logistic fit, then displays the
    figure with ``plt.show()``.
    """
    # Imported locally: current seaborn releases no longer expose pyplot as
    # ``sns.plt``, so the pyplot interface is taken from matplotlib itself.
    import matplotlib.pyplot as plt

    # Visualize logistic curve using seaborn
    sns.set(style="darkgrid")
    sns.regplot(
        x="pageviews_cumsum",
        y="is_conversion",
        data=self.df,
        logistic=True,
        n_boot=500,
        y_jitter=.01,
        scatter_kws={"s": 60},
    )
    sns.set(font_scale=1.3)
    plt.title('Logistic Regression Curve')
    plt.ylabel('Conversion probability')
    plt.xlabel('Cumulative sum of pageviews')
    plt.subplots_adjust(right=0.93, top=0.90, left=0.10, bottom=0.10)
    plt.show()
def show_scatter(df, xlim=(-5, 105), ylim=(-5, 105), color="black", marker="o", reg_fit=False):
    """Draw the ``x`` and ``y`` columns of a data set as a scatter plot

    Args:
        df (pd.DataFrame): The data set to plot (columns ``x`` and ``y`` are used)
        xlim ((float, float)): Lower and upper bound of the x-axis
        ylim ((float, float)): Lower and upper bound of the y-axis
        color (str): Colour applied to the scatter points
        marker (str): Marker style applied to the scatter points
        reg_fit (bool): If true, a linear regression line is drawn as well
    """
    point_style = {"s": 50, "alpha": 0.7, "color": color}
    fit_line_style = {"linewidth": 4, "color": "red"}

    sns.regplot(
        x="x",
        y="y",
        data=df,
        ci=None,
        fit_reg=reg_fit,
        marker=marker,
        scatter_kws=point_style,
        line_kws=fit_line_style,
    )

    plt.xlim(xlim)
    plt.ylim(ylim)
    plt.tight_layout()
def basic_linear(wine_set):
    """Plot and fit a simple linear regression of quality on volatile acidity.

    Shows a scatter plot with a fitted line of ``quality`` against
    ``volatile_acidity``, then centers ``volatile_acidity`` around its mean
    and prints the summary of an OLS fit of ``quality ~ volatile_acidity``.

    The centering is done on a copy of the data: ``wine_set`` itself is left
    unmodified, so calling this function does not change what later analyses
    of the same DataFrame see.

    Args:
        wine_set: DataFrame with ``volatile_acidity`` and ``quality`` columns.
    """
    seaborn.regplot(x="volatile_acidity", y="quality", fit_reg=True, data=wine_set)
    plt.xlabel("Amount of volatile acidity in wine")
    plt.ylabel("Quality level of wine (0-10 scale)")
    plt.title("Association between the amount of volatile acidity in wine and the quality of wine")
    plt.show()

    # ----------- centering the explanatory variable by subtracting the mean
    f_acidity_mean = wine_set["volatile_acidity"].mean()
    print("mean of the volatile acidity variable = ", f_acidity_mean)
    # assign() returns a new DataFrame, so the caller's column is not overwritten.
    centered = wine_set.assign(volatile_acidity=wine_set["volatile_acidity"] - f_acidity_mean)
    print("mean of the volatile acidity variable after normalization = ", centered["volatile_acidity"].mean())

    print ("\nOLS regression model for the association between the amount of volatile acidity in wine and the quality of wine:")
    model1 = smf.ols(formula="quality ~ volatile_acidity", data=centered)
    results1 = model1.fit()
    print(results1.summary())
# call(basic_linear)

# #___________________________________ Multiple Regression___________________________________________
def plot_eval(self, eval_dict, labels, path_extension=""):
    """
    Plot the loss function in a overall plot and a zoomed plot.

    The upper panel shows every entry of ``eval_dict``; the lower panel shows
    only the last quarter of the entries and adds a regression fit.

    :param eval_dict: Mapping whose keys are used as x positions (the axis is
        labelled 'Epochs') and whose values are equally long sequences, one
        entry per plotted series.
    :param labels: Legend labels, indexed like the entries of each value.
    :param path_extension: If the plot should be saved in an incremental way.
    """
    def plot(x, y, fit, label):
        # x/y are passed by keyword: newer seaborn no longer accepts them positionally.
        sns.regplot(x=np.array(x), y=np.array(y), fit_reg=fit, label=label, scatter_kws={"s": 5})

    # Materialise keys and values: on Python 3 the dict views cannot be indexed or sliced.
    epochs = list(eval_dict.keys())
    series = list(eval_dict.values())
    idx = np.array(series[0]).shape[0]
    x = np.array(series)

    plt.clf()
    plt.subplot(211)
    for i in range(idx):
        plot(epochs, x[:, i], False, labels[i])
    plt.legend()

    plt.subplot(212)
    # Number of trailing entries shown in the zoomed panel. With fewer than
    # four entries this is 0 and the ``[-0:]`` slices select everything.
    tail = int(len(x) * 0.25)
    for i in range(idx):
        plot(epochs[-tail:], x[-tail:][:, i], True, labels[i])
    plt.xlabel('Epochs')
    plt.savefig(paths.get_plot_evaluation_path_for_model(self.model.get_root_path(), path_extension+".png"))
def visualize_results(df):
    """Plot the fitted logistic curve of conversion against cumulative pageviews.

    Regresses the ``is_conversion`` column of ``df`` on its
    ``pageviews_cumsum`` column with a logistic fit, then displays the
    figure with ``plt.show()``.
    """
    # Imported locally: current seaborn releases no longer expose pyplot as
    # ``sns.plt``, so the pyplot interface is taken from matplotlib itself.
    import matplotlib.pyplot as plt

    # Visualize logistic curve using seaborn
    sns.set(style="darkgrid")
    sns.regplot(
        x="pageviews_cumsum",
        y="is_conversion",
        data=df,
        logistic=True,
        n_boot=500,
        y_jitter=.01,
        scatter_kws={"s": 60},
    )
    sns.set(font_scale=1.3)
    plt.title('Logistic Regression Curve')
    plt.ylabel('Conversion probability')
    plt.xlabel('Cumulative sum of pageviews')
    plt.subplots_adjust(right=0.93, top=0.90, left=0.10, bottom=0.10)
    plt.show()
# Run the final program
def show_scatter_and_results(df):
    """Creates a plot which shows both the scatter plot and the statistical summary

    The scatter plot of the ``x``/``y`` columns occupies the left part of the
    figure; five summary values returned by ``get_values(df)`` are written to
    its right, labelled X Mean, Y Mean, X SD, Y SD and Corr.

    Args:
        df (pd.DataFrame): The data set to plot
    """
    plt.figure(figsize=(12, 5))
    # x is passed by keyword: a positional first argument is interpreted as
    # ``data`` by newer seaborn and would clash with ``data=df``.
    sns.regplot(
        x="x",
        y="y",
        data=df,
        ci=None,
        fit_reg=False,
        scatter_kws={"s": 50, "alpha": 0.7, "color": "black"})
    plt.xlim(-5, 105)
    plt.ylim(-5, 105)
    plt.tight_layout()

    res = get_values(df)
    fs = 30
    y_off = -5

    labels = ("X Mean", "Y Mean", "X SD", "Y SD", "Corr.")
    max_label_length = max(len(l) for l in labels)

    # If `max_label_length = 10`, this string will be "{:<10}: {:0.9f}", then we
    # can pull the `.format` method for that string to reduce typing it
    # repeatedly
    formatter = '{{:<{pad}}}: {{:0.9f}}'.format(pad=max_label_length).format
    corr_formatter = '{{:<{pad}}}: {{:+.9f}}'.format(pad=max_label_length).format

    # The first four values share one format; the correlation keeps its sign.
    lines = [formatter(label, res[i]) for i, label in enumerate(labels[:-1])]
    lines.append(corr_formatter(labels[-1], res[len(labels) - 1]))
    line_heights = (80, 65, 50, 35, 20)

    # Each line is drawn twice at the same position: first faintly with two
    # characters trimmed (7 decimals left), then fully opaque with seven
    # characters trimmed (2 decimals left), so the extra digits appear greyed.
    for alpha, trim in ((0.3, 2), (1, 7)):
        for line, height in zip(lines, line_heights):
            plt.text(110, y_off + height, line[:-trim], fontsize=fs, alpha=alpha)

    plt.tight_layout(rect=[0, 0, 0.57, 1])
def pearson(wine_set):
    """Show residual sugar against density and print their Pearson correlation.

    A scatter plot with a fitted regression line is displayed first; the
    result of ``scipy.stats.pearsonr`` for the two columns is printed after
    the plot has been shown.
    """
    seaborn.regplot(data=wine_set, x="density", y="residual_sugar", fit_reg=True)
    annotations = (
        (plt.xlabel, "Density of wine"),
        (plt.ylabel, "Residual sugar in wine, gram"),
        (plt.title, "Association between wine's density and residual sugar"),
    )
    for annotate, text in annotations:
        annotate(text)
    plt.show()

    correlation = scipy.stats.pearsonr(wine_set['density'], wine_set["residual_sugar"])
    print(correlation)
# print('----------------Pearson Correlation------------------------')
# call(pearson)

# -----------------------------------------Exploring Statistical Interactions------------------
def explore(wine_set):
    """Compare the density / residual sugar association across quality bands.

    The wines are split into `low` (quality <= 5), `medium` (quality 6 or 7)
    and `high` (quality > 7). The Pearson correlation of density and residual
    sugar is printed for every band, after which one regression plot per band
    is shown.
    """
    quality = wine_set['quality']
    # (printed heading, plot title, rows belonging to the band)
    bands = (
        ('association between wine`s density and residual sugar for wines \nof `low` quality',
         "Association between wine's density and residual sugar for wines of `low` quality",
         wine_set[quality <= 5]),
        ('\nof `medium` quality',
         "Association between wine's density and residual sugar for wines of `medium` quality",
         wine_set[quality.isin([6, 7])]),
        ('\nof `high` quality',
         "Association between wine's density and residual sugar for wines of `high` quality",
         wine_set[quality > 7]),
    )

    # All correlations are reported before the first figure is opened.
    for heading, _, subset in bands:
        print(heading)
        print(scipy.stats.pearsonr(subset['density'], subset["residual_sugar"]))

    for _, title, subset in bands:
        seaborn.regplot(x="density", y="residual_sugar", fit_reg=True, data=subset)
        plt.xlabel("Density of wine")
        plt.ylabel("Residual sugar in wine, gram")
        plt.title(title)
        plt.show()
def plot_angle_comparison(disc, whole, lgdtext=None, fname=None):
    """Scatter three angle columns of ``disc`` against the same columns of ``whole``.

    One panel is drawn per column (``rlnAngleRot``, ``rlnAngleTilt``,
    ``rlnAnglePsi``) with ``whole`` on the x axis and ``disc`` on the y axis.

    Args:
        disc: Table providing the y values; indexed by the column names above.
        whole: Table providing the x values; indexed by the same column names.
        lgdtext: Two labels, ``[y-axis label, shared x-axis label]``. Defaults
            to the disc-only / whole TRPV1 labels.
        fname: If given, the figure is saved there at 300 dpi.

    Returns:
        The ``(figure, axes)`` pair created by ``plt.subplots``.
    """
    if lgdtext is None:
        lgdtext = [u"Disc-Only Angle (deg)", 'Whole TRPV1 Angle (deg)']
    sns.set(font_scale=3)
    f, ax = plt.subplots(1, 3, figsize=(30, 10))

    # (column, lower limit, upper limit, tick spacing, title).
    # Titles are raw strings: in a normal literal "\t" of "\theta" is a TAB
    # character and "\p" is an invalid escape, which breaks the math text.
    panels = (
        ("rlnAngleRot", -45, 45, 15, r"$\phi$ ( $Z$ )"),
        ("rlnAngleTilt", 0, 180, 30, r"$\theta$ ( $Y'$ )"),
        ("rlnAnglePsi", -180, 180, 45, r"$\psi$ ( $Z''$ )"),
    )
    for index, (column, lower, upper, step, title) in enumerate(panels):
        axis = ax[index]
        sns.regplot(x=whole[column], y=disc[column], fit_reg=False, scatter_kws={"s": 16}, ax=axis)
        axis.set_xlim((lower, upper))
        axis.set_ylim((lower, upper))
        ticks = np.arange(lower, upper + 1, step)  # +1 so the upper limit gets a tick
        axis.set_xticks(ticks)
        axis.set_yticks(ticks)
        # The x label is replaced by one shared label below the panels.
        axis.xaxis.label.set_visible(False)
        if index == 0:
            # Only the leftmost panel carries the y label.
            axis.set_ylabel(lgdtext[0])
        else:
            axis.yaxis.label.set_visible(False)
        axis.set_title(title, y=1.01)

    f.text(0.5, -0.05, lgdtext[1], ha='center', fontsize=36)
    f.tight_layout(pad=1., w_pad=-1.5, h_pad=0.5)
    if fname is not None:
        f.savefig(fname, dpi=300)
    return f, ax
def regression(data,x,y,xscale='linear',yscale='linear'):
    """Draw a seaborn regression plot of ``y`` against ``x`` on configurable axis scales.

    ``x`` and ``y`` are handed to ``sns.regplot`` together with ``data``;
    ``xscale`` and ``yscale`` are applied to the resulting axes. The seaborn
    context and style are set globally before plotting.
    """
    context_overrides = {"lines.linewidth": 0}
    sns.set_context("notebook", font_scale=.8, rc=context_overrides)
    sns.set_style('white')

    axes = sns.regplot(data=data, x=x, y=y)
    plt.tick_params(axis='both', which='major', pad=10)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)

    sns.despine()
def plot_correlation(self, on, x_col=None, plot_type="jointplot", stat_func=pearsonr, show_stat_func=True, plot_kwargs=None, **kwargs):
    """Plot the correlation between two variables.

    Parameters
    ----------
    on : list or dict of functions or strings
        See `cohort.load.as_dataframe`
    x_col : str, optional
        If `on` is a dict, this guarantees we have the expected ordering.
    plot_type : str, optional
        Specify "jointplot", "regplot", "boxplot", or "barplot".
    stat_func : function, optional.
        Specify which function to use for the statistical test.
    show_stat_func : bool, optional
        Whether or not to show the stat_func result in the plot itself.
    plot_kwargs : dict, optional
        kwargs to pass through to plotting functions.
    **kwargs
        Forwarded to `self.as_dataframe`.

    Returns
    -------
    CorrelationResults
        Holds the coefficient and p-value returned by `stat_func`, the two
        compared series, `stat_func` itself and the created plot.

    Raises
    ------
    ValueError
        If `plot_type` is not one of the supported names, or if `on` does
        not resolve to exactly two columns.
    """
    if plot_type not in ["boxplot", "barplot", "jointplot", "regplot"]:
        raise ValueError("Invalid plot_type %s" % plot_type)
    # A fresh dict per call instead of a shared mutable default argument.
    if plot_kwargs is None:
        plot_kwargs = {}

    plot_cols, df = self.as_dataframe(on, return_cols=True, **kwargs)
    if len(plot_cols) != 2:
        raise ValueError("Must be comparing two columns, but there are %d columns" % len(plot_cols))
    for plot_col in plot_cols:
        df = filter_not_null(df, plot_col)

    # Without an explicit x column the first column is x; the y column is
    # always whichever of the two columns is not matched by x.
    if x_col is None:
        x_col = plot_cols[0]
    y_col = plot_cols[1] if x_col == plot_cols[0] else plot_cols[0]

    series_x = df[x_col]
    series_y = df[y_col]
    coeff, p_value = stat_func(series_x, series_y)

    if plot_type == "jointplot":
        plot = sb.jointplot(data=df, x=x_col, y=y_col,
                            stat_func=stat_func if show_stat_func else None,
                            **plot_kwargs)
    elif plot_type == "regplot":
        plot = sb.regplot(data=df, x=x_col, y=y_col, **plot_kwargs)
    elif plot_type == "boxplot":
        plot = stripboxplot(data=df, x=x_col, y=y_col, **plot_kwargs)
    else:
        plot = sb.barplot(data=df, x=x_col, y=y_col, **plot_kwargs)

    return CorrelationResults(coeff=coeff, p_value=p_value, stat_func=stat_func,
                              series_x=series_x, series_y=series_y, plot=plot)
def __init__(self, path, games, logger, suffix):
    """Plot how the mean answer value evolves over a dialogue, per dialogue length.

    Answers are encoded as 1 ("yes"), 0 ("no") and 0.5 (anything else).
    Games whose status is "incomplete" are skipped. The remaining games are
    grouped by their number of answers, and for every group of at most
    ``MAX`` answers a lowess-smoothed curve of the mean encoded answer at
    each question position is drawn.

    ``path`` and ``suffix`` are forwarded to the base-class initializer;
    ``logger`` is accepted but not used here.
    """
    super(YesNo, self).__init__(path, self.__class__.__name__, suffix)

    MAX = 15
    answer_value = {"yes": 1, "no": 0}

    # encoded answer sequences, keyed by the number of answers in the game
    yes_no = collections.defaultdict(list)
    for game in games:
        if game.status == "incomplete":
            continue
        yn = [answer_value.get(a.lower(), 0.5) for a in game.answers]
        yes_no[len(game.answers)].append(yn)

    sns.set(style="whitegrid")

    # No usable game at all: fall back to 0 instead of failing inside max().
    longest = max(yes_no) if yes_no else 0
    max_no_question = min(MAX, longest) + 1

    for key, yn in yes_no.items():
        no_question = int(key)
        if no_question < max_no_question:
            yn_mean = np.array(yn).mean(axis=0)
            sns.regplot(x=np.arange(1, no_question + 1, 1), y=yn_mean, lowess=True, scatter=False)

    #dummy legend
    # Every regplot call above drew on the current axes; taking the Axes from
    # this last call means it is available even when no curve was drawn.
    ax = sns.regplot(x=np.array([-1]), y=np.array([-1]), scatter=False,
                     line_kws={'linestyle':'-'}, label="Ratio yes-no", ci=None, color="g")
    ax.legend(loc="best", fontsize='x-large')

    ax.set_xlim(1, max_no_question)
    ax.set_ylim(0.1, 1)
    ax.set_xlabel("Number of questions", {'size': '14'})
    ax.set_ylabel('Ratio yes-no', {'size': '14'})
def __init__(self, path, games, logger, suffix):
    """Plot the number of questions of a game against its number of objects.

    Draws a binned 4th-order regression of questions on objects, a band of
    +/- one standard deviation around the mean number of questions per
    object count, and the reference curves ``y = log2(x)`` and ``y = x``.

    ``path`` and ``suffix`` are forwarded to the base-class initializer;
    ``logger`` is accepted but not used here.
    """
    super(QuestionVsObject, self).__init__(path, self.__class__.__name__, suffix)

    # one row per game: [number of objects, number of questions]
    ratio_q_object = np.array([[len(game.objects), len(game.questions)] for game in games])

    sns.set(style="white")

    x = np.linspace(3, 20, 80)

    # question counts grouped by object count
    counter = collections.defaultdict(list)
    for k, val in ratio_q_object:
        counter[k].append(val)

    # Statistics only for object counts that actually occur, in ascending
    # order: no fixed-size table that larger counts could overflow, and no
    # (0, 0) filler points for counts without any game.
    object_counts = sorted(counter)
    mean_questions = np.array([np.mean(counter[k]) for k in object_counts])
    std_questions = np.array([np.std(counter[k]) for k in object_counts])

    sns.regplot(x=ratio_q_object[:, 0], y=ratio_q_object[:, 1], x_ci=None, x_bins=20, order=4,
                label="Human behavior", marker="o", line_kws={'linestyle':'-'})
    plt.fill_between(x=object_counts,
                     y1=mean_questions - std_questions,
                     y2=mean_questions + std_questions,
                     alpha=0.2)

    sns.regplot(x=x, y=np.log2(x), order=6, scatter=False, label="y = log2(x)", line_kws={'linestyle':'--'})
    f = sns.regplot(x=x, y=x, order=1, scatter=False, label="y = x", line_kws={'linestyle':'--'})

    f.legend(loc="best", fontsize='x-large')
    f.set_xlim(3,20)
    f.set_ylim(0,20)
    f.set_xlabel("Number of objects", {'size':'14'})
    f.set_ylabel("Number of questions", {'size':'14'})
def improvement_plot(consensus_data, ordered_genomes, improvement_tgt):
    """Write one page of AUGUSTUS-vs-transMap improvement plots per genome.

    For every genome in ``ordered_genomes`` whose
    ``consensus_data[genome]['Evaluation Improvement']['changes']`` is not
    empty, a 2x2 figure is produced. Each panel plots a transMap metric (x)
    against the corresponding metric on the y axis as a scatter plot on top
    of a 2D density estimate. Genomes without changes are skipped.

    Args:
        consensus_data: Per-genome mapping; the 'Evaluation Improvement'
            entry must provide 'changes' (eight values per row, in the
            column order listed below) and 'unchanged'.
        ordered_genomes: Genomes to plot, in page order.
        improvement_tgt: Output target; opened with ``open('w')`` and
            wrapped in ``PdfPages``.
    """
    def do_kdeplot(x, y, ax, n_levels=None, bw='scott'):
        # Best effort: if the density fit fails the scatter overlay is still
        # drawn. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        try:
            sns.kdeplot(x, y, ax=ax, cut=0, cmap='Purples_d', shade=True, shade_lowest=False,
                        n_levels=n_levels, bw=bw, rasterized=True)
        except Exception:
            logger.warning('Unable to do a KDE fit to AUGUSTUS improvement.')

    columns = ['transMap original introns',
               'transMap intron annotation support',
               'transMap intron RNA support',
               'Original introns',
               'Intron annotation support',
               'Intron RNA support',
               'transMap alignment goodness',
               'Alignment goodness']
    # (x column, y column, KDE contour levels, KDE bandwidth), one per panel
    panels = [('transMap original introns', 'Original introns', 25, 2),
              ('transMap intron annotation support', 'Intron annotation support', 25, 2),
              ('transMap intron RNA support', 'Intron RNA support', 25, 2),
              ('transMap alignment goodness', 'Alignment goodness', 20, 1)]

    with improvement_tgt.open('w') as outf, PdfPages(outf) as pdf, sns.axes_style("whitegrid"):
        for genome in ordered_genomes:
            improvement = consensus_data[genome]['Evaluation Improvement']
            data = pd.DataFrame(improvement['changes'])
            unchanged = improvement['unchanged']
            if len(data) == 0:
                continue
            data.columns = columns
            fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2)
            for ax in [ax1, ax2, ax3]:  # goodness plots are allowed to auto-set scale
                ax.set_xlim(0, 100)
                ax.set_ylim(0, 100)

            goodness_min = min(data['Alignment goodness'])
            ax4.set_xlim(goodness_min, 100)
            ax4.set_ylim(goodness_min, 100)

            for ax, (x_col, y_col, n_levels, bw) in zip([ax1, ax2, ax3, ax4], panels):
                do_kdeplot(data[x_col], data[y_col], ax, n_levels=n_levels, bw=bw)
                sns.regplot(x=data[x_col], y=data[y_col], ax=ax,
                            color='#A9B36F', scatter_kws={"s": 3, 'alpha': 0.7, 'rasterized': True},
                            fit_reg=False)

            fig.suptitle('AUGUSTUS metric improvements for {:,} transcripts in {}.\n'
                         '{:,} transMap transcripts were chosen.'.format(len(data), genome, unchanged))

            for ax in [ax1, ax2, ax3, ax4]:
                # 'box' instead of 'box-forced': the latter is no longer
                # accepted by Matplotlib, and these axes are not shared, so
                # 'box' behaves the same here.
                ax.set(adjustable='box', aspect='equal')
            fig.subplots_adjust(hspace=0.3)
            # NOTE(review): multipage_close is defined elsewhere; presumably it
            # saves the current figure to the PDF and closes it - confirm.
            multipage_close(pdf, tight_layout=False)