def predictions_vs_actual_regression(model_results, model_name, size=6, bins=None,
                                     gridsize=30, outlier_ratio=None, **kwargs):
    """Draw a hexbin joint plot of holdout predictions against the actual target.

    Parameters
    ----------
    model_results : object
        Must expose ``holdout_data`` (passed to seaborn as ``data``; the
        ``'prediction'`` column and the target column are read from it) and
        ``target`` (name of the target column).
    model_name : str
        Used in the figure title.
    size : number, optional
        Forwarded to ``sns.jointplot`` as ``size``.
    bins : None or 'log', optional
        Forwarded to ``sns.jointplot``; also selects the colour-bar label.
    gridsize : int, optional
        Forwarded to ``sns.jointplot``.
    outlier_ratio : optional
        When not None, ``utils.remove_outlier_rows`` is applied first on the
        ``'prediction'`` column and then on the target column.
    **kwargs
        Forwarded to ``sns.jointplot``.

    Returns
    -------
    The grid object returned by ``sns.jointplot``.
    """
    holdout = model_results.holdout_data
    target = model_results.target

    if outlier_ratio is not None:
        holdout = utils.remove_outlier_rows(holdout, 'prediction', outlier_ratio)
        holdout = utils.remove_outlier_rows(holdout, target, outlier_ratio)

    sns.set(style="white", color_codes=True)
    marginal_kws = dict(hist_kws=dict(edgecolor='black'))

    # Keyword arguments keep the call valid on seaborn releases where the
    # data arguments of jointplot are keyword-only.
    grid = sns.jointplot(x='prediction', y=target, data=holdout, kind='hexbin',
                         gridsize=gridsize, size=size, bins=bins, space=0,
                         marginal_kws=marginal_kws, **kwargs)

    # The title must be set on the figure jointplot created; setting it via
    # pyplot *before* the call put it on whichever figure was current then.
    grid.fig.suptitle('{0}: Predictions vs Actual'.format(model_name), fontsize=14)

    # shrink fig so cbar is visible
    grid.fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
    cax = grid.fig.add_axes([.95, .18, .04, .5])  # x, y, width, height
    # `sns.plt` is no longer exported by seaborn; use pyplot directly.
    color_bar = plt.colorbar(cax=cax)

    if bins is None:
        color_bar.set_label('count')
    elif bins == 'log':
        color_bar.set_label('log_10(count)')

    return grid
async def graph_alternative(self, ctx, *, data : str):
    '''WIP: render a seaborn joint plot from ``data`` and send the image to the invoking channel.

    ``data`` is evaluated as a Python expression and the result is unpacked
    as keyword arguments for ``seaborn.jointplot``.
    '''
    filename = "data/temp/graph_alternative.png"
    # SECURITY(review): eval() executes arbitrary code. `data` is a command
    # argument and is presumably user-controlled -- restrict or replace this
    # (e.g. with a literal-only parser) before the command leaves WIP.
    plot_kwargs = eval(data)
    # Save to the same path that is uploaded below (the original passed the
    # undefined name `name` here).
    seaborn.jointplot(**plot_kwargs).savefig(filename)
    await self.bot.send_file(destination = ctx.message.channel, fp = filename, content = ctx.message.author.display_name + ':')
def _jointplot(self, first: RunData, second: RunData, property: str, size: int,
               filename: str = None, show_ticks: bool = True):
    """Save a seaborn joint plot comparing one property of two runs.

    Both value sequences are truncated to the length of the shorter one and
    both axes share the limits ``(0, max value)``.

    :param first: run whose values go on the x axis
    :param second: run whose values go on the y axis
    :param property: key used to look the values up in both runs
    :param size: passed to ``self._set_fig_size`` and to ``sns.jointplot``
    :param filename: target file; when falsy, a new name is obtained from
        ``self._get_new_figure_filename()``
    :param show_ticks: when False, the tick labels of the joint axes are blanked
    :return: the file name; plotting errors are logged as warnings and not
        re-raised, so the file may not exist if plotting failed
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    filename = filename or self._get_new_figure_filename()

    length = min(len(first[property]), len(second[property]))
    first_prop = first[property][0:length]
    second_prop = second[property][0:length]
    lim = (0, max(max(first_prop), max(second_prop)))

    self._set_fig_size(size)
    x1 = pd.Series(first_prop, name="{descr}: {prop}".format(descr=first.description(), prop=property))
    x2 = pd.Series(second_prop, name="{descr}: {prop}".format(descr=second.description(), prop=property))
    plt.xlim(lim)

    try:
        g = sns.jointplot(x1, x2, kind=self.misc["pair_kind"], size=size, space=0,
                          stat_func=self.stats_helper.tester.test, xlim=lim, ylim=lim)
        if not show_ticks:
            g.ax_joint.set_xticklabels([])
            g.ax_joint.set_yticklabels([])
        g.savefig(filename)
    except Exception as ex:
        # Best effort: a failed plot must not abort the caller. Only
        # Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        logging.warning(ex)
    finally:
        # Close the current figure on failure as well, so figures do not
        # accumulate when plotting raises.
        plt.close()
    return filename
def stock():
    """Download closing prices for five tickers and plot their daily returns.

    Steps, as performed below:

    1. Fetch history for each code via ``tsh.get_hist_data`` (date range
       2015-01-01 .. 2016-04-16) and publish each frame as a module global
       named after its short key (``zsyh``, ``jsyh``, ...).
    2. Inner-join the ``close`` columns, sort by index ascending and compute
       the period-over-period percentage change.
    3. Print the returns and their mean / standard deviation, and show
       joint plots, a pair plot and an annotated mean-vs-std scatter plot.
    """
    stock_list = {"zsyh": "600036", "jsyh": "601939", "szzs": "000001",
                  "pfyh": "600000", "msyh": "600061"}

    frames = {}
    for name, code in stock_list.items():
        frames[name] = tsh.get_hist_data(code, start="2015-01-01", end="2016-04-16")
        # Kept for backward compatibility: the original published every
        # frame as a module-level global, which other code may rely on.
        globals()[name] = frames[name]

    # A real list (not a dict keys view) is used as the column labels.
    names = list(stock_list)
    closes = [frames[name]["close"] for name in names]
    df_close = pd.concat(closes, axis=1, join='inner')
    df_close.columns = names
    df_close.sort_index(ascending=True, inplace=True)

    # Percentage change between consecutive rows of closing prices.
    pc_ret = df_close.pct_change()
    print(pc_ret)
    make_end_line()
    print(pc_ret.mean())
    make_end_line()

    # Build each figure first, then call plt.show() without arguments: the
    # original passed the grid object as show()'s `block` argument.
    sns.jointplot(x="zsyh", y="jsyh", data=pc_ret, kind="hex")
    plt.show()
    sns.jointplot(x="zsyh", y="jsyh", data=pc_ret, kind="scatter")
    plt.show()
    sns.jointplot(x="zsyh", y="szzs", data=pc_ret, kind="scatter")
    plt.show()
    sns.pairplot(pc_ret[["jsyh", "zsyh", "pfyh", "msyh"]].dropna())
    plt.show()

    print(pc_ret.std())
    make_end_line()

    rets = pc_ret.dropna()
    print(rets.mean())
    make_end_line()

    # Mean return (x) against standard deviation (y), one point per ticker.
    plt.scatter(rets.mean(), rets.std())
    plt.xlabel("Expected Return")
    plt.ylabel("Risk")
    for label, x, y in zip(rets.columns, rets.mean(), rets.std()):
        plt.annotate(
            label,
            xy=(x, y), xytext=(50, 50),
            textcoords="offset points", ha="right", va="bottom",
            arrowprops=dict(arrowstyle="-", connectionstyle="arc3,rad=-0.3"))
    plt.show()
def plot_correlation(self, on, x_col=None, plot_type="jointplot", stat_func=pearsonr,
                     show_stat_func=True, plot_kwargs=None, **kwargs):
    """Plot the correlation between two variables.

    Parameters
    ----------
    on : list or dict of functions or strings
        See `cohort.load.as_dataframe`
    x_col : str, optional
        If `on` is a dict, this guarantees we have the expected ordering.
        Must be one of the two columns being compared.
    plot_type : str, optional
        Specify "jointplot", "regplot", "boxplot", or "barplot".
    stat_func : function, optional.
        Specify which function to use for the statistical test.
    show_stat_func : bool, optional
        Whether or not to show the stat_func result in the plot itself.
    plot_kwargs : dict, optional
        kwargs to pass through to plotting functions.
    **kwargs
        Passed through to `self.as_dataframe`.

    Returns
    -------
    CorrelationResults
        Holds the coefficient and p-value returned by `stat_func`, the
        function itself, both series and the plot object.

    Raises
    ------
    ValueError
        If `plot_type` is not supported, if `on` does not resolve to exactly
        two columns, or if `x_col` is not one of those two columns.
    """
    if plot_type not in ["boxplot", "barplot", "jointplot", "regplot"]:
        raise ValueError("Invalid plot_type %s" % plot_type)

    # Avoid a shared mutable default; None means "no extra plotting kwargs".
    if plot_kwargs is None:
        plot_kwargs = {}

    plot_cols, df = self.as_dataframe(on, return_cols=True, **kwargs)
    if len(plot_cols) != 2:
        raise ValueError("Must be comparing two columns, but there are %d columns" % len(plot_cols))
    for plot_col in plot_cols:
        df = filter_not_null(df, plot_col)

    if x_col is None:
        x_col = plot_cols[0]
        y_col = plot_cols[1]
    elif x_col == plot_cols[0]:
        y_col = plot_cols[1]
    elif x_col == plot_cols[1]:
        y_col = plot_cols[0]
    else:
        # Previously any other value was silently paired with plot_cols[0].
        raise ValueError("x_col %s is not one of the compared columns %s"
                         % (x_col, list(plot_cols)))

    series_x = df[x_col]
    series_y = df[y_col]
    coeff, p_value = stat_func(series_x, series_y)

    if plot_type == "jointplot":
        plot = sb.jointplot(data=df, x=x_col, y=y_col,
                            stat_func=stat_func if show_stat_func else None,
                            **plot_kwargs)
    elif plot_type == "regplot":
        plot = sb.regplot(data=df, x=x_col, y=y_col, **plot_kwargs)
    elif plot_type == "boxplot":
        plot = stripboxplot(data=df, x=x_col, y=y_col, **plot_kwargs)
    else:
        plot = sb.barplot(data=df, x=x_col, y=y_col, **plot_kwargs)

    return CorrelationResults(coeff=coeff, p_value=p_value, stat_func=stat_func,
                              series_x=series_x, series_y=series_y, plot=plot)
def main():
    """Command-line entry point: run (or profile) the analysis and save the results.

    Without ``--profile`` the data returned by ``run`` is written to
    ``tmp/cp-analysis-<args>.csv`` and a regression joint plot of
    ``speedup`` against ``ratio`` is saved next to it as a PNG.  With
    ``--profile`` only ``profile_run`` is called.
    """
    import os
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('--minlength', type=int, default=5)
    parser.add_argument('--maxlength', type=int, default=30)
    parser.add_argument('--examples', type=int, required=True)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--grammar', choices=('medium','big'), default='medium')
    parser.add_argument('--aggressive', type=float, default=0,
                        help='Pruning rate (zero=no pruning, one=lots of pruning).')
    args = parser.parse_args()

    # Both entry points take exactly the same keyword arguments.
    run_kwargs = dict(examples=args.examples,
                      grammar=args.grammar,
                      maxlength=args.maxlength,
                      minlength=args.minlength,
                      aggressive=args.aggressive,
                      seed=args.seed)

    if args.profile:
        profile_run(**run_kwargs)
    else:
        d = run(**run_kwargs)

        out_dir = 'tmp'
        # Make sure the output directory exists so a finished run is not
        # lost to a failed write.
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        filename_base = 'tmp/cp-analysis-' + '-'.join('%s_%s' % (k,v) for k,v in sorted(vars(args).items()))
        d.to_csv('%s.csv' % filename_base)
        plot = sns.jointplot(x='ratio', y='speedup', data=d, kind='reg')
        plot.savefig('%s.png' % filename_base)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('[info] wrote %s.csv' % filename_base)

    print('== DONE ==')
    pl.ioff()
    pl.show()
def sb_jointplot(self, req, debug=False):
    """Render a seaborn joint plot described by ``req`` and save it to disk.

    Keys read from ``req``:

    * ``"ImgFile"``  -- path the figure is saved to (required)
    * ``"SourceDF"`` -- data passed to seaborn as ``data`` (required)
    * ``"ShowPlot"`` -- when equal to True, ``plt.show()`` is called (required)
    * ``"X"`` / ``"Y"`` -- column names, default "measurement" / "value"
    * ``"Width"`` / ``"Height"`` -- figure size, default 15.0 each
    * ``"Kind"`` -- jointplot kind, default "reg"

    :param req: dict-like request as described above
    :param debug: when true, the target file name is logged via ``self.lg``
    :return: list containing the saved image file name
    """
    # Only the modules this method uses are imported here. The original also
    # imported matplotlib.finance, which is unused and fails to import on
    # current matplotlib releases.
    import matplotlib.pyplot as plt
    import seaborn as sns

    image_list = []
    image_filename = req["ImgFile"]
    source_df = req["SourceDF"]

    sns.set_style("whitegrid", {'axes.grid' : True})
    # NOTE(review): the returned palette is discarded, so this call does not
    # change the active palette -- possibly sns.set_palette was intended.
    sns.color_palette("Set1", n_colors=8, desat=.5)

    # Parse optional inputs, falling back to the defaults.
    cur_xlabel = str(req["X"]) if "X" in req else "measurement"
    cur_ylabel = str(req["Y"]) if "Y" in req else "value"
    cur_width = float(req["Width"]) if "Width" in req else 15.0
    cur_height = float(req["Height"]) if "Height" in req else 15.0
    cur_kind = str(req["Kind"]) if "Kind" in req else "reg"
    # end of parsing inputs

    # Add custom plots here
    grid = sns.jointplot(x=cur_xlabel, y=cur_ylabel, kind=cur_kind, data=source_df,
                         annot_kws=dict(stat="r"))

    if debug:
        self.lg("Saving File(" + str(image_filename) + ")", 6)

    grid.fig.set_figwidth(cur_width)
    grid.fig.set_figheight(cur_height)

    ax = grid.ax_joint
    self.pd_add_footnote(ax.figure)
    ax.figure.savefig(image_filename)
    image_list.append(image_filename)

    if req["ShowPlot"] == True:
        plt.show()

    return image_list
# end of sb_jointplot
def testcase1():
    """Draw a regression joint plot of tip against total bill for the 'tips' dataset."""
    tips = sns.load_dataset('tips')
    # The dataset's column is named 'tip'; 'tips' does not exist in it.
    sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')
def scatterCorr(arrayA, arrayB, threshold, outPath):
    """
    Compute the Spearman correlation between two arrays and, when its
    absolute value reaches ``threshold``, save a KDE joint plot as a PDF.

    Interpretation of strength of correlation
    very weak: < 0,15
    weak: 0,15-0,25
    moderate: 0,25-0,40
    strong: 0,40-0,75
    very strong: >0,75

    The plot is written to ``<outPath>_<coefficient>_correlation.pdf``.

    Returns the tuple ``(coefficient, pvalue)``, with the coefficient
    rounded to three decimals.
    """
    corr = stats.spearmanr(arrayA, arrayB)
    coefficient = float(format(corr[0], '.3f'))
    pvalue = float(corr[1])
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("pvalue:  %s" % pvalue)

    ## Make scatterplot if rho >= threshold or <= -threshold
    if (coefficient >= threshold) or (coefficient <= -threshold):
        # jointplot creates its own figure, so no figure is created up front
        # (the original opened an extra empty figure that was never used).
        plot = sns.jointplot(x=arrayA, y=arrayB, kind="kde", space=0, xlim=(0,30),
                             gridsize=50, dropna=True, cmap="Blues",
                             stat_func=stats.spearmanr)

        # Label the joint axes explicitly; pyplot's xlabel/ylabel act on the
        # current axes, which need not be the joint axes.
        plot.set_axis_labels('# L1', 'Replication time', fontsize=12)

        ## Save figure
        fileName = outPath + '_' + str(coefficient) + '_correlation.pdf'
        plot.fig.savefig(fileName)
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(plot.fig)

    return coefficient, pvalue

#### MAIN ####
## Import modules ##