def draw(self):
    """
    Draw one vertical line per token of the current data table.

    Each line goes into the subplot that matches the factor level
    combination of its row. The horizontal position is the token's corpus
    id, so tokens from the same part of the corpus end up with lines that
    are close to each other.
    """
    def plot_facet(data, color):
        # `color` belongs to the callback signature but is not used here.
        token_ids = data["coquery_invisible_corpus_id"]
        level_values = data[self._groupby[-1]]
        lineplot(
            x=token_ids,
            y=level_values,
            order=self._levels[-1],
            palette=self.options["color_palette_values"],
            data=data)

    self.map_data(plot_facet)

    x_label = utf8(self.options["label_x_axis"])
    y_label = utf8(self.options["label_y_axis"])
    self.g.set_axis_labels(x_label, y_label)

    # The x axis spans the whole (unfiltered) corpus.
    corpus = options.cfg.main_window.Session.Corpus
    self.g.set(xlim=(0, corpus.get_corpus_size(filters=[])))
def cross_section_cndl(data, factor_name):
    """Draw a box plot of one factor, with one box per date.

    NOTE(review): the original docstring was unreadable (encoding loss);
    this description is reconstructed from the code.

    ------------------------------
    data:DataFrame(index:[Date,IDs],factor1,factor2,...)
    factor_name:str

    Returns the axes object produced by ``sns.boxplot``.
    """
    # Turn the index levels into ordinary columns so 'Date' can be used as x.
    flat = data.reset_index()
    sns.set(style='ticks')
    box_axes = sns.boxplot(x='Date', y=factor_name, data=flat, palette='PRGn')
    sns.despine(offset=10, trim=True)
    return box_axes

# NOTE(review): the section comments that followed here were unreadable
# (encoding loss) and could not be recovered.
def swarm(data, x, y, xscale='linear', yscale='linear'):
    """Draw a swarm plot of ``data[y]`` against ``data[x]``.

    ``xscale`` and ``yscale`` are applied to the resulting axes. The lower
    y limit is set to the minimum of ``data[y]``; the upper limit is left
    untouched.
    """
    # Seaborn look: white background, muted palette, thin lines.
    sns.set(style="white", palette="muted")
    sns.set_context("notebook", font_scale=1, rc={"lines.linewidth": 0.2})

    # Create the plot.
    swarm_axes = sns.swarmplot(x=x, y=y, data=data, palette='RdYlGn')
    plt.tick_params(axis='both', which='major', pad=10)
    for scale_option in ({'xscale': xscale}, {'yscale': yscale}):
        swarm_axes.set(**scale_option)

    # Plot limits: only the lower bound is given.
    lowest = data[y].min().min()
    plt.ylim(lowest)
    sns.despine()
def histogram(data, variables):
    """Plot one 8-bin histogram per variable, side by side in a single row.

    Parameters
    ----------
    data : mapping-like (e.g. DataFrame)
        Indexed with each entry of *variables* to get the values to bin.
    variables : sequence of str
        Names of the columns to plot; each is also used as the x label.
    """
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 0})
    sns.set_style('white')

    # squeeze=False always returns a 2-D array of axes. Without it a single
    # variable yields a bare Axes object and indexing it raises TypeError.
    _, axes = plt.subplots(1, len(variables), figsize=(19, 5), squeeze=False)

    for ax, variable in zip(axes[0], variables):
        ax.hist(data[variable], lw=0, color="indianred", bins=8)
        ax.tick_params(axis='both', which='major', pad=15)
        ax.set_xlabel(variable)
        ax.set_yticklabels([])  # hide the count labels

    sns.despine(left=True)
def plot_time_corrs(subjects, axes):
    """Plot each subject's ``corr_times`` traces on its own axes.

    For every (subject, axes) pair the result file
    ``correlation_analysis/<subj>_rest_ifs.pkz`` is loaded with
    ``moss.load_pkl``. Positions whose ``corr_times_pctiles`` value exceeds
    95 are flagged with an asterisk marker at y = .0025.
    """
    positions = np.arange(1, 5)
    grays = [".2", ".5"]

    for subj, ax in zip(subjects, axes):

        res = moss.load_pkl(
            "correlation_analysis/{}_rest_ifs.pkz".format(subj))

        # One trace per column of corr_times (at most one per gray level).
        for trace, gray in zip(res.corr_times.T, grays):
            ax.plot(positions, trace, "o-", color=gray, ms=3, clip_on=False)

        significant = res.corr_times_pctiles > 95
        marker_y = np.ones(significant.sum()) * .0025
        ax.plot(positions[significant], marker_y,
                marker=(6, 2, 0), ls="", mew=.35, mec=".2", ms=3)

        ax.set(xticks=positions, xlim=(.6, 4.4), ylim=(0, .07))
        sns.despine(ax=ax, trim=True)

    # Only the first panel keeps its y tick labels and gets the axis label.
    plt.setp(axes[1:], yticklabels=[])
    axes[0].set_ylabel("Correlation (r)")
def image(path, costs):
    """Render a bar chart of counts per cost bucket and save it to *path*.

    Parameters
    ----------
    path
        Destination handed to ``Figure.savefig``.
    costs : mapping
        Looked up with the bucket labels '0'..'6', '7+' and 'X'; a missing
        bucket counts as 0.

    Returns
    -------
    path, unchanged.
    """
    buckets = ['0', '1', '2', '3', '4', '5', '6', '7+', 'X']
    counts = [costs.get(bucket, 0) for bucket in buckets]

    sns.set_style('white')
    # NOTE(review): sns.set() also resets the style to its own default, so
    # the set_style('white') call above is probably overridden - confirm.
    sns.set(font='Concourse C3', font_scale=3)
    # x/y are passed by keyword: positional data arguments are rejected by
    # current seaborn releases.
    g = sns.barplot(x=buckets, y=counts, palette=['grey'] * len(buckets))
    g.axes.yaxis.set_ticklabels([])

    # Smaller font for the value labels drawn above the bars.
    sns.set(font='Concourse C3', font_scale=2)
    for rect, count in zip(g.patches, counts):
        if count == 0:
            continue  # no label above empty buckets
        g.text(rect.get_x() + rect.get_width() / 2,
               rect.get_height() + 0.5,
               count, ha='center', va='bottom')

    g.margins(y=0, x=0)
    sns.despine(left=True, bottom=True)
    g.get_figure().savefig(path, transparent=True, pad_inches=0,
                           bbox_inches='tight')
    plt.clf()  # Clear all data from matplotlib so it does not persist across requests.
    return path
def plot_decision_function(score_df, partition, output_file):
    """
    Plots the decision function for a given partition (either 'train' or
    'test') and saves a figure to file.

    Arguments:
    :param score_df: a specific folds decision scores and status; must have
                     the columns ``status``, ``partition`` and ``decision``
    :param partition: either 'train' or 'test' will plot performance
    :param output_file: file to output the figure
    """
    in_partition = score_df.partition == partition

    # (status value, line colour, legend label) of each density curve.
    curves = [(1, 'red', 'Deficient'), (0, 'blue', 'Wild-Type')]
    for status, color, label in curves:
        # DataFrame.ix was removed from pandas; the row selector is a
        # boolean mask, so .loc selects exactly the same rows.
        scores = score_df.loc[(score_df.status == status) & in_partition,
                              'decision']
        ax = sns.kdeplot(scores, color=color, label=label, shade=True)

    ax.set(xlabel='Decision Function', ylabel='Density')
    ax.set_title('Classifier Decision Function')
    sns.despine()
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()
def makeDishwasherFig(ax=None, zNorm=True, save=True):
    """Plot the dishwasher time series with one dimension highlighted.

    Parameters
    ----------
    ax : axes or None
        Forwarded to ``ts.plot``; the axes it returns are used afterwards.
    zNorm : bool
        Forwarded to ``getFig1Ts``.
    save : bool
        When true the figure is saved under the name 'dishwasher'.
    """
    # Honour the caller's zNorm flag; it used to be hard-coded to True so
    # the parameter had no effect.
    ts = getFig1Ts(zNorm=zNorm, whichTs=WHICH_DISHWASHER_TS)

    colors = DISHWASHER_COLOR_PALETTE * 3  # cycles thru almost three times
    colors[DISHWASHER_DIM_TO_HIGHLIGHT] = DISHWASHER_HIGHLIGHT_COLOR
    colors = colors[:ts.data.shape[1]]  # one colour per data column

    ts.data[:, 2] /= 2  # scale the ugliest dim to make pic prettier
    ax = ts.plot(showLabels=False, showBounds=False, capYLim=900, ax=ax,
                 colors=colors)

    sb.despine(left=True)
    ax.set_title("Dishwasher", y=TITLE_Y_POS)

    plt.tight_layout()
    if save:
        saveFigWithName('dishwasher')

# ------------------------------------------------ MSRC
def makeGarbageDimTs():
    """Show a seeded synthetic walk drawn as a thick red line.

    The sequence comes from ``synth.notSoRandomWalk``; the x limits are
    padded by 1/17 of the sequence length on both sides and the y limits
    are twice the observed minimum and maximum.
    """
    np.random.seed(123)  # fixed seed: same sequence on every call
    n_samples = 750
    margin = n_samples / 17.
    walk = synth.notSoRandomWalk(n_samples, std=.05,
                                 trendFilterLength=(n_samples // 2),
                                 lpfLength=2)

    sb.set_style('white')
    ax = plt.subplots()[1]
    ax.plot(walk, lw=4, color="#CC0000")  # red I'm using in keynote

    x_limits = [-margin, n_samples + margin]
    ax.set_xlim(x_limits)
    y_limits = [np.min(walk) * 2, np.max(walk) * 2]
    ax.set_ylim(y_limits)

    sb.despine(left=True)
    plt.show()

# def makeMethodsWarpedTs():

# ================================================================ Better Fig1
def _generic_histogram(bars, legend_labels, title_string, pdf, ax, fig,
                       ylabel, names, box_label, bbox_to_anchor):
    """Add legend, title, labels and ticks to a bar chart, then close it.

    The legend shows the first element of every entry of *bars*, listed in
    reverse order and paired with *legend_labels* in reverse order.
    ``multipage_close(pdf)`` is called last.
    """
    legend_handles = [group[0] for group in bars[::-1]]
    legend_texts = legend_labels[::-1]
    fig.legend(legend_handles, legend_texts,
               bbox_to_anchor=bbox_to_anchor,
               frameon=True,
               title=box_label)

    ax.set_title(title_string)
    ax.set_ylabel(ylabel)
    set_ticks(names, ax)

    # One tick per name, shifted by half of bar_width (bar_width is not
    # defined in this function; presumably a module-level constant).
    tick_positions = np.arange(0, len(names)) + bar_width / 2.0
    ax.xaxis.set_ticks(tick_positions)

    sns.despine(top=True, right=True)
    multipage_close(pdf)
def plot_sequence_count(flu, fname=None, fs=12):
    """Bar chart of sample counts over time, with stacked regional bars.

    The 'global' counts are drawn first (labelled "Other"); the counts of
    every other region are then stacked on top of each other in front of
    them. The first three pivots are left out. The figure is saved to
    *fname* when one is given.
    """
    # make figure with region counts
    import seaborn as sns

    date_bins = pivots_to_dates(flu.pivots)
    sns.set_style('ticks')

    region_label = {'global': 'Global', 'NA': 'N America', 'AS': 'Asia',
                    'EU': 'Europe', 'OC': 'Oceania'}
    regions_abbr = ['global', 'NA', 'AS', 'EU', 'OC']
    region_colors = dict(zip(regions_abbr,
                             sns.color_palette(n_colors=len(regions_abbr))))

    fig, ax = plt.subplots(figsize=(8, 3))
    count_by_region = flu.mutation_frequency_counts

    drop = 3  # number of leading pivots that are not plotted
    stacked = np.zeros(len(flu.pivots[drop:]))
    bins = date_bins[drop:]
    bar_style = dict(width=18, linewidth=0, clip_on=False)

    plt.bar(bins, count_by_region['global'][drop:],
            label="Other", color="#bbbbbb", **bar_style)

    # region_groups is not defined in this function; presumably a
    # module-level collection of region keys - confirm.
    for region in region_groups:
        if region == 'global':
            continue
        regional = count_by_region[region][drop:]
        plt.bar(bins, regional, bottom=stacked,
                label=region_label[region], color=region_colors[region],
                **bar_style)
        stacked += regional

    make_date_ticks(ax, fs=fs)
    ax.set_ylabel('Sample count')
    ax.legend(loc=3, ncol=1, bbox_to_anchor=(1.02, 0.53))
    plt.subplots_adjust(left=0.1, right=0.82, top=0.94, bottom=0.22)
    sns.despine()

    if fname is not None:
        plt.savefig(fname)
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Produces univariate plots of a given set of columns. Horizontal bar
    plots of value counts are used for categorical columns (only the 20
    most frequent values when a column has more than 30 distinct ones),
    while histograms (with fitted density functions) are used for
    numerical columns.

    If use_target is true, the variation of the given columns with respect
    to the response variable is plotted instead, by delegating to
    explore_features_covariation.

    Parameters
    ----------
    col : a string of a column name, or a list of many columns names or
        None (default). If col is None, all columns will be used.
    use_target : bool, default False
        Whether to use the target column in the plots.
    **kwargs: additional arguments to be passed to seaborn's distplot or
        to pandas's plotting utilities.

    Raises
    ------
    TypeError
        If a column is neither numeric nor categorical.
    '''
    self._validate_params(params_list={'col': col},
                          expected_types={'col': [str, list, type(None)]})

    if type(col) is str:
        col = [col]
    if col is None:
        col = self._get_all_features()

    # The explicit comparisons with True/False are kept as they were so
    # that non-boolean values are treated exactly as before.
    if use_target == False:
        for column in col:
            if self.is_numeric(self.df[column]) == True:
                plt.figure(column)
                sns.distplot(self.df[column], color="m", **kwargs)
            elif self.is_categorical(self.df[column]) == True:
                plt.figure(column)
                n_distinct = len(self.df[column].unique())
                counts = self.df[column].value_counts()
                if n_distinct > 30:
                    # Too many levels to show: keep the 20 most frequent.
                    counts = counts[:20]
                # Reversed so that the most frequent value is drawn on top.
                counts[::-1].plot.barh(**kwargs)
            else:
                raise TypeError('TYPE IS NOT SUPPORTED')

            plt.title(column)
            plt.tight_layout()
    else:
        # use target variable
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y,
                                              **kwargs)
def kde(x, y, title='', color='YlGnBu', xscale='linear', yscale='linear'):
    """Draw a shaded bivariate kernel density estimate of *x* and *y*.

    Parameters
    ----------
    x, y
        Data handed to ``sns.kdeplot``.
    title : str
        Title of the plot.
    color : str
        Colormap name passed as ``cmap``.
    xscale, yscale : str
        Axis scales applied to the resulting axes.
    """
    sns.set_style('white')
    sns.set_context('notebook', font_scale=1, rc={"lines.linewidth": 0.5})

    # The stray ``set_title="test"`` keyword was dropped: it is not a
    # kdeplot option, and the title is set explicitly below.
    g = sns.kdeplot(x, y, shade=True, cut=2, cmap=color,
                    shade_lowest=False, legend=True)

    plt.tick_params(axis='both', which='major', pad=10)
    # Use pyplot directly; the ``sns.plt`` alias no longer exists in
    # seaborn and raised AttributeError.
    plt.title(title)
    g.set(xscale=xscale)
    g.set(yscale=yscale)
    sns.despine()
def regression(data, x, y, xscale='linear', yscale='linear'):
    """Draw a seaborn regression plot of ``data[y]`` against ``data[x]``.

    ``xscale`` and ``yscale`` are applied to the resulting axes, x first.
    """
    sns.set_context("notebook", font_scale=.8, rc={"lines.linewidth": 0})
    sns.set_style('white')

    reg_axes = sns.regplot(x=x, y=y, data=data)
    plt.tick_params(axis='both', which='major', pad=10)

    for option, scale in (("xscale", xscale), ("yscale", yscale)):
        reg_axes.set(**{option: scale})

    sns.despine()
def plot_points(df, axes):
    """Draw within/between correlation point plots for three experiments.

    One panel each for "dots", "sticks" and "rest": a point plot with a 95%
    interval per test, overlaid with one line per subject.
    """
    experiments = ["dots", "sticks", "rest"]
    context_labels = ["Same\ncontext", "Different\ncontext"]

    for exp, ax in zip(experiments, axes):

        # `exp` must keep this name: the query string refers to it as @exp.
        rows = df.query("exp == @exp")
        exp_df = pd.melt(rows, "subj", ["within", "between"], "test", "corr")

        # Point estimate and interval per test.
        sns.pointplot(x="test", y="corr", hue="test", data=exp_df,
                      dodge=.5, join=False, ci=95,
                      palette=[".15", ".5"], ax=ax)
        plt.setp(ax.lines, linewidth=2)

        # One light gray line per subject.
        sns.pointplot(x="test", y="corr", hue="subj", data=exp_df,
                      palette=[".75"], scale=.75, ax=ax)
        plt.setp(ax.collections[:], facecolor="w", zorder=20)

        ax.legend_ = None
        ax.set(ylabel="", xlabel="",
               xticks=[-.1, 1.1], xticklabels=context_labels)

    axes[0].set(ylim=(0, .105), ylabel="Timeseries correlation (r)")
    for panel in (1, 2):
        axes[panel].set(ylim=(0, .0525))

    for ax in axes:
        sns.despine(ax=ax, trim=True)
def plot_prediction_curves(subjects, axes, exp):
    """Plot each subject's normalized prediction error curve.

    Results are loaded from ``spatial_analysis/<subj>_<exp>_ifs.pkz``. The
    real error, its interval and the y coordinate of the intersect are all
    divided by the mean of the null values. For the "dots" experiment the
    tick labels are arranged for a second row that starts at ``axes[7]``.
    """
    res_ftemp = "spatial_analysis/{}_{}_ifs.pkz"

    for subj, ax in zip(subjects, axes):

        res = moss.load_pkl(res_ftemp.format(subj, exp))
        steps = res.steps
        null_mean = res.null.mean()
        observed = res.real / null_mean
        interval = res.pint / null_mean

        ax.plot(steps, observed, "o-", color=".15", ms=2.5, clip_on=False)
        ax.fill_between(steps, *interval, color=".4", alpha=.3)

        # Dashed vertical line from the x axis up to the intersect point.
        cross_x, cross_y = res.intersect
        cross_y /= null_mean
        ax.plot([cross_x, cross_x], [0, cross_y],
                lw=.8, dashes=[3, 1], color=".5", zorder=0)

        ax.set(xlim=(0, 40), ylim=(0, 2),
               xticks=np.linspace(0, 40, 5),
               yticks=[0, 1, 2], yticklabels=[0, 1, 2])
        sns.despine(ax=ax)

    ylabel = "Normalized error"
    plt.setp(axes[1:7], yticklabels=[])
    axes[0].set(ylabel=ylabel)

    if exp == "dots":
        plt.setp(axes[8:], yticklabels=[])
        plt.setp(axes[:7], xticklabels=[])
        axes[7].set_ylabel(ylabel)
def plot_distance_corrs(subjects, axes, exp):
    """Plot subnetwork strength against distance threshold per subject.

    Results are loaded from ``correlation_analysis/<subj>_<exp>_ifs.pkz``.
    For the "3D" and "2D" entries the difference between the two columns of
    ``corr_distance`` (same minus different) is drawn, and thresholds whose
    percentile exceeds 95 are flagged with a marker below the curves. The
    y range is doubled for the "dots" experiment.
    """
    for subj, ax in zip(subjects, axes):

        res_fname = "correlation_analysis/{}_{}_ifs.pkz".format(subj, exp)
        res = moss.load_pkl(res_fname)
        x = res.distance_thresh

        for dim, color, marker in zip(["3D", "2D"], [".5", ".2"], ["x", "+"]):

            same, diff = res.corr_distance[dim].T
            ax.plot(x, same - diff, "o-", color=color, ms=3, clip_on=False)

            sig = res.corr_distance_pctiles[dim] > 95
            stary = -.005 if exp == "dots" else -.0025
            ax.plot(x[sig], np.ones(sig.sum()) * stary,
                    marker=marker, ls="", mew=.35, mec=".2", ms=3)

        ylim = (-.01, .08) if exp == "dots" else (-.005, .04)
        yticks = np.array([0, .01, .02, .03, .04])
        yticks = yticks * 2 if exp == "dots" else yticks
        ax.set(xlim=(-2, 42), ylim=ylim, yticks=yticks)

        sns.despine(ax=ax, trim=True)

    # The backslashes of \mathrm are escaped explicitly: "\m" is an invalid
    # escape sequence in a normal string literal. A raw string is not an
    # option because "\n" must stay a real newline. The value is unchanged.
    ylabel = ("Subnetwork strength\n"
              "($r_{\\mathrm{same}} - r_{\\mathrm{diff}}$)")
    plt.setp(axes[1:7], yticklabels=[])
    axes[0].set_ylabel(ylabel)

    if exp == "dots":
        plt.setp(axes[8:], yticklabels=[])
        plt.setp(axes[:7], xticklabels=[])
        axes[7].set_ylabel(ylabel)
def plot_swarms(df, axes, palette):
    """Draw decoding accuracy per ROI for the two experiments.

    Each panel shows a dashed reference line at .5, the per-ROI point
    estimate (converted by ``points_to_lines``) and a swarm of the
    individual values. *axes* must hold exactly two axes.
    """
    accuracy_ticks = [.4, .5, .6, .7, .8, .9]

    for exp, ax in zip(["dots", "sticks"], axes):

        # `exp` must keep this name: the query string refers to it as @exp.
        exp_df = df.query("experiment == @exp")

        ax.axhline(.5, .1, .9, dashes=(5, 2), color=".6")
        ax.set(ylim=(.4, .9), yticks=accuracy_ticks)

        sns.pointplot(x="roi", y="acc", data=exp_df,
                      palette=palette, join=False, ci=None, ax=ax)
        points_to_lines(ax, lw=3)

        sns.swarmplot(x="roi", y="acc", data=exp_df, size=4,
                      color=".85",
                      linewidth=1, edgecolor=".4", ax=ax)

        ax.set(xlabel="", ylabel="", xticklabels=["IFS", "MFC"])

    left_ax, right_ax = axes
    left_ax.set(ylabel="Decoding accuracy")
    right_ax.set(yticks=[])

    title_style = dict(ha="center", va="center", size=7.5)
    left_ax.text(.5, .91, "Experiment 1", **title_style)
    right_ax.text(.5, .91, "Experiment 2", **title_style)

    sns.despine(ax=left_ax, trim=True)
    sns.despine(ax=right_ax, left=True, trim=True)
def plot_cluster_error(ax):
    """Plot the across-subject mean of the normalized error per experiment.

    For "dots" and "sticks" every subject's real error is divided by the
    mean of its null values; the mean over subjects is drawn as a line with
    a band of one standard error (``stats.sem``) on either side. A dashed
    line marks y = 1.
    """
    res_ftemp = "spatial_analysis/{}_{}_ifs.pkz"

    for exp in ["dots", "sticks"]:

        subjects = get_subject_order(exp)
        color = get_colormap(exp, as_cmap=False)[20]

        per_subject = []
        for subj in subjects:
            res = moss.load_pkl(res_ftemp.format(subj, exp))
            # The steps of the last subject loaded are used as x values.
            x = res.steps
            per_subject.append(res.real / res.null.mean())

        errs = np.vstack(per_subject)
        center = errs.mean(axis=0)
        ax.plot(x, center, color=color, lw=2)

        spread = stats.sem(errs, axis=0)
        ax.fill_between(x, center - spread, center + spread,
                        alpha=.2, color=color)

    ax.axhline(y=1, lw=1, dashes=[5, 2],
               color=".5", zorder=0,
               xmin=.02, xmax=.98)

    ax.set(xlim=(0, 42),
           ylim=(.55, 1.45),
           yticks=[.6, .8, 1, 1.2, 1.4],
           xticks=[0, 10, 20, 30, 40],
           xlabel="Neighborhood radius (mm)",
           ylabel="Normalized error")

    sns.despine(ax=ax, trim=True)
def plot_corrmats(subjects, axes, exp):
    """Show each subject's correlation matrix as an image.

    The matrix is loaded from ``correlation_analysis/<subj>_<exp>_ifs.pkz``
    and the identity matrix is subtracted before it is drawn.
    """
    fname_template = "correlation_analysis/{}_{}_ifs.pkz"

    for subj, ax in zip(subjects, axes):

        corrmat = moss.load_pkl(fname_template.format(subj, exp)).corrmat
        off_diagonal = corrmat - np.eye(len(corrmat))

        ax.imshow(off_diagonal, cmap="RdBu_r",
                  vmin=-.15, vmax=.15, rasterized=True)

        ax.set(xticks=[], yticks=[])
        sns.despine(ax=ax, left=True, bottom=True)
def plot_scatters(subjects, axes):
    """Scatter "rest" correlations against "sticks" correlations.

    Only the entries above the diagonal of each subject's matrices are
    used. A dashed identity line is added to every panel.
    """
    ftemp = "correlation_analysis/{}_{}_ifs.pkz"

    for subj, ax in zip(subjects, axes):

        sticks = moss.load_pkl(ftemp.format(subj, "sticks")).corrmat
        rest = moss.load_pkl(ftemp.format(subj, "rest")).corrmat

        upper = np.triu_indices_from(rest, 1)  # strictly above the diagonal
        ax.scatter(sticks[upper], rest[upper], s=3, linewidth=.2,
                   color=".6", edgecolor="w", rasterized=True)

        identity = [-.2, .8]
        ax.plot(identity, identity, lw=1, dashes=(5, 2), color=".3")

    ticks = np.linspace(-.2, .8, 6)
    plt.setp(axes,
             xlim=(-.25, .8), ylim=(-.25, .8),
             xticks=ticks, yticks=np.linspace(-.2, .8, 6),
             aspect="equal")
    plt.setp(axes[1:], yticklabels=[])

    for ax in axes:
        sns.despine(ax=ax, trim=True)
        for tick_labels in (ax.get_xticklabels(), ax.get_yticklabels()):
            plt.setp(tick_labels, size=6)
def plot_kdes(subjects, axes):
    """Draw density curves of the "sticks" and "rest" correlations.

    Only the entries above the diagonal of each subject's matrices are
    used. The legend is kept on the first panel only.
    """
    ftemp = "correlation_analysis/{}_{}_ifs.pkz"

    for subj, ax in zip(subjects, axes):

        sticks = moss.load_pkl(ftemp.format(subj, "sticks")).corrmat
        rest = moss.load_pkl(ftemp.format(subj, "rest")).corrmat

        upper = np.triu_indices_from(rest, 1)  # strictly above the diagonal

        sns.kdeplot(sticks[upper], color=".15",
                    label="residual", ax=ax)
        sns.kdeplot(rest[upper], color=".45", dashes=[4, 1],
                    label="resting", ax=ax)

    plt.setp(axes,
             xlim=(-.25, .8), ylim=(0, 17),
             xticks=np.linspace(-.2, .8, 6),
             yticks=[])

    for ax in axes:
        sns.despine(ax=ax, left=True, trim=True)
        for tick_labels in (ax.get_xticklabels(), ax.get_yticklabels()):
            plt.setp(tick_labels, size=6)

    axes[0].legend(bbox_to_anchor=(1.2, .8))
    for other_ax in axes[1:]:
        other_ax.legend_ = None
def start_plotting(fig_size, fig_pos, style="white", rc=None, despine=False):
    """Create a figure with a single axes under a seaborn axes style.

    Parameters
    ----------
    fig_size
        Passed to ``plt.figure`` as ``figsize``.
    fig_pos
        Passed to ``fig.add_axes``; when falsy a plain ``111`` subplot is
        added instead.
    style, rc
        Passed to ``sns.axes_style``.
    despine : bool
        When true, ``sns.despine(left=True)`` is called afterwards.

    Returns
    -------
    (fig, ax)
    """
    with sns.axes_style(style, rc):
        fig = plt.figure(figsize=fig_size)
        ax = fig.add_axes(fig_pos) if fig_pos else fig.add_subplot(111)

    if despine:
        sns.despine(left=True)

    return fig, ax
def plot_box(self, fname_out=None):
    """Box plot of r2 per Method from ``self.df_best_expand``.

    The figure is saved to *fname_out* when given. Otherwise, if
    ``self.fname`` is set, the file name is that name without its last four
    characters plus '_box.eps'. If neither is available nothing is saved.
    """
    sns.boxplot(x="Method", y="r2", data=self.df_best_expand, palette="PRGn")
    sns.despine(offset=10, trim=True)
    plt.ylabel(r"$r^2$")
    plt.xlabel("Methods")

    if fname_out is None:
        if self.fname is None:
            return
        # Fall back to a name derived from self.fname.
        fname_out = self.fname[:-4] + '_box.eps'
        print('Default: the figure of self.df_best_expand is saved to', fname_out)

    # index should be stored.
    plt.savefig(fname_out)
def boxplot_expension(pdr, method_l, x="Group", y="RP", hue="Method"):
    """Expand *pdr* with ``expension_4_boxplot`` and draw a grouped box plot.

    Parameters
    ----------
    pdr
        Input data handed to ``expension_4_boxplot``.
    method_l : list
        Methods to include, e.g.
        ['No_Regression', 'Mean_Compensation', 'Linear', 'Exp'].
    x, y, hue : str
        Column names for the x axis, the values and the grouping. They are
        passed both to ``expension_4_boxplot`` and to ``sns.boxplot``.
    """
    pdw = expension_4_boxplot(pdr, method_l, x=x, y=y, hue=hue)
    # x and hue used to be hard-coded to "Group"/"Method" here, so custom
    # values were applied to the expansion but ignored by the plot. The
    # defaults give the same result as before.
    # NOTE(review): assumes expension_4_boxplot names its output columns
    # after x and hue, as it evidently does for y - confirm.
    sns.boxplot(x=x, y=y, hue=hue, data=pdw, palette="PRGn")
    sns.despine(offset=10, trim=True)
def print_2D(points, label, id_map):
    '''
    Scatter-plot 2-D points with one colour and marker per label value.

    points: N_samples * 2
    label: (int) N_samples
    id_map: map label id to its name

    The distinct label values are printed. Returns the new figure.
    '''
    fig = plt.figure()

    # Smaller markers when there are many points.
    n_cell, _n_dims = points.shape
    s = 10 if n_cell > 500 else 20

    ax = plt.subplot(111)
    label_ids = np.unique(label)
    print(label_ids)

    # current_palette and markers_keys are not defined in this function;
    # presumably module-level lookups indexed by label id - confirm.
    for i in label_ids:
        members = label == i
        ax.scatter(points[members, 0], points[members, 1],
                   c=current_palette[i], label=id_map[i],
                   s=s, marker=markers_keys[i])

    # Shrink the axes by 10% from the bottom to make room for the legend.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.1,
                     box.width, box.height * 0.9])
    ax.legend(scatterpoints=1, loc='upper center',
              bbox_to_anchor=(0.5, -0.08), ncol=6,
              fancybox=True,
              prop={'size': 8})

    sns.despine()
    return fig
def makeMsrcFig(ax=None, save=True):
    """Plot the MSRC-12 example time series; save it as 'msrc' if asked."""
    msrc_ts = getGoodMsrcTs()
    ax = msrc_ts.plot(showLabels=False, showBounds=False, ax=ax)

    sb.despine(left=True)
    ax.set_title("MSRC-12", y=TITLE_Y_POS)
    plt.tight_layout()

    if not save:
        return
    saveFigWithName('msrc')

# ------------------------------------------------ UCR
def makeUcrFig(ax=None, save=True):
    """Plot the UCR example time series; save it as 'ucr' if asked."""
    ucr_ts = getGoodUcrTs()
    plot_options = dict(showLabels=False, showBounds=False, linewidths=3.)
    ax = ucr_ts.plot(ax=ax, **plot_options)

    sb.despine(left=True)
    ax.set_title("UCR", y=TITLE_Y_POS)
    plt.tight_layout()

    if not save:
        return
    saveFigWithName('ucr')

# ------------------------------------------------ Tidigits

# @memory.cache
def makeTidigitsFig(ax=None, save=True, whichTs=-1):
    """Plot a TIDIGITS example time series; save it as 'tidigits' if asked.

    ``whichTs`` is forwarded to ``getGoodTidigitsTs``.
    """
    tidigits_ts = getGoodTidigitsTs(whichTs=whichTs)
    plot_options = dict(showLabels=False, showBounds=False, linewidths=3.)
    ax = tidigits_ts.plot(ax=ax, **plot_options)

    sb.despine(left=True)
    ax.set_title("TIDIGITS", y=TITLE_Y_POS)
    plt.tight_layout()

    if not save:
        return
    saveFigWithName('tidigits')

# ------------------------------------------------ Combined
def makeFig1():
    """Show the three-panel figure for the dishwasher time series.

    The top panel spans both columns and shows the time series itself; the
    bottom panels show it as labelled by ``labelTs_sota`` (left) and
    ``labelTs_ff`` (right).
    """
    ts = getFig1Ts()

    # set up axes
    grid = (2, 2)
    ax_top = plt.subplot2grid(grid, (0, 0), colspan=2)
    ax_left = plt.subplot2grid(grid, (1, 0))
    ax_right = plt.subplot2grid(grid, (1, 1))

    for panel in (ax_top, ax_left, ax_right):
        panel.autoscale(tight=True)
        sb.despine(left=True, ax=panel)

    ts.plot(showLabels=False, showBounds=False, ax=ax_top)

    lengths = [150]
    labelTs_sota(ts, lengths).plot(showLabels=False, ax=ax_left)

    labelTs_ff(ts, 100, 200).plot(showLabels=False, ax=ax_right)  # Lmin, Lmax

    plt.setp(ax_right.get_yticklabels(), visible=False)

    ax_top.set_title("Patterns in Dishwasher Dataset")
    ax_top.set_xlabel("Minute")
    ax_left.set_title("State-of-the-art")
    ax_right.set_title("Proposed")

    plt.tight_layout()
    plt.show()
def enrich_signature(method="pca", percentile=99, results_dir="results", experiment="CROP-seq_Jurkat_TCR", n_genes=500):
    """Run Enrichr on the most extreme genes of a signature and plot top terms.

    The signature is read from
    ``<results_dir>/<experiment>.differential_expression.<method>.stimutation.csv``.
    Genes whose absolute value lies above the given percentile are sent to
    ``enrichr``; the result is written to CSV and the 8 highest-scoring
    terms of each selected library are drawn as a horizontal bar plot (SVG).

    Parameters
    ----------
    method : str
        Used in the input and output file names.
    percentile : number
        Percentile of the absolute values used as the cut-off.
    results_dir : str
        Directory for input and output files.
    experiment : str
        Used in the input file name.
    n_genes : int
        Not used by this function.
    """
    # NOTE(review): ``squeeze=True`` was removed from read_csv in pandas 2.0;
    # left untouched here because the exact shape of the input is not visible.
    diff = pd.read_csv(
        os.path.join(results_dir, "{}.differential_expression.{}.stimutation.csv".format(experiment, method)),
        squeeze=True, index_col=0, header=None, names=["gene_name"])
    degs = pd.Series(diff[abs(diff) > np.percentile(abs(diff), percentile)].index)
    degs.name = "gene_name"

    enr = enrichr(degs.reset_index())
    enr.to_csv(os.path.join(results_dir, "differential_expression.{}.enrichr.csv".format(method)), index=False, encoding="utf8")

    # Plot top N terms of each library
    n = 8

    to_plot = [
        'GO_Biological_Process_2015',
        "KEGG_2016",
        "WikiPathways_2016",
        "Reactome_2016",
        "BioCarta_2016",
        "NCI-Nature_2016"]

    in_libraries = enr[enr['gene_set_library'].isin(to_plot)]
    # Level 1 of the grouped nlargest() index holds the original row labels.
    top_rows = (in_libraries.groupby("gene_set_library")['combined_score']
                .nlargest(n).index.get_level_values(1))
    # .loc replaces the removed DataFrame.ix; the selection is by label.
    p = enr.loc[top_rows].sort_values("combined_score", ascending=False)

    fig, axis = plt.subplots(1)
    sns.barplot(data=p, y="description", x="combined_score", orient="horiz", hue="gene_set_library")
    axis.set_xlabel("Combined score")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "differential_expression.{}.enrichr.top{}_terms.svg".format(method, n)), bbox_inches="tight")
def gRNA_scatter(s1, s2, prefix="", text=False, n_labels=30):
    """Scatter gRNA abundance in each screen against the baseline sample.

    One panel per column of *s2* (in reverse order) on a 3x2 grid. Both
    axes show log2(1 + count). gRNAs whose name contains "Wnt" are left
    out. The figure is written as SVG and PDF into ``results_dir``, which
    is not a parameter here (presumably a module-level name - confirm).

    Parameters
    ----------
    s1, s2 : DataFrame
        Joined on their index; *s2* supplies the screens to plot.
    prefix : str
        Used in the output file names. A prefix starting with
        "mid_screen-" selects "gDNA_Jurkat" as baseline, anything else
        "plasmid_pool_TCR".
    text : bool
        Label the gRNAs matching ETS1, GATA3 or RUNX1.
    n_labels : int
        Not used by this function.
    """
    # Scatter of gRNA change
    fig, axis = plt.subplots(3, 2, sharex=False, sharey=False, figsize=(8, 8))
    axis = axis.flatten()

    palette = sns.color_palette("colorblind")

    for i, screen in enumerate(s2.columns[::-1]):
        x = s1.join(s2)  # .fillna(0)
        x = x.iloc[np.random.permutation(len(x))]  # shuffle the draw order

        # .loc replaces the removed .ix indexer (label-based selection).
        x = x.loc[x.index[~x.index.str.contains("Wnt")]]

        if prefix.startswith("mid_screen-"):
            b = x["gDNA_Jurkat"]
        else:
            b = x["plasmid_pool_TCR"]
        x = x.fillna(0)
        b = b.fillna(0)

        # One boolean column per category, keyed by the category's colour;
        # each gRNA gets the colour of the first category it matches.
        colors = pd.DataFrame()
        colors[palette[0]] = x.index.str.contains("Wnt")
        colors[palette[1]] = x.index.str.contains("CTRL")
        colors[palette[2]] = x.index.str.contains("Tcr")
        colors[palette[3]] = x.index.str.contains("Ess")
        colors = colors.apply(lambda row: row[row].index.tolist()[0], axis=1).tolist()

        axis[i].scatter(np.log2(1 + x[screen]), np.log2(1 + b), color=colors, alpha=0.5)
        if text:
            for j in x[x.index.str.contains("ETS1|GATA3|RUNX1")].index:
                axis[i].text(
                    np.log2(1 + x[screen].loc[j]),
                    np.log2(1 + b.loc[j]),
                    j)

        # x = y line
        lims = [np.nanmin([np.log2(1 + x[screen]), np.log2(1 + b)]), np.nanmax([np.log2(1 + x[screen]), np.log2(1 + b)])]
        axis[i].plot((lims[0], lims[1]), (lims[0], lims[1]), linestyle='--', color='black', alpha=0.75)

        axis[i].set_title(screen)

    # y labels on the left column, x labels on the bottom row.
    for i in range(0, len(axis), 2):
        axis[i].set_ylabel("gRNA frequency in plasmid (log2)")
    for ax in axis[-2:]:
        ax.set_xlabel("gRNA frequency in CROP-seq screen (log2)")
    sns.despine(fig)
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.scatter.{}svg".format(prefix, "text." if text else "")), bbox_inches="tight")
    fig.savefig(os.path.join(results_dir, "gRNA_counts.norm.{}.scatter.{}pdf".format(prefix, "text." if text else "")), bbox_inches="tight")
def main(args):
    """Plot a dendrogram of the sequences in ``args.fasta``.

    When ``args.mark`` is given, every sequence that is not found in that
    file gets ' (new)' appended to its label. The pairs with the smallest
    distance are logged. The figure is saved to ``args.plot``; with fewer
    than two sequences it only contains the text 'no sequences'.
    """
    with FastaReader(args.fasta) as fr:
        sequences = list(fr)
    logger.info('Plotting dendrogram of %s sequences', len(sequences))

    if not args.mark:
        labels = [s.name for s in sequences]
    else:
        with FastaReader(args.mark) as fr:
            mark = PrefixComparer(record.sequence for record in fr)
        labels = []
        n_new = 0
        for record in sequences:
            is_new = record.sequence not in mark
            if is_new:
                n_new += 1
            labels.append(record.name + (' (new)' if is_new else ''))
        logger.info('%s sequence(s) marked as "new"', n_new)

    sns.set_style("white")
    # Font size derived from the number of rows, clamped to 6..16; the
    # figure height follows from it.
    font_size = 297 / 25.4 * 72 / (len(labels) + 5)
    font_size = min(16, max(6, font_size))
    height = font_size * (len(labels) + 5) / 72
    fig = plt.figure(figsize=(210 / 25.4, height))
    matplotlib.rcParams.update({'font.size': 4})
    ax = fig.gca()
    sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)
    sns.set_style('whitegrid')

    if len(sequences) < 2:
        ax.text(0.5, 0.5, 'no sequences', fontsize='xx-large')
    else:
        dist_matrix = distances([s.sequence for s in sequences])
        condensed = distance.squareform(dist_matrix)
        smallest = condensed.min()
        mindist = int(smallest)
        logger.info('Smallest distance is %s. Found between:', mindist)
        for i, j in np.argwhere(dist_matrix == smallest):
            # The matrix holds every pair twice; report each pair once.
            if i < j:
                logger.info('%s and %s', labels[i], labels[j])
        linkage = hierarchy.linkage(condensed, method=args.method)
        hierarchy.dendrogram(linkage, labels=labels,
                             leaf_font_size=font_size, orientation='right',
                             color_threshold=0.95*max(linkage[:, 2]))

    ax.grid(False)
    fig.set_tight_layout(True)
    fig.savefig(args.plot)
def plot_box(df, x, y, hue, tag='eda', directory=None):
    r"""Draw a seaborn box plot and hand it to ``write_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the ``x`` and ``y`` features.
    x : str
        Variable name in ``df`` to display along the x-axis.
    y : str
        Variable name in ``df`` to display along the y-axis.
    hue : str
        Variable name to be used as hue, i.e., another data dimension.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.boxplot.html

    """

    logger.info("Generating Box Plot")

    # Draw the plot and fetch the figure it lives on.
    axes = sns.boxplot(x=x, y=y, hue=hue, data=df)
    sns.despine(offset=10, trim=True)
    figure = axes.get_figure()

    # Save the plot
    write_plot('seaborn', figure, 'box_plot', tag, directory)


#
# Function plot_swarm
#
def wasabiplot(bam_filename, chrom, start, stop, strand, log_base=10,
               color='steelblue', bad_cigar=INSERTION_DELETIONS,
               coverage_cigar=COVERAGE_CIGAR, junction_cigar=JUNCTION_CIGAR,
               ax=None, coverage_kws=None, curve_height_multiplier=0.2,
               text_kws=TEXT_KWS, patch_kws=PATCH_KWS, warn_skipped=True,
               annotate=True, **kwargs):
    """Plot read coverage and junctions of a genomic region on one axes

    Parameters
    ----------
    bam_filename : str or pandas.Series
        Name of the bam file. If a Series is given, its first element is
        used.
    chrom : str
        Name of the reference chromosome
    start, stop : int
        Genome-based locations of the start and stop regions
    strand : '+' | '-'
        Strand to query
    log_base : number or None, optional
        The base to use for log-scaling the data. e.g. 10 would have log10
        data. If None, the data is not log-scaled. (default=10)
    color : valid matplotlib color
        Color to use for both the coverage and junction plotting
    bad_cigar : tuple of str, optional
        Which CIGAR string flags are not allowed. (default=('I', 'D') aka
        insertion and deletion)
    coverage_cigar, junction_cigar : optional
        Passed on to ``WasabiPlotter`` unchanged.
    ax : matplotlib axes, optional
        Axes to draw on; the current axes are used when None.
    coverage_kws : dict, optional
        Extra keyword arguments for ``plot_coverage``. The dict is copied,
        never modified.
    curve_height_multiplier, text_kws, patch_kws, annotate : optional
        Passed on to ``plot_junctions`` unchanged.
    warn_skipped : bool, optional
        Passed on to ``WasabiPlotter`` unchanged.
    **kwargs
        Further keyword arguments for ``plot_coverage``; they take
        precedence over entries of ``coverage_kws``.
    """
    if isinstance(bam_filename, pd.Series):
        bam_filename = bam_filename.iloc[0]

    plotter = WasabiPlotter(bam_filename, chrom, start, stop, strand, log_base,
                            color, bad_cigar, coverage_cigar, junction_cigar,
                            warn_skipped)

    if ax is None:
        ax = plt.gca()

    # Merge into a fresh dict: updating the caller's dict in place leaked
    # the **kwargs of this call into every later call reusing that dict.
    merged_coverage_kws = {} if coverage_kws is None else dict(coverage_kws)
    merged_coverage_kws.update(kwargs)

    plotter.plot_coverage(color, ax, **merged_coverage_kws)
    plotter.plot_junctions(ax, curve_height_multiplier=curve_height_multiplier,
                           text_kws=text_kws, patch_kws=patch_kws,
                           annotate=annotate)

    # Remove bottom spine
    sns.despine(ax=ax, bottom=True)

    # Add a zero-axis line
    ax.hlines(0, 0, plotter.length, linewidth=0.5, zorder=-1)

    # Only the bottom row gets tick labels, shifted by `start`.
    if ax.is_last_row():
        xticks = [int(x + start) for x in ax.get_xticks()]
        ax.set(xticklabels=xticks)
def bars(data, color='black', title=''):
    """Bar chart of value counts in *data*, limited to 20 bars.

    Parameters
    ----------
    data : pandas.Series
        Values to count.
    color : str
        Colour of bars with a non-negative value; negative bars are red.
    title : str
        Title of the plot.
    """
    data = pd.DataFrame(data.value_counts())
    data = data.reset_index()
    data.columns = ['keyword', 'value']
    # NOTE(review): this blanks the keyword of the first row so that
    # dropna() removes it, i.e. the first (most frequent) value is not
    # plotted - presumably intentional, confirm.
    data['keyword'] = data['keyword'][1:]
    data = data.dropna()
    data = data.reset_index(drop=True)
    data = data.sort_values('value', ascending=False)

    sns.set_context("notebook", font_scale=1.2, rc={"lines.linewidth": 0})

    top = data.head(20)
    x = top['keyword'].astype(str)
    y = top['value'].astype(int)

    f, ax = plt.subplots(figsize=(16, 3))

    sns.set_style('white')

    ## change color of the bar based on value
    colors = [color if _y >= 0 else 'red' for _y in y]
    # x/y are passed by keyword: positional data arguments are rejected by
    # current seaborn releases.
    sns.barplot(x=x, y=y, palette=colors, ax=ax)

    plt.title(title, fontsize=18, y=1.12, color="gray")
    ax.set_xticklabels('')
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.tick_params(axis='both', which='major', pad=30)

    # Write every bar's value inside the bar, just below its top.
    for n, _y in enumerate(y):
        # The label is passed positionally: matplotlib renamed the ``s``
        # keyword to ``text`` and no longer accepts ``s=``.
        ax.annotate(
            '{:.1f}'.format(abs(_y)),
            xy=(n, _y),
            ha='center', va='center',
            xytext=(0, -10),
            size=12,
            textcoords='offset points',
            color="white",
            weight="bold"
        )

    ax.set_yticklabels("")
    ax.set_xticklabels(top['keyword'], rotation=25, ha="right")
    ax.tick_params(axis='both', which='major', pad=15)
    sns.despine(left=True)
def plotStackedBarsScalar(df, indexCol, columns, valuesCol, box=False, rotation=90,
                          zeroLine=False, title="", xlabel='', ylabel='', ncol=5,
                          ygrid=False, yticks=False, ymin=None, ymax=None, barWidth=0.5,
                          legendY=None, palette=None, outFile=None, sideLabel=False,
                          labelColor=None, yFormat=None, transparent=False,
                          openFile=False, closeFig=True):
    '''
    Plot a stacked bar plot using data in df, given the index column, the
    column holding the values to pivot to columns, and the column holding
    the values. The argument 'ncol' specifies the number of columns with
    which to render the legend.

    box=False removes the plot frame (despine); yticks draws outward ticks
    on the left y axis; zeroLine and ygrid add a line at y=0 and horizontal
    grid lines; ymin/ymax fix the y limits. The remaining arguments are
    passed to _finalizeFigure. Returns (fig, ax).
    '''
    #_logger.debug('plotStackedBarsScalar %s', sideLabel)
    setupPlot()

    # TBD: handle year values as columns to plot
    df2 = df[[indexCol, columns, valuesCol]].pivot(index=indexCol, columns=columns, values=valuesCol)

    setupPalette(len(df2.columns), pal=palette)

    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    df2.plot(kind='bar', stacked=True, ax=ax, grid=False, width=barWidth, rot=rotation)

    if not box:
        sns.despine(left=True)

    if yticks:
        # Booleans instead of 'on'/'off': current matplotlib no longer
        # understands those strings, and 'off' is truthy, so the right
        # ticks were switched on instead of off.
        plt.tick_params(axis='y', direction='out', length=5, width=.75,
                        colors='k', left=True, right=False)

    if zeroLine:
        ax.axhline(0, color='k', linewidth=0.75, linestyle='-')

    if ygrid:
        ax.yaxis.grid(color='lightgrey', linestyle='solid')

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # The legend is placed below the plot.
    legendY = -0.6 if legendY is None else legendY
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, legendY), ncol=ncol)

    if title:
        ax.set_title(title, y=1.05)

    if ymin is not None or ymax is not None:
        ax.set_autoscale_on(False)
        ax.set_ylim(ymin, ymax)

    _finalizeFigure(fig, ax, outFile=outFile, sideLabel=sideLabel, labelColor=labelColor,
                    yFormat=yFormat, transparent=transparent, openFile=openFile, closeFig=closeFig)

    return (fig, ax)
def plotStackedTimeSeries(df, index='region', xlabel='', ylabel='', ncol=5, box=False,
                          zeroLine=False, title="", ygrid=False, yticks=False,
                          ymin=None, ymax=None, barWidth=0.5, legendY=None, yearStep=5,
                          palette=None, outFile=None, sideLabel=False, labelColor=None,
                          yFormat=None, transparent=False, openFile=False, closeFig=True):
    '''
    Plot a stacked bar chart over time. The rows of df are grouped by the
    `index` column and summed, and the result is transposed so that each
    remaining column becomes one bar position and each group one layer.

    When the year columns are annual and yearStep > 1, only every
    yearStep-th year is labelled. box=False removes the plot frame
    (despine); yticks draws outward ticks on the left y axis; zeroLine and
    ygrid add a line at y=0 and horizontal grid lines; ymin/ymax fix the y
    limits. The remaining arguments are passed to _finalizeFigure.
    Returns (fig, ax).
    '''
    #_logger.debug('plotStackedTimeSeries %s', sideLabel)
    setupPlot()
    df = dropExtraCols(df, inplace=False)
    grouped = df.groupby(index)
    df2 = grouped.aggregate(np.sum)
    df3 = df2.transpose()

    setupPalette(len(df3.columns), pal=palette)
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    df3.plot(kind='bar', stacked=True, ax=ax, grid=False, width=barWidth)

    # space out year labels to every 5 years
    locs, labels = plt.xticks()
    # A real list is needed: on Python 3 filter() returns an iterator,
    # which can be neither indexed nor sliced.
    yearCols = list(filter(str.isdigit, df.columns))
    # Fewer than two year columns: nothing to thin out (and no IndexError).
    isAnnual = len(yearCols) > 1 and int(yearCols[1]) - int(yearCols[0]) == 1
    if isAnnual and yearStep > 1:
        plt.xticks(locs[::yearStep], yearCols[::yearStep])

    if not box:
        sns.despine(left=True)

    if yticks:
        # Booleans instead of 'on'/'off': current matplotlib no longer
        # understands those strings, and 'off' is truthy, so the right
        # ticks were switched on instead of off.
        plt.tick_params(axis='y', direction='out', length=5, width=.75,
                        colors='k', left=True, right=False)

    lines = ax.get_lines()
    if lines:
        lines[0].set_visible(False)    # get rid of ugly dashed line

    if zeroLine:
        ax.axhline(0, color='k', linewidth=0.75, linestyle='-')

    if ygrid:
        ax.yaxis.grid(color='lightgrey', linestyle='solid')

    if ymin is not None or ymax is not None:
        ax.set_autoscale_on(False)
        ax.set_ylim(ymin, ymax)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # The legend is placed below the plot.
    legendY = -0.2 if legendY is None else legendY
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, legendY), ncol=ncol)

    if title:
        ax.set_title(title, y=1.05)

    _finalizeFigure(fig, ax, outFile=outFile, sideLabel=sideLabel, labelColor=labelColor,
                    yFormat=yFormat, transparent=transparent, openFile=openFile, closeFig=closeFig)

    return (fig, ax)
def plotTimeSeries(df, xlabel='', ylabel='', box=False, zeroLine=False, title="", ygrid=False,
                   yticks=False, ymin=None, ymax=None, legend=False, legendY=None, yearStep=5,
                   outFile=None, sideLabel=False, labelColor=None, yFormat=None, transparent=False,
                   openFile=False, closeFig=True):
    '''
    Plot the first row of df as a line over its year columns, i.e. the
    columns whose name consists of digits only.

    box=False removes the plot frame (despine); yticks draws outward ticks
    on the left y axis; zeroLine and ygrid add a line at y=0 and horizontal
    grid lines; ymin/ymax fix the y limits; legend=True shows a legend
    below the plot. yearStep is currently unused. The remaining arguments
    are passed to _finalizeFigure. Returns (fig, ax).
    '''
    setupPlot()
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))

    # Real lists are needed: on Python 3 filter()/map() return one-shot
    # iterators, which cannot be used to select columns or be plotted.
    yearCols = list(filter(str.isdigit, df.columns))
    x = [int(year) for year in yearCols]
    y = list(df[yearCols].iloc[0])
    plt.plot(x, y)

    # TBD: see if this is worth doing
    # space out year labels to every 5 years
    #locs, labels = plt.xticks()
    #plt.xticks(locs[::yearStep], yearCols[::yearStep])

    if not box:
        sns.despine(left=True)

    if yticks:
        # Booleans instead of 'on'/'off': current matplotlib no longer
        # understands those strings, and 'off' is truthy, so the right
        # ticks were switched on instead of off.
        plt.tick_params(axis='y', direction='out', length=5, width=.75,
                        colors='k', left=True, right=False)

    if zeroLine:
        ax.axhline(0, color='k', linewidth=0.75, linestyle='-')

    if ygrid:
        ax.yaxis.grid(color='lightgrey', linestyle='solid')

    if ymin is not None or ymax is not None:
        ax.set_autoscale_on(False)
        ax.set_ylim(ymin, ymax)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    if legend:
        legendY = -0.2 if legendY is None else legendY
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, legendY))
    else:
        ax.legend([], frameon=False)

    if title:
        ax.set_title(title, y=1.05)

    _finalizeFigure(fig, ax, outFile=outFile, sideLabel=sideLabel, labelColor=labelColor,
                    yFormat=yFormat, transparent=transparent, openFile=openFile, closeFig=closeFig)

    return (fig, ax)
def gRNA_maplot(s1, s2, prefix="", text=False, n_labels=30):
    """
    Draw one scatter panel per column of `s2` on a 3x2 grid and save it.

    For every screen (columns of `s2`, in reverse order) the x value is
    ``log2(screen * baseline) / 2`` and the y value is
    ``log2(1 + screen) - log2(1 + baseline)``. The baseline column is
    "gDNA_Jurkat" when `prefix` starts with "mid_screen-", otherwise
    "plasmid_pool_TCR". Rows whose index contains "Wnt" are dropped.

    The figure is written as SVG and PDF under `results_dir`, a free name
    expected to be defined at module level.

    :param s1: DataFrame joined with `s2`; must supply the baseline column.
        Assumes a string index of gRNA names (``.str`` is used on it).
    :param s2: DataFrame whose columns are the screens to plot. The grid has
        six panels, so more than six columns will fail.
    :param prefix: label inserted in the output file names; also selects the
        baseline column.
    :param text: if true, annotate guides whose name matches
        "ETS1|GATA3|RUNX1" and add "text." to the file names.
    :param n_labels: currently unused; kept for signature compatibility.
    """
    fig, axis = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(8, 8))
    axis = axis.flatten()

    # Loop-invariant: one colour per guide category.
    palette = sns.color_palette("colorblind")

    for i, screen in enumerate(s2.columns[::-1]):
        x = s1.join(s2)  # .fillna(0)
        # Shuffle rows so that no category is systematically drawn on top.
        x = x.iloc[np.random.permutation(len(x))]
        x = x.loc[x.index[~x.index.str.contains("Wnt")]]

        if prefix.startswith("mid_screen-"):
            b = x["gDNA_Jurkat"]
        else:
            b = x["plasmid_pool_TCR"]
        x = x.fillna(0)
        b = b.fillna(0)

        # Mean log-intensity (x axis). Zero counts give -inf, which is
        # mapped to 0; +inf is mapped to 9.
        M = np.log2(x[screen] * b) / 2.
        M = M.replace({-np.inf: 0, np.inf: 9})

        # Log fold-change against the baseline (y axis).
        fc = np.log2(1 + x[screen]) - np.log2(1 + b)
        fc.name = screen

        # One boolean column per category, keyed by its colour; each row is
        # then reduced to the first colour whose column is True. A guide that
        # matches no category raises IndexError here.
        colors = pd.DataFrame()
        colors[palette[0]] = x.index.str.contains("Wnt")
        colors[palette[1]] = x.index.str.contains("CTRL")
        colors[palette[2]] = x.index.str.contains("Tcr")
        colors[palette[3]] = x.index.str.contains("Ess")
        colors = colors.apply(
            lambda row: row[row].index.tolist()[0], axis=1).tolist()

        axis[i].scatter(M, fc, color=colors, alpha=0.5)
        if text:
            for j in x[x.index.str.contains("ETS1|GATA3|RUNX1")].index:
                axis[i].text(M.loc[j], fc.loc[j], j)
        axis[i].axhline(y=0, color='black', linestyle='--', lw=0.5)
        axis[i].set_title(screen)

    # Label only the left column and the bottom row of the shared-axis grid.
    for i in range(0, len(axis), 2):
        axis[i].set_ylabel("M")
    for ax in axis[-2:]:
        ax.set_xlabel("A")
    sns.despine(fig)

    text_tag = "text." if text else ""
    fig.savefig(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.maplot.{}svg".format(prefix, text_tag)),
        bbox_inches="tight")
    fig.savefig(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.maplot.{}pdf".format(prefix, text_tag)),
        bbox_inches="tight")
def gRNA_rank(s1, s2, prefix="", text=False, n_labels=30):
    """
    Plot ranked gRNA fold-changes per screen and save figures and tables.

    For every screen (columns of `s2`, in reverse order) the fold-change is
    ``log2(1 + screen) - log2(1 + baseline)``, plotted against its descending
    rank on a 3x2 grid. The baseline column is "gDNA_Jurkat" when `prefix`
    starts with "mid_screen-", otherwise "plasmid_pool_TCR". Rows whose index
    contains "Wnt" are dropped.

    Outputs, all under `results_dir` (a free name expected at module level):
    the figure as SVG and PDF, a CSV of per-gRNA fold-changes, and two CSVs
    of per-gene fold-changes reduced by mean and by min. The gene mapping is
    read from the free name `guide_annotation` (columns "oligo_name" and
    "gene").

    :param s1: DataFrame joined with `s2`; must supply the baseline column.
        Assumes a string index named "gRNA_name" (the merge below relies on
        that column existing after ``reset_index``) -- TODO confirm.
    :param s2: DataFrame whose columns are the screens to plot. The grid has
        six panels, so more than six columns will fail.
    :param prefix: label inserted in the output file names; also selects the
        baseline column.
    :param text: if true, annotate guides whose name matches
        "ETS1|GATA3|RUNX1" and add "text." to the figure file names.
    :param n_labels: currently unused; kept for signature compatibility.
    """
    fig, axis = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(8, 8))
    axis = axis.flatten()

    # Loop-invariant: one colour per guide category.
    palette = sns.color_palette("colorblind")

    for i, screen in enumerate(s2.columns[::-1]):
        x = s1.join(s2)  # .fillna(0)
        # Shuffle rows so that no category is systematically drawn on top.
        x = x.iloc[np.random.permutation(len(x))]
        x = x.loc[x.index[~x.index.str.contains("Wnt")]]

        if prefix.startswith("mid_screen-"):
            b = x["gDNA_Jurkat"]
        else:
            b = x["plasmid_pool_TCR"]
        x = x.fillna(0)
        b = b.fillna(0)

        fc = np.log2(1 + x[screen]) - np.log2(1 + b)
        fc.name = screen

        # Accumulate one fold-change column per screen for the CSV output.
        if i == 0:
            xx = pd.DataFrame(fc)
        else:
            xx = xx.join(fc, how="outer")

        # One boolean column per category, keyed by its colour; each row is
        # then reduced to the first colour whose column is True. A guide that
        # matches no category raises IndexError here.
        colors = pd.DataFrame()
        colors[palette[0]] = x.index.str.contains("Wnt")
        colors[palette[1]] = x.index.str.contains("CTRL")
        colors[palette[2]] = x.index.str.contains("Tcr")
        colors[palette[3]] = x.index.str.contains("Ess")
        colors = colors.apply(
            lambda row: row[row].index.tolist()[0], axis=1).tolist()

        # Rank once per screen; it is reused for every text label.
        ranks = fc.rank(ascending=False, method="first")

        axis[i].scatter(ranks, fc, color=colors, alpha=0.5)
        if text:
            for j in x[x.index.str.contains("ETS1|GATA3|RUNX1")].index:
                axis[i].text(ranks.loc[j], fc.loc[j], j)
        axis[i].axhline(y=0, color='black', linestyle='--', lw=0.5)
        axis[i].set_title(screen)

    # Label only the left column and the bottom row of the shared-axis grid.
    for i in range(0, len(axis), 2):
        axis[i].set_ylabel("gRNA fold-change")
    for ax in axis[-2:]:
        ax.set_xlabel("gRNA rank")
    sns.despine(fig)

    text_tag = "text." if text else ""
    fig.savefig(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.rank.{}svg".format(prefix, text_tag)),
        bbox_inches="tight")
    fig.savefig(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.rank.{}pdf".format(prefix, text_tag)),
        bbox_inches="tight")

    # Save ranked list
    xx.to_csv(
        os.path.join(
            results_dir, "gRNA_counts.norm.{}.rank.csv".format(prefix)),
        index=True)

    # Save ranked list of gene-level measurements, reduced by mean and min
    m = pd.merge(
        xx.reset_index(),
        guide_annotation[["oligo_name", "gene"]],
        left_on="gRNA_name",
        right_on="oligo_name")
    m = m.drop("oligo_name", axis=1).set_index(["gene", "gRNA_name"])
    by_gene = m.groupby(level=[0])
    by_gene.mean().to_csv(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.gene_mean.rank.csv".format(prefix)),
        index=True)
    by_gene.min().to_csv(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.gene_min.rank.csv".format(prefix)),
        index=True)
def gRNA_rank_stimulus(xx, s2, prefix=""):
    """
    Plot the stimulated-minus-unstimulated fold-change difference per screen.

    First, the per-gRNA fold-change against a baseline is computed for every
    column of `s2` (in reverse order). Then the reversed columns are taken in
    consecutive pairs and the second minus the first is plotted against its
    descending rank, one pair per panel of a 1x3 figure.

    Baseline selection, by substring of the screen name:

    * "TCR" or "Jurkat": rows containing "Wnt" are dropped; the baseline is
      "gDNA_Jurkat" for a "mid_screen-" prefix, else "plasmid_pool_TCR".
    * "WNT" or "HEK": rows containing "Tcr" are dropped; the baseline is
      "gDNA_HEKclone4" or "gDNA_HEKclone6" for a "mid_screen-" prefix
      (clone 4 when "_4_" is in `prefix`), else "plasmid_pool_WNT".

    The figure (SVG) and the table of differences (CSV) are written under
    `results_dir`, a free name expected to be defined at module level.

    NOTE(review): the body reads `s1`, which is not a parameter and must
    therefore exist at module level -- confirm this is intended.

    :param xx: rebound inside the first loop before it is read, so the value
        passed in is ignored whenever `s2` has at least one column.
    :param s2: DataFrame whose columns are the screens. They are consumed in
        pairs after reversal, so an even number of columns (at most six) is
        expected; the second column of each pair must contain "_stimulated"
        for the final column rename to produce a name.
    :param prefix: label inserted in the output file names; also selects the
        baseline column.
    """
    fig, axis = plt.subplots(1, 3, sharex=False, sharey=True, figsize=(12, 3))
    axis = axis.flatten()

    for i, screen in enumerate(s2.columns[::-1]):
        x = s1.join(s2)  # .fillna(0)
        x = x.iloc[np.random.permutation(len(x))]

        # NOTE(review): a screen matching neither branch leaves `b` unset
        # (or stale from the previous iteration).
        if ("TCR" in screen) or ("Jurkat" in screen):
            x = x.loc[x.index[~x.index.str.contains("Wnt")]]
            if prefix.startswith("mid_screen-"):
                b = x["gDNA_Jurkat"]
            else:
                b = x["plasmid_pool_TCR"]
        elif ("WNT" in screen) or ("HEK" in screen):
            x = x.loc[x.index[~x.index.str.contains("Tcr")]]
            if prefix.startswith("mid_screen-"):
                if "_4_" in prefix:
                    b = x["gDNA_HEKclone4"]
                else:
                    b = x["gDNA_HEKclone6"]
            else:
                b = x["plasmid_pool_WNT"]

        fc = np.log2(1 + x[screen]) - np.log2(1 + b)
        fc.name = screen

        if i == 0:
            xx = pd.DataFrame(fc)
        else:
            xx = xx.join(fc, how="outer")

    # Loop-invariant: one colour per guide category.
    palette = sns.color_palette("colorblind")

    screens = s2.columns[::-1]
    for i in range(0, len(s2.columns), 2):
        # `i` steps over columns two at a time; the figure has one panel per
        # pair, so the panel index is i // 2 (indexing with `i` itself would
        # skip panels and overrun the 3-panel grid on the third pair).
        panel = axis[i // 2]

        fc = (xx[screens[i + 1]] - xx[screens[i]]).dropna()
        fc.name = screens[i + 1]

        if i == 0:
            panel.set_ylabel("gRNA fold-change (stimulated / unstimulated)")
            xxx = pd.DataFrame(fc)
        else:
            xxx = xxx.join(fc, how="outer")

        # One boolean column per category, keyed by its colour; each row is
        # then reduced to the first colour whose column is True. A guide that
        # matches no category raises IndexError here.
        colors = pd.DataFrame()
        colors[palette[0]] = fc.index.str.contains("Wnt")
        colors[palette[1]] = fc.index.str.contains("CTRL")
        colors[palette[2]] = fc.index.str.contains("Tcr")
        colors[palette[3]] = fc.index.str.contains("Ess")
        colors = colors.apply(
            lambda j: j[j].index.tolist()[0], axis=1).tolist()

        panel.scatter(
            fc.rank(ascending=False, method="first"), fc,
            color=colors, alpha=0.5)
        panel.axhline(y=0, color='black', linestyle='--', lw=0.5)
        panel.set_title(re.sub("_stimulated", "", screens[i + 1]))
        panel.set_xlabel("gRNA rank (stimulated / unstimulated)")

    sns.despine(fig)
    fig.savefig(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.rank.diff_condition.svg".format(prefix)),
        bbox_inches="tight")

    # expand=False keeps the result an Index; the default returns a
    # DataFrame on current pandas, which cannot be assigned to `.columns`.
    xxx.columns = xxx.columns.str.extract("(.*)_stimulated", expand=False)
    xxx.to_csv(
        os.path.join(
            results_dir,
            "gRNA_counts.norm.{}.rank.diff_condition.csv".format(prefix)),
        index=True)