def plot_similardishes(idx, xlim):
    """Plot the 20 dishes whose flavor profile is closest to dish ``idx``.

    Reads the module-level ``yum_ingr2`` (recipe table with ``recipeName``
    and ``cuisine`` columns) and ``yum_cos`` (pairwise cosine-similarity
    matrix) and draws a horizontal strip plot of similarity versus rank,
    colored by cuisine and annotated with recipe names.

    NOTE(review): rows of ``yum_cos`` are addressed both by position and by
    the index labels of ``yum_ingr2``, so this assumes ``yum_ingr2`` carries
    a default 0..n-1 integer index -- confirm.

    Parameters
    ----------
    idx : int
        Row of the reference dish.
    xlim : sequence
        Limits passed to ``ax.set_xlim``.
    """
    # argsort is ascending: take the 20 entries just below the single highest
    # score (presumably the dish itself -- confirm) and flip them so the most
    # similar dish comes first.
    nearest = yum_cos[idx].argsort()[-21:-1]
    # Copy so the added columns do not write into a view of yum_ingr2.
    match = yum_ingr2.iloc[nearest][::-1].copy()
    newidx = match.index.values
    match['cosine'] = yum_cos[idx][newidx]
    match['rank'] = range(1, 1 + len(newidx))
    recipe_names = match['recipeName'].tolist()

    plt.figure(figsize=(10, 10))
    ax = sns.stripplot(y='rank', x='cosine', data=match, jitter=0.05,
                       hue='cuisine', size=15, orient="h")
    ax.set_title(
        yum_ingr2.loc[idx, 'recipeName']
        + '(' + yum_ingr2.loc[idx, 'cuisine'] + ')',
        fontsize=18,
    )
    ax.set_xlabel('Flavor cosine similarity', fontsize=18)
    ax.set_ylabel('Rank', fontsize=18)
    ax.yaxis.grid(color='white')
    ax.xaxis.grid(color='white')

    # Categorical positions start at 0 while ranks start at 1, hence y - 1.
    for label, y, x in zip(recipe_names, match['rank'], match['cosine']):
        ax.text(x + 0.001, y - 1, label, ha='left')

    ax.legend(loc='lower right', prop={'size': 14})
    ax.set_ylim([20, -1])
    ax.set_xlim(xlim)
def plot_null(fnull, fcore, ax=None):
    """
    Draw the null distribution of shared responders as a jittered strip
    plot and mark the observed counts on top of it.

    Parameters
    ----------
    fnull : str
        Tab-separated file with (at least) columns 'type' and 'n'.
    fcore : str
        Tab-separated file whose first column is the index and which has
        an 'overall' column; rows are counted per 'overall' value.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure/axes pair is created when omitted.

    Returns
    -------
    ax : matplotlib Axes
    """
    null_df = pd.read_csv(fnull, sep='\t')
    core_df = pd.read_csv(fcore, sep='\t', index_col=0)
    counts = core_df.groupby('overall').size()

    # Observed counts for 'overall' == -1, 0, 1 (absent values count as 0);
    # they are drawn at x positions 0, 1, 2, i.e. health, mixed, disease.
    observed = [
        counts.loc[code] if code in counts.index else 0
        for code in (-1, 0, 1)
    ]

    if ax is None:
        _, ax = plt.subplots()

    sns.stripplot(
        data=null_df,
        x='type',
        y='n',
        order=['health', 'mixed', 'disease'],
        jitter=True,
        ax=ax,
        alpha=0.2,
    )
    ax.scatter([0, 1, 2], observed, c='k', marker='D', s=25, zorder=10)
    return ax
def plot_commits_by_engineer(self):
    """Open a 16x14 inch figure and strip-plot commit dates per engineer.

    Uses the "CommitDate" and "Engineer" columns of ``self.tickets``.
    """
    figure_size = (16, 14)
    plt.figure(figsize=figure_size)
    sns.stripplot(
        data=self.tickets,
        x="CommitDate",
        y="Engineer",
        jitter=True,
    )
def response_surface_analysis(df):
    """Perform response surface analysis on df.

    For every (classifier, feature) pair the summed test accuracy of runs
    with the feature switched off is subtracted from the summed accuracy of
    runs with it switched on; the scaled difference is added to the
    classifier's mean accuracy. The resulting table is printed and drawn as
    a strip plot (effect vs. feature, colored by classifier).

    Parameters
    ----------
    df : pandas DataFrame
        Must contain 'classifier' and 'test_accuracy' columns; every other
        column is treated as a 0/1 feature flag.
    """
    # seaborn no longer re-exports pyplot as ``sns.plt``; use matplotlib
    # (a hard dependency of seaborn) directly.
    import matplotlib.pyplot as plt

    def tally_results(df):
        """Yield (classifier, feature, effect) for each combination."""
        features = [col for col in df.columns
                    if col not in ('classifier', 'test_accuracy')]
        classifiers = ['Decision Tree', 'Linear SVC', 'Logistic Regression',
                       'Multinomial NB', 'Random Forest', 'Voting Classifier']
        for classifier in classifiers:
            from_class = df['classifier'] == classifier
            class_mean = df[from_class]['test_accuracy'].mean()
            for feature in features:
                with_feature = df[(df[feature] == 1) & from_class]
                wo_feature = df[(df[feature] == 0) & from_class]
                acc_diff = (with_feature['test_accuracy'].sum()
                            - wo_feature['test_accuracy'].sum())
                # NOTE(review): the divisor 255 is hard-coded; presumably
                # tied to the number of runs per classifier -- confirm.
                mean_diff = acc_diff / 255 + class_mean
                yield classifier, feature, mean_diff

    results = pd.DataFrame(list(tally_results(df)),
                           columns=['classifier', 'feature', 'effect'])

    # Order classifiers by mean accuracy and features by mean effect (both
    # descending) so the sort and the plot follow that ranking.
    class_order = list(df.groupby('classifier')['test_accuracy']
                       .mean()
                       .sort_values(ascending=False)
                       .index)
    results['classifier'] = pd.Categorical(results['classifier'],
                                           categories=class_order)
    feat_order = list(results.groupby('feature')['effect']
                      .mean()
                      .sort_values(ascending=False)
                      .index)
    results['feature'] = pd.Categorical(results['feature'],
                                        categories=feat_order)
    results.sort_values(['feature', 'classifier'], inplace=True)
    print(results)

    # x/y must be passed by keyword on current seaborn releases.
    sns.stripplot(x='effect', y='feature', hue='classifier', data=results)
    plt.legend()
    plt.show()
def aga_attachedness(
        adata,
        attachedness_type='scaled',
        color_map=None,
        show=None,
        save=None):
    """Attachedness of aga groups.

    Draws the selected attachedness matrix and overlays a green marker for
    every nonzero entry of ``adata.uns['aga_adjacency']``.

    Parameters
    ----------
    adata
        Object exposing a mapping ``uns`` with the aga results.
    attachedness_type : {'scaled', 'distance', 'absolute'}
        Which matrix to show: ``uns['aga_attachedness']``,
        ``uns['aga_distances']`` or ``uns['aga_attachedness_absolute']``.
    color_map
        Forwarded to ``matrix``.
    show, save
        Forwarded to ``utils.savefig_or_show``.

    Raises
    ------
    ValueError
        If ``attachedness_type`` is not one of the supported values.
    """
    uns_keys = {
        'scaled': 'aga_attachedness',
        'distance': 'aga_distances',
        'absolute': 'aga_attachedness_absolute',
    }
    if attachedness_type not in uns_keys:
        raise ValueError(
            'Unknown attachedness_type {}.'.format(attachedness_type))
    attachedness = adata.uns[uns_keys[attachedness_type]]
    adjacency = adata.uns['aga_adjacency']

    # Keep the figure open (show=False) so the neighbor markers can be drawn
    # on top before it is saved or shown.
    matrix(attachedness, color_map=color_map, show=False)
    for i in range(adjacency.shape[0]):
        # assumes adjacency[i] is a 2-D (sparse) row, so nonzero()[1] holds
        # the column indices -- TODO confirm
        neighbors = adjacency[i].nonzero()[1]
        pl.scatter([i] * len(neighbors), neighbors, color='green')
    utils.savefig_or_show('aga_attachedness', show=show, save=save)
def stripboxplot(x, y, data, ax=None, significant=None, **kwargs):
    """
    Overlay a stripplot on top of a boxplot.

    Parameters
    ----------
    x, y : str
        Column names in ``data`` for the two axes.
    data : pandas DataFrame
        Data to plot.
    ax : matplotlib Axes, optional
        Axes to draw on; forwarded to both seaborn calls.
    significant : optional
        When not None, forwarded to ``add_significance_indicator``.
    **kwargs
        Forwarded to both ``boxplot`` and ``stripplot``. ``jitter``
        (default 0.05) is used for the strip plot only; ``color`` is applied
        to both plots when given, otherwise the strip points default to
        "0.3".

    Returns
    -------
    The Axes returned by ``stripplot``.
    """
    # Work on a copy and pull out the strip-only option first: boxplot does
    # not accept ``jitter`` and would fail if it were forwarded.
    kwargs = dict(kwargs)
    jitter = kwargs.pop("jitter", 0.05)

    ax = sb.boxplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        fliersize=0,
        **kwargs
    )

    strip_color = kwargs.pop("color", "0.3")
    plot = sb.stripplot(
        x=x,
        y=y,
        data=data,
        ax=ax,
        jitter=jitter,
        color=strip_color,
        **kwargs
    )

    if data[y].min() >= 0:
        hide_negative_y_ticks(plot)
    if significant is not None:
        add_significance_indicator(plot=plot, significant=significant)
    return plot
def stemplot(x, y, data, order, ax, palette, marker='o', size=7):
    """
    Wrapper to make one stemplot with colored dashed lines leading
    to colored marker.

    Parameters
    ----------
    x, y : str
        used in call to sns.stripplot() with data
    data : pandas dataframe
        Should have the values that are given for 'order' in the
        index, or a column called 'label' with those values. The caller's
        dataframe is not modified.
    order : list
        order of x values
    ax : Axis object
        axis handle to plot values on
    palette : dict
        {values in x-axis : color mapping value}
    marker : str
        marker value to pass to stripplot
    size : int
        size of marker

    Returns
    -------
    ax
    """
    if 'label' in data:
        # Re-index a copy so the caller's dataframe keeps its own index.
        data = data.copy()
        data.index = data['label']

    sns.stripplot(x=x, y=y, data=data, order=order, ax=ax, palette=palette,
                  size=size, marker=marker)
    _, stemlines, baseline = ax.stem(data.loc[order, y],
                                     markerfmt=" ", linefmt=":")
    # Remove stemplot baseline
    plt.setp(baseline, visible=False)

    # Change stem colors. Older matplotlib returns a list of Line2D objects;
    # newer releases return one LineCollection holding all the stems.
    stem_colors = [palette[key] for key in order]
    if isinstance(stemlines, (list, tuple)):
        for line, color in zip(stemlines, stem_colors):
            line.set_color(color)
            line.set_alpha(0.75)
    else:
        stemlines.set_color(stem_colors)
        stemlines.set_alpha(0.75)

    return ax
def plot_alphadf(alphasdf, col_order, labeldict, metric='alpha'):
    """
    Plot faceted alpha diversity.

    Each dataset gets its own facet with a boxplot of 'alpha' per
    'DiseaseState' and the individual points drawn on top.

    Parameters
    ----------
    alphasdf : pandas DataFrame
        columns ['study', 'alpha', 'DiseaseState']
    col_order : list
        dataset IDs in the order they should be plotted
    labeldict : dict
        dictionary with {dataset: label}
    metric : str
        alpha diversity metric, to use in labeling y axis

    Returns
    -------
    fig : Figure
    """
    ncols = 6  # facets per row; also decides which facets get a y label

    sns.set_style('white')
    g = sns.FacetGrid(alphasdf, col='study', col_wrap=ncols,
                      col_order=col_order, sharex=False, sharey=False)
    g = g.map(sns.boxplot, "DiseaseState", "alpha")
    # No hue is mapped, so the legacy ``split`` (now ``dodge``) option would
    # have no effect; it is omitted to stay compatible with current seaborn.
    g = g.map(sns.stripplot, "DiseaseState", "alpha",
              jitter=True, size=5, linewidth=0.6)

    fig = plt.gcf()
    fig.set_size_inches(14.2, 9)

    # Fix y-axis gridlines
    for i, ax in enumerate(g.axes):
        yticks = ax.get_yticks()
        # If bottom limit is between 0 and 1 (i.e. not simpson)
        if not (0 < yticks[0] < 1):
            ax.set_ylim(floor(yticks[0]), floor(yticks[-1]))
        if yticks[0] < 0:
            ax.set_ylim(0, floor(yticks[-1]))

        # Keep every other tick to reduce clutter.
        yticks = ax.get_yticks()
        if 0 < yticks[0] < 1:
            ax.set_yticks(yticks[1::2])
        else:
            ax.set_yticks(yticks[::2])

        # Need some space on the y-axis for p-values
        low, high = ax.get_ylim()
        ax.set_ylim(low, 1.2 * high)

        # Update title: FacetGrid titles look like "study = <id>"; split only
        # on the first "=" so ids containing "=" stay intact.
        oldtitle = ax.get_title()
        ax.set_title(labeldict[oldtitle.split('=', 1)[1].strip()])

        # Update y label on the first facet of each row
        if i % ncols == 0:
            ax.set_ylabel(metric)

    plt.tight_layout()
    return fig
def plot_ubiq_abun_boxplot(tidy, metric, calculation):
    """
    Plot boxplot where x-axis is 'overall_significance' of genus,
    and values are either ubiquity or abundance in tidy (with the
    respective metric and calculation type). Only rows with
    patient == "total" are plotted; abundance values are shown as log10.

    Parameters
    ----------
    tidy : pandas dataframe
        has columns overall_significance, value, patient, metric,
        and calculation
    metric : str
        'abundance' or 'ubiquity'
    calculation: str
        'from_pooled_mean' or 'mean_of_datasets'

    Returns
    -------
    fig : Figure
    ax : Axis object
    """
    fig, ax = plt.subplots(figsize=(5.5, 4))

    # Copy the filtered rows so the log transform below writes into our own
    # frame rather than a slice of ``tidy``.
    tmp = (tidy.query('metric == @metric')
               .query('calculation == @calculation')
               .query('patient == "total"')
               .copy())

    # Plot log10(abundance)
    if metric == 'abundance':
        tmp['value'] = np.log10(tmp['value'])

    category_order = ['health', 'disease', 'mixed', 'not_sig']
    line_style = {'color': 'k'}
    box_style = {
        'boxprops': {'edgecolor': 'k', 'facecolor': 'w'},
        'medianprops': line_style,
        'whiskerprops': line_style,
        'capprops': line_style,
    }

    sns.boxplot(data=tmp, x='overall_significance', y='value',
                fliersize=0, ax=ax, color='w', order=category_order,
                **box_style)
    # No hue is used, so the legacy ``split`` (now ``dodge``) option would
    # have no effect; it is omitted to stay compatible with current seaborn.
    sns.stripplot(data=tmp, x='overall_significance', y='value',
                  jitter=True, linewidth=0.6, ax=ax,
                  order=category_order, color='w')
    return fig, ax