我们从 Python 开源项目中，提取了以下 50 个代码示例，用于说明如何使用 seaborn.distplot()。
def traffic_districution(self):
    """Print summary statistics of the traffic data and draw its histogram.

    The training directory is obtained from ``g_singletonDataFilePath`` and
    the frame from ``self.load_trafficdf``; only the ``'traffic'`` column is
    used. The figure is drawn but neither shown nor saved here.
    """
    traffic_df = self.load_trafficdf(g_singletonDataFilePath.getTrainDir())
    traffic = traffic_df['traffic']
    print(traffic.describe())
    # sns.distplot(self.gapdf['gap'],kde=False, bins=100);
    traffic.plot(kind='hist', bins=100)
    plt.xlabel('Traffic')
    plt.title('Histogram of Traffic')
    return

# Disabled helper kept from the original source.
# def disp_gap_bydistrict(self, disp_ids = np.arange(34,67,1), cls1 = 'start_district_id', cls2 = 'time_id'):
# #     disp_ids = np.arange(1,34,1)
#     plt.figure()
#     by_district = self.gapdf.groupby(cls1)
#     size = len(disp_ids)
# #     size = len(by_district)
#     col_len = row_len = math.ceil(math.sqrt(size))
#     count = 1
#     for name, group in by_district:
#         if not name in disp_ids:
#             continue
#         plt.subplot(row_len, col_len, count)
#         group.groupby(cls2)['gap'].mean().plot()
#         count += 1
#     return
def disp_gap_bydate(self):
    """Show a bar chart of the mean gap for each date.

    ``self.gapdf`` is grouped by ``'time_date'`` and the mean of ``'gap'`` is
    plotted per group; the chart is displayed with ``plt.show()``.
    """
    mean_gap_per_date = self.gapdf.groupby('time_date')['gap'].mean()
    mean_gap_per_date.plot(kind='bar')
    plt.ylabel('Mean of gap')
    plt.title('Date/Gap Correlation')
    # for i in gaps_mean.index:
    #     plt.plot([i,i], [0, gaps_mean[i]], 'k-')
    plt.show()
    return

# Disabled helpers kept from the original source.
# def drawGapDistribution(self):
#     self.gapdf[self.gapdf['gapdf'] < 10]['gapdf'].hist(bins=50)
# #     sns.distplot(self.gapdf['gapdf']);
# #     sns.distplot(self.gapdf['gapdf'], hist=True, kde=False, rug=False)
# #     plt.hist(self.gapdf['gapdf'])
#     plt.show()
#     return
# def drawGapCorrelation(self):
#     _, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
#     res = self.gapdf.groupby('start_district_id')['gapdf'].sum()
#     ax1.bar(res.index, res.values)
#     res = self.gapdf.groupby('time_slotid')['gapdf'].sum()
#     ax2.bar(res.index.map(lambda x: x[11:]), res.values)
#     plt.show()
#     return
def __init__(self, path, games, logger, suffix):
    """Draw the normalised histogram of the number of questions per game.

    The parent initialiser receives ``path``, this class's name and
    ``suffix``; ``logger`` is accepted but not used in this method.
    """
    super(QuestionVsDialogue, self).__init__(path, self.__class__.__name__, suffix)

    questions_per_game = [len(game.questions) for game in games]

    sns.set_style("whitegrid", {"axes.grid": False})

    # ratio question/dialogues -- unit-width bins centred on the integers 1..24
    bin_edges = np.arange(0.5, 25.5, 1)
    axes = sns.distplot(questions_per_game, norm_hist=True, kde=False, bins=bin_edges)

    axes.set_xlim(0.5, 25.5)
    axes.set_ylim(bottom=0)
    axes.set_xlabel("Number of questions", {'size': '14'})
    axes.set_ylabel("Ratio of dialogues", {'size': '14'})
def __init__(self, path, games, logger, suffix):
    """Draw the normalised histogram of the number of words per question.

    Question marks are stripped from every question, then ``\\w+`` matches are
    counted as words. ``logger`` is accepted but not used in this method.
    """
    super(WordVsQuestion, self).__init__(path, self.__class__.__name__, suffix)

    words_per_question = [
        len(re.findall(r'\w+', re.sub('[?]', '', question)))
        for game in games
        for question in game.questions
    ]

    sns.set_style("whitegrid", {"axes.grid": False})

    # ratio question/words
    bin_edges = np.arange(2.5, 15.5, 1)
    axes = sns.distplot(words_per_question, norm_hist=True, kde=False,
                        bins=bin_edges, color="g")

    axes.set_xlabel("Number of words", {'size': '14'})
    axes.set_ylabel("Ratio of questions", {'size': '14'})
    axes.set_xlim(2.5, 14.5)
    axes.set_ylim(bottom=0)
def overlap_visualize():
    """Visualise question/answer word overlap on the shuffled "nlpcc" test split.

    Adds length, word-count and ``word_share`` columns to the test frame,
    draws a violin plot of ``word_share`` per ``flag`` next to the two
    per-flag distributions, prints the result of
    ``evaluation.evaluationBypandas`` and shows the figure.
    """
    def _tokens(text):
        # Lower-cased, stripped tokens obtained by splitting on single spaces.
        return set(token.lower().strip() for token in text.split(" "))

    def normalized_word_share(row):
        question_tokens = _tokens(row['question'])
        answer_tokens = _tokens(row['answer'])
        shared = len(question_tokens & answer_tokens)
        return 1.0 * shared / (len(question_tokens) + len(answer_tokens))

    train, test, dev = load("nlpcc", filter=True)
    test = test.reindex(np.random.permutation(test.index))

    df = test  # alias: the columns added below are written onto ``test`` itself
    df['qlen'] = df['question'].str.len()
    df['alen'] = df['answer'].str.len()
    df['q_n_words'] = df['question'].apply(lambda text: len(text.split(' ')))
    df['a_n_words'] = df['answer'].apply(lambda text: len(text.split(' ')))
    df['word_share'] = df.apply(normalized_word_share, axis=1)

    plt.figure(figsize=(12, 8))

    plt.subplot(1, 2, 1)
    sns.violinplot(x='flag', y='word_share', data=df[0:50000])

    plt.subplot(1, 2, 2)
    positives = df[df['flag'] == 1.0]['word_share'][0:10000]
    sns.distplot(positives, color='green')
    negatives = df[df['flag'] == 0.0]['word_share'][0:10000]
    sns.distplot(negatives, color='red')

    print(evaluation.evaluationBypandas(test, df['word_share']))
    plt.show('hold')
def follower_botness(username, cutoff=0.7, filename='testfig.png'):
    """Plot the botness scores of a user's followers and return the bot share.

    Args:
        username: account whose followers are looked up via ``db.getFollowers``.
        cutoff: a follower counts as a probable bot when its score is strictly
            greater than this value (previously hard-coded to 0.7).
        filename: where the histogram is saved (previously hard-coded to
            ``'testfig.png'``).

    Returns:
        The fraction of followers whose score exceeds ``cutoff``, or ``None``
        when the user has no followers.
    """
    # The follower id is the part after the '/' in the edge's ``_from`` field.
    scorelist = [
        db.getUser(edge['_from'].split('/')[1])['botness']['score']
        for edge in db.getFollowers(toName=username)
    ]
    if not scorelist:
        return None

    scores = pd.Series(scorelist, name='probability of follower bot')
    ax = sns.distplot(scores)
    ax.get_figure().savefig(filename)

    # The mean of the boolean mask is the ratio as a true float; the previous
    # ``sum(...) / len(...)`` floors to 0 under Python 2 integer division.
    return np.mean(np.array(scorelist) > cutoff)
def plot_word_frequency(df, words, category):
    """Plot where the given words occur across the concatenated text.

    All entries of ``df['lines']`` are joined with spaces, split on
    whitespace and lower-cased. The positions of every token equal to one of
    ``words`` are drawn as a density curve with a rug and saved under
    ``../plots/``.
    """
    plt.figure()

    tokens = np.array([token.lower() for token in df['lines'].str.cat(sep=' ').split()])

    # Positions are gathered word by word, in the order given by ``words``.
    positions = [
        position
        for word in words
        for position in np.where(tokens == word)[0]
    ]

    sns.distplot(positions, rug=True, hist=False)
    plt.xlim(0, len(tokens))
    plt.ylabel("{category} word frequency".format(category=category))
    plt.xlabel("Time (words)")
    plt.title("{category} words over time".format(category=category), loc='left')
    plt.savefig("../plots/{category}_frequency".format(category=category))
def _barplot(self, first: RunData, second: RunData, property: str, size: int,
             filename: str = None, show_ticks: bool = True) -> str:
    """Overlay histograms of one property for two runs and save the figure.

    Both value sequences are truncated to the shorter length and share one
    set of bins spanning their combined range. Returns the file name the
    figure was written to (a new one from ``_get_new_figure_filename`` when
    ``filename`` is falsy).
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    filename = filename or self._get_new_figure_filename()
    self._set_fig_size(size)

    common_len = min(len(first[property]), len(second[property]))
    values_a = first[property][0:common_len]
    values_b = second[property][0:common_len]

    # NOTE(review): ``+`` concatenates only if these are lists -- confirm the type.
    lowest = min(values_a + values_b)
    highest = max(values_a + values_b)
    bin_count = math.floor(math.sqrt(common_len) * size)
    bins = np.linspace(lowest, highest, bin_count)

    for run, values in ((first, values_a), (second, values_b)):
        sns.distplot(values, bins=bins, label=run.description(), kde=False)

    if not show_ticks:
        plt.xticks([])
        plt.yticks([])
    plt.xlim(lowest, highest)
    plt.legend()
    plt.savefig(filename)
    plt.close()
    return filename
def plotHistogram(series, x_label, scale = "linear", normed=False, name = None):
    """Draw a histogram of ``series`` and hand the figure to ``data.saveFigure``.

    ``scale`` sets the y-axis scale and ``normed`` is forwarded as
    ``norm_hist``. The figure name is ``"histogram"``, prefixed with
    ``name + "_"`` when ``name`` is truthy.
    """
    figure_name = name + "_" + "histogram" if name else "histogram"

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)

    seaborn.distplot(series, kde=False, norm_hist=normed, ax=axis)

    axis.set_yscale(scale)
    axis.set_xlabel(x_label)
    # axis.set_ylabel(y_label)

    data.saveFigure(figure, figure_name)
def plot_dist(
        main_file, mask_file, xlabel, distribution=None, xlabel2=None,
        figsize=DINA4_LANDSCAPE):
    """Plot the values inside a mask above a reference distribution.

    Args:
        main_file, mask_file: passed to ``_get_values_inside_a_mask``.
        xlabel: x label of the upper panel (100-bin histogram of the values).
        distribution: optional reference values for the lower panel; when
            ``None`` the lower panel shows only the median marker.
        xlabel2: x label of the lower panel.
        figsize: size of the created figure.

    Returns:
        The matplotlib ``Figure`` holding both panels.
    """
    data = _get_values_inside_a_mask(main_file, mask_file)

    fig = plt.Figure(figsize=figsize)
    FigureCanvas(fig)  # attaches a canvas to the bare Figure
    gsp = GridSpec(2, 1)

    ax = fig.add_subplot(gsp[0, 0])
    sns.distplot(data.astype(np.double), kde=False, bins=100, ax=ax)
    ax.set_xlabel(xlabel)

    ax = fig.add_subplot(gsp[1, 0])
    if distribution is not None:
        # Converting None to a double array yields NaN, which cannot be plotted.
        sns.distplot(np.array(distribution).astype(np.double), ax=ax)

    cur_val = np.median(data)
    # ``:g`` is a format spec; the former ``!g`` is not a valid conversion
    # flag and made str.format raise ValueError on every call.
    label = "{0:g}".format(cur_val)
    plot_vline(cur_val, label, ax=ax)
    ax.set_xlabel(xlabel2)

    return fig
def cross_section_hist(data, factor_name, date):
    '''Plot the distribution of one factor for a single date.

    (The original docstring text was lost to an encoding error; only the
    parameter list below survived.)

    Parameters
    --------------------------------
    data: DataFrame(index:[Date,IDs],factor1,factor2,...)
    factor_name: str
    date: str

    Returns the axes object produced by ``sns.distplot``.
    '''
    # ``.loc`` replaces ``.ix``, which no longer exists in current pandas;
    # the lookup is label-based on the first index level either way.
    plot_data = data.loc[(date,), factor_name].reset_index(drop=True)
    ax = sns.distplot(plot_data)
    return ax

# (next section heading, partly lost to the same encoding error:
#  item 3 -- Quantile-Quantile ...)
def plot_predict_proba(y_pred_probs, clf, pdf=None):
    """Plot the distribution of predicted probabilities.

    ``clf`` is used as a string prefix of the title. When ``pdf`` is truthy
    the figure is written through ``pdf.savefig()`` and closed; otherwise it
    is shown.
    """
    fig, ax = plt.subplots(1, figsize=(18, 8))

    sns.set_style("white")
    poster_rc = {"lines.linewidth": 1.25, "lines.markersize": 8}
    sns.set_context("poster", font_scale=2.25, rc=poster_rc)

    sns.distplot(y_pred_probs)
    plt.xlabel('predict_proba')
    plt.ylabel('frequency')
    plt.title(clf + ' proba')

    if not pdf:
        plt.show()
        return
    pdf.savefig()
    plt.close()
def plot_mean_bootstrap_exponential_readme():
    """Save a two-panel comparison of Bayesian and classical bootstrap means.

    Four exponential draws are resampled 10000 times with each method; every
    panel shows the sample distribution plus the interval returned by
    ``highest_density_interval``. Output file: ``readme_exponential.png``.
    """
    X = np.random.exponential(7, 4)
    # Keep this order: both sample sets may consume the global random state.
    classical_samples = [np.mean(resample(X)) for _ in range(10000)]
    posterior_samples = mean(X, 10000)
    bayes_interval = highest_density_interval(posterior_samples)
    classical_interval = highest_density_interval(classical_samples)

    panels = (
        ('Bayesian Bootstrap of mean', posterior_samples,
         'Bayesian Bootstrap Samples', bayes_interval),
        ('Classical Bootstrap of mean', classical_samples,
         'Classical Bootstrap Samples', classical_interval),
    )
    for row, (title, samples, sample_label, (low, high)) in enumerate(panels, start=1):
        plt.subplot(2, 1, row)
        plt.title(title)
        sns.distplot(samples, label=sample_label)
        plt.plot([low, high], [0, 0], linewidth=5.0, marker='o', label='95% HDI')
        plt.xlim(-1, 18)
        plt.legend()

    plt.savefig('readme_exponential.png', bbox_inches='tight')
def joint_plot(x, y, xlabel=None, ylabel=None, xlim=None, ylim=None,
               loc="best", color='#0485d1', size=8, markersize=50, kind="kde",
               scatter_color="r"):
    """Build a joint KDE plot with a scatter overlay and histogram marginals.

    When both ``xlabel`` and ``ylabel`` are truthy the data are wrapped in a
    DataFrame keyed by those labels; otherwise ``x`` and ``y`` are passed
    straight to ``SubsampleJointGrid``. ``loc``, ``color``, ``markersize``
    and ``kind`` are accepted but not used by this body.

    Returns the ``SubsampleJointGrid`` instance.
    """
    with sns.axes_style("darkgrid"):
        layout = dict(space=0.1, ratio=2, size=size, xlim=xlim, ylim=ylim)
        if xlabel and ylabel:
            frame = DataFrame(data={xlabel: x, ylabel: y})
            g = SubsampleJointGrid(xlabel, ylabel, data=frame, **layout)
        else:
            g = SubsampleJointGrid(x, y, **layout)

        g.plot_joint(sns.kdeplot, shade=True, cmap="Blues")
        g.plot_sub_joint(plt.scatter, 1000, s=20, c=scatter_color, alpha=0.3)
        g.plot_marginals(sns.distplot, kde=False, rug=False)
        g.annotate(ss.pearsonr, fontsize=25,
                   template="{stat} = {val:.2g}\np = {p:.2g}")

        # Re-label the ticks with their own numeric positions.
        joint_axes = g.ax_joint
        joint_axes.set_yticklabels(joint_axes.get_yticks())
        joint_axes.set_xticklabels(joint_axes.get_xticks())
        return g
def joint_overplot(x, y, df, fig, color='r', marg_kws=None):
    """Overplot additional data on an existing JointGrid instance.

    Args:
        x (str): column of ``df`` assigned to the grid's x data.
        y (str): column of ``df`` assigned to the grid's y data.
        df (DataFrame): source of the new data.
        fig: seaborn JointGrid instance.
        color (str): colour of the scatter points and marginals.
        marg_kws (dict): keyword arguments for ``plot_marginals()``. Defaults
            to normalised histograms weighted by ``df.Survivors``.

    Returns:
        fig: the same JointGrid instance.
    """
    marginal_options = marg_kws
    if marginal_options is None:
        weights = df.Survivors.values
        marginal_options = dict(norm_hist=True, hist_kws=dict(weights=weights))

    # Swap the grid's data so the plotting calls below draw from ``df``.
    fig.x = df[x]
    fig.y = df[y]

    fig.plot_joint(plt.scatter, c=color)
    fig.plot_marginals(sns.distplot, color=color, kde=False, axlabel=False,
                       **marginal_options)
    return fig
def display_covariate_dist(self, covariate_list, save_file=None):
    '''Plot each covariate's histogram, one facet per ``arm_assignment`` value.

    Args:
        covariate_list: column names of ``self.data`` to plot.
        save_file: when truthy every grid is saved to this path at 450 dpi
            (a later covariate overwrites an earlier one); when ``None`` the
            figures are shown instead.
    '''
    # ``sns.plt`` is gone from current seaborn releases; import pyplot directly.
    import matplotlib.pyplot as plt

    for covariate in covariate_list:
        g = sns.FacetGrid(self.data, col="arm_assignment")
        # The original branched on the number of unique values, but both
        # branches made exactly this call.
        g.map(sns.distplot, covariate, kde=False)
        if save_file:
            g.savefig(save_file, dpi=450)

    if save_file is None:
        plt.show()
def weight_norm_histogram(rbm, show_plot=False, filename=None):
    """Plot the distribution of scaled weight norms for every weight layer.

    For each layer the norm of ``W()`` along axis 0 is divided by the square
    root of the layer's first dimension and drawn on shared axes, labelled by
    the layer index. The figure is optionally shown and/or saved, then closed.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    fig, ax = plt.subplots()
    for layer_index in range(rbm.num_weights):
        layer = rbm.weights[layer_index]
        n_inputs = layer.shape[0]
        scaled_norm = be.norm(layer.W(), axis=0) / sqrt(n_inputs)
        sns.distplot(be.to_numpy_array(scaled_norm), ax=ax, label=str(layer_index))
    ax.legend()

    if show_plot:
        # NOTE(review): plt.show takes no figure argument -- confirm intent.
        plt.show(fig)
    if filename is not None:
        fig.savefig(filename)
    plt.close(fig)
def screen_zscore(series, axis=None, z_score=False, plot=True):
    """
    Calculate screen z score (difference between positive and negative controls).

    Args:
        series: pandas Series with a string index; entries whose label
            contains "Essential" are positive controls, those containing
            "CTRL" are negative controls.
        axis: matplotlib axes to draw on; a new figure is created when None.
        z_score: standardise ``series`` (mean 0, std 1) before scoring.
        plot: draw both control distributions when true.

    Returns:
        ``1 - 3 * (std(pos) + std(neg)) / |mean(pos) - mean(neg)|``.
    """
    def _screen_z(pos, neg):
        spread = np.std(pos) + np.std(neg)
        return 1 - (3 * spread / (abs(np.mean(pos) - np.mean(neg))))

    if z_score:
        series = (series - series.mean()) / series.std()

    # Boolean-mask selection replaces ``.ix``, which no longer exists in
    # current pandas.
    pos = series.loc[series.index.str.contains("Essential")]
    neg = series.loc[series.index.str.contains("CTRL")]

    z = _screen_z(pos, neg)

    # Plot
    if plot:
        # Clear the names so the series name is not used as an axis label.
        pos.name = None
        neg.name = None
        if axis is None:
            fig, axis = plt.subplots(1)
        sns.distplot(pos, ax=axis, label="positive controls")
        sns.distplot(neg, ax=axis,
                     label="negative controls; screen Z-score = {}".format(z))

    return z
def target_plot(self):
    """Plot the target column according to its type.

    Time-series runs delegate to ``time_series_target_plot``. Otherwise a
    binary target gets a bar of its positive rate and a numeric or ordinal
    target gets a histogram; other types draw nothing.
    """
    target_type = self.input_data.metadata.loc[self.target].type
    target_data = self.input_data.df[self.target]
    sns.set(style="white", color_codes=True)

    if self.run_time_config['is_time_series']:
        self.time_series_target_plot()
        return

    if target_type == ColType.BINARY:
        plt.figure(figsize=(6, 1))
        positive_rate = target_data.sum() / target_data.shape[0]
        sns.barplot(positive_rate)
        plt.xlim([0, 1])
        plt.title(target_data.name + ' rate')
    elif target_type == ColType.NUMERIC or target_type == ColType.ORDINAL:
        plt.figure(figsize=(6, 2))
        ax = sns.distplot(target_data, hist_kws=dict(edgecolor='black'))
        ax.set_xlim(target_data.min(), target_data.max())
        plt.title(target_data.name + ' histogram')
def distribution(data,xlabel="data",ylabel="percentage",name=None):
    """Show the distribution of ``data`` and optionally save the figure.

    The axes are labelled with ``xlabel``/``ylabel`` before plotting. The
    figure is shown first and then written to ``name`` when that is not None.
    """
    axes = plt.axes()
    axes.set(xlabel=xlabel, ylabel=ylabel)
    plotted = sns.distplot(data, ax=axes)
    plt.show()
    if name is None:
        return
    plotted.get_figure().savefig(name)
def weather_distribution(self):
    """Print summary statistics of the weather data and draw a count plot.

    The frame from ``self.load_weatherdf`` is stored on ``self.gapdf``; the
    ``'weather'`` column is described and counted per value.
    """
    self.gapdf = self.load_weatherdf(g_singletonDataFilePath.getTrainDir())
    print(self.gapdf['weather'].describe())
    # sns.distplot(self.gapdf['gap'],kde=False, bins=100);
    sns.countplot(x="weather", data=self.gapdf, palette="Greens_d")
    plt.title('Countplot of Weather')
    # self.gapdf['weather'].plot(kind='bar')
    # plt.xlabel('Weather')
    # plt.title('Histogram of Weather')
    return
def gapdistricution(self):
    """Print summary statistics of the gap data and draw its histogram.

    The frame from ``self.load_gapdf`` is stored on ``self.gapdf``; the
    ``'gap'`` column is described and plotted with 200 bins.
    """
    self.gapdf = self.load_gapdf(g_singletonDataFilePath.getTrainDir())
    print(self.gapdf['gap'].describe())
    # sns.distplot(self.gapdf['gap'],kde=False, bins=100);
    self.gapdf['gap'].plot(kind='hist', bins=200)
    plt.xlabel('Gaps')
    plt.title('Histogram of Gaps')
    return
def generateDistributionPlot(test):
    """Draw one shaded density curve per row of ``test``.

    Each row is a list whose first element is the curve label and whose
    remaining elements are convertible to float. The label is popped, so the
    rows of ``test`` are modified in place.
    """
    sns.set(color_codes=True)
    for row in test:
        label = row.pop(0)  # removes the label from the caller's row
        timings = list(map(float, row))
        # Plot a filled kernel density estimate
        sns.distplot(timings, hist=False, kde_kws={"shade": True}, label=label)
    plt.xlim([-0.01, 0.1])
    plt.xlabel("time (s)")
    plt.ylabel("operations")
def plot_dist(train_y,dev_y,test_y):
    """Plot normalised label histograms for the three data splits.

    The splits are stacked in a 3x1 figure (training, validation, test), each
    limited to x in [0, 500]. The figure is saved to
    ``checkpoints/label_dist.pdf`` and then shown.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt

    plt.rc('text', usetex=True)
    plt.rc('font', family='Times-Roman')
    sns.set_style(style='white')
    color = sns.color_palette("Set2", 10)  # not used by the panels below

    fig = plt.figure(figsize=(8, 12))
    # plt.title("Label distribution",fontsize=20)
    splits = (
        (train_y, 'Training', "blue"),
        (dev_y, 'Validation', "green"),
        (test_y, 'Test', "red"),
    )
    for row, (labels, split_name, split_color) in enumerate(splits, start=1):
        panel = fig.add_subplot(3, 1, row)
        sns.distplot(labels, kde=False, label=split_name, hist=True,
                     norm_hist=True, color=split_color)
        panel.set_xlabel("Answer")
        panel.set_ylabel("Frequency")
        panel.set_xlim([0, 500])
        plt.legend(loc='best')

    plt.savefig('checkpoints/label_dist.pdf', format='pdf', dpi=300)
    plt.show()
def getPlot(self, params):
    """Return a count histogram of ``self.likes``.

    ``params['bins']`` is converted to ``int`` and used as the bin count.
    The returned object is what ``sns.distplot`` returns.
    """
    bin_count = int(params['bins'])
    bar_style = dict(alpha=0.75, edgecolor="k", linewidth=1)
    axes = sns.distplot(self.likes, bins=bin_count, rug=False, kde=False,
                        hist_kws=bar_style)
    axes.set_xlabel('Number of likes')
    axes.set_ylabel('Count')
    return axes
def plot_z(self,indices=None,figsize=(15,5),loc=1):
    """Plot the estimated distribution of each latent variable.

    Args:
        indices: positions in ``self.z_list`` to plot; ``None`` plots all.
        figsize: size of the created figure.
        loc: legend location, forwarded to ``plt.legend`` (it was previously
            ignored in favour of a hard-coded ``1``).

    Raises:
        ValueError: a selected latent variable has neither a ``sample`` nor a
            ``value``/``std`` pair.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=figsize)
    for position, latent in enumerate(self.z_list):
        if indices is not None and position not in indices:
            continue

        if hasattr(latent, 'sample'):
            label = latent.method + ' estimate of ' + latent.name
            sns.distplot(latent.prior.transform(latent.sample),
                         rug=False, hist=False, label=label)
        elif hasattr(latent, 'value') and hasattr(latent, 'std'):
            label = latent.method + ' estimate of ' + latent.name
            if latent.prior.transform_name is None:
                # No transform: draw the normal density over +/- 3.5 std.
                # Computed with numpy because matplotlib.mlab.normpdf has
                # been removed from matplotlib.
                x = np.linspace(latent.value - latent.std * 3.5,
                                latent.value + latent.std * 3.5, 100)
                density = (np.exp(-0.5 * ((x - latent.value) / latent.std) ** 2)
                           / (latent.std * np.sqrt(2 * np.pi)))
                plt.plot(x, density, label=label)
            else:
                # With a transform, push simulated normal draws through it.
                sims = latent.prior.transform(
                    np.random.normal(latent.value, latent.std, 100000))
                sns.distplot(sims, rug=False, hist=False, label=label)
        else:
            raise ValueError("No information on latent variable to plot!")

    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.title('Latent Variable Plot')
    plt.legend(loc=loc)
    plt.show()
def trace_plot(self,figsize=(15,15)):
    """Plot sampler diagnostics: one row of four panels per latent variable.

    The panels are the density estimate, trace, cumulative average (all on
    the prior-transformed chain) and ``acf`` of the raw chain for lags 1..9.

    Raises:
        ValueError: the first latent variable has no ``sample`` attribute.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    if not hasattr(self.z_list[0], 'sample'):
        raise ValueError("No samples to plot!")

    n_latent = len(self.z_list)
    fig = plt.figure(figsize=figsize)
    # Six base colours, repeated so that every row index has an entry.
    palette = [(0.2980392156862745, 0.4470588235294118, 0.6901960784313725),
               (0.3333333333333333, 0.6588235294117647, 0.40784313725490196),
               (0.7686274509803922, 0.3058823529411765, 0.3215686274509804),
               (0.5058823529411764, 0.4470588235294118, 0.6980392156862745),
               (0.8, 0.7254901960784313, 0.4549019607843137),
               (0.39215686274509803, 0.7098039215686275, 0.803921568627451)] * n_latent

    for j, latent in enumerate(self.z_list):
        chain = latent.sample
        # NOTE(review): computed once per row; assumes prior.transform is
        # deterministic -- confirm.
        transformed = latent.prior.transform(chain)
        color = palette[j]
        first_row = (j == 0)  # column titles are drawn on the top row only

        for k in range(4):
            ax = fig.add_subplot(n_latent, 4, j * 4 + k + 1)
            if k == 0:
                sns.distplot(transformed, rug=False, hist=False, color=color, ax=ax)
                ax.set_ylabel(latent.name)
                title = 'Density Estimate'
            elif k == 1:
                ax.plot(transformed, color=color)
                title = 'Trace Plot'
            elif k == 2:
                running_mean = np.cumsum(transformed) / np.arange(1, len(chain) + 1)
                ax.plot(running_mean, color=color)
                title = 'Cumulative Average'
            else:
                ax.bar(range(1, 10), [acf(chain, lag) for lag in range(1, 10)],
                       color=color)
                title = 'ACF Plot'
            if first_row:
                ax.set_title(title)

    # ``sns.plt`` is gone from current seaborn; use the pyplot imported above.
    plt.show()
def plot_distribution(df, target, tag='eda', directory=None):
    r"""Display a Distribution Plot.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the ``target`` feature.
    target : str
        The target variable for the distribution plot.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------
    http://seaborn.pydata.org/generated/seaborn.distplot.html

    """
    logger.info("Generating Distribution Plot")

    # Draw the plot and fetch the figure that owns the returned axes.
    figure = sns.distplot(df[target]).get_figure()

    # Hand the figure to the shared plot writer.
    write_plot('seaborn', figure, 'distribution_plot', tag, directory)

#
# Function plot_box
#
def plot_action_distribution(actions, file="action_ditribution.png"):
    """Save a 10x10 histogram of ``actions`` to ``file`` and close the figure."""
    plt.figure(figsize=(10, 10))
    sb.distplot(actions, kde=False)

    plt.title("Action distribution")
    plt.xlabel("action")
    plt.ylabel("probability")

    plt.savefig(file)
    plt.close()
def plot_value_distribution(values, file="value_distribution.png"):
    """Save a 10x10 distribution plot of critic values to ``file``.

    Args:
        values: critic values to plot.
        file: output path. It used to be accepted but ignored, so nothing
            was written; the figure is now saved and closed, as the sibling
            action-distribution helpers do.
    """
    plt.figure(figsize=(10, 10))
    sb.distplot(values)
    plt.xlabel("critic value")
    plt.title("Value distribution")
    plt.savefig(file)
    plt.close()
def action_distribution(actions, ax=None, file="action_ditribution.png"):
    """Draw a histogram of ``actions``, save the current figure and close it.

    A new 10x10 figure is created first; the histogram is drawn on ``ax``
    when one is given. Labels, title and the saved file go through pyplot's
    current figure.
    """
    plt.figure(figsize=(10, 10))
    sb.distplot(actions, kde=False, ax=ax)

    plt.title("Action distribution")
    plt.xlabel("action")
    plt.ylabel("probability")

    plt.savefig(file)
    plt.close()
def update(data, ax, xlim, ylim, vl):
    """Redraw the distribution of ``data`` on ``ax`` and return the axes.

    The axes are cleared first. ``xlim``/``ylim`` are applied when truthy,
    and a dashed black vertical line is drawn at ``vl`` across the current
    y-range when ``vl`` is not None.
    """
    ax.clear()
    sns.distplot(data, ax=ax)

    for limits, apply_limits in ((xlim, ax.set_xlim), (ylim, ax.set_ylim)):
        if limits:
            apply_limits(limits)

    if vl is not None:
        ax.plot([vl, vl], ax.get_ylim(), "k--")
    return ax
def generate_test_figures_2d_histogram(cls):
    """generate_test_figures_2d_histogram class method.

    Generate a tuple of 2d histogram figures, one per randomly generated
    series (``cls.n_graphical_tests // 2`` of them).
    """
    # Create series. Will be divided by more than //2 when all plots are
    # ready.
    def _normal_values():
        return np.random.normal(size=cls.n_lines_test_pandas)

    def _scaled_ramp():
        return np.random.randint(0, 99999) * np.arange(cls.n_lines_test_pandas)

    def _constant_values():
        return np.random.randint(0, 99999) * np.ones(cls.n_lines_test_pandas)

    generators = (_normal_values, _scaled_ramp, _constant_values)

    # Lazy on purpose: each series is drawn just before its figure is built.
    random_series = (
        pd.Series(np.random.choice(generators)())
        for _ in range(cls.n_graphical_tests // 2)
    )

    # Create figures from series.
    plot_function = lambda x: sns.distplot(x, kde=False)
    return tuple(
        cls.figure_from_plot_function(plot_function, series)
        for series in random_series
    )
def histogram_of_floats(a, *args, **sns_distplot_kwargs):
    """Plot a histogram of floats by delegating to ``sns.distplot``.

    Arguments:
        a (pd.Series): Float series to create a histogram plot.
        *args, **sns_distplot_kwargs: forwarded unchanged to ``sns.distplot``.

    Returns:
        matplotlib.axes.Axes: the plotted axes.

    Examples:
        >>> import pandas_utilities as pu
        >>> float_serie = pu.dummy_dataframe().float_0
        >>> fig = plt.figure()
        >>> axes = histogram_of_floats(float_serie, kde=False)
        >>> isinstance(axes, matplotlib.axes.Axes)
        True
        >>> fig.savefig('/tmp/doctest_{0}.png'.format(
        ...     'histogram_of_floats'), dpi=500)

    """
    return sns.distplot(a, *args, **sns_distplot_kwargs)
def display_distrib(pd, feature):
    """Show the distribution of one column with a fitted normal curve.

    Args:
        pd: data frame holding the column (the name is kept for existing
            keyword callers even though it shadows the usual pandas alias).
        feature: column name; missing values are dropped before plotting.

    The legend reports the fitted mean and standard deviation. The title is
    built from ``feature`` instead of the former hard-coded 'SalePrice'.
    """
    values = pd[feature].dropna()

    plt.figure()
    sns.distplot(values, fit=norm)
    (mu, sigma) = norm.fit(values)

    # Raw string: ``\m`` and ``\s`` are invalid escapes in a normal literal.
    legend_text = r'Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)
    plt.legend([legend_text], loc='best')
    plt.ylabel('Frequency')
    plt.title('{} distribution'.format(feature))
    plt.show()
def plot_fd(fd_file, fd_radius, mean_fd_dist=None, figsize=DINA4_LANDSCAPE):
    """Plot the frame-displacement trace next to its distribution.

    The series comes from ``_calc_fd(fd_file, fd_radius)``. When
    ``mean_fd_dist`` is truthy a second row shows that distribution with a
    vertical line at the mean of the series.

    Returns the matplotlib ``Figure``.
    """
    fd_power = _calc_fd(fd_file, fd_radius)

    fig = plt.Figure(figsize=figsize)
    FigureCanvas(fig)

    if mean_fd_dist:
        grid = GridSpec(2, 4)
    else:
        grid = GridSpec(1, 2, width_ratios=[3, 1])
        grid.update(hspace=1.0, right=0.95, left=0.1, bottom=0.2)

    # Trace: every column of the first row except the last one.
    trace_ax = fig.add_subplot(grid[0, :-1])
    trace_ax.plot(fd_power)
    trace_ax.set_xlim((0, len(fd_power)))
    trace_ax.set_ylabel("Frame Displacement [mm]")
    trace_ax.set_xlabel("Frame number")
    trace_ylim = trace_ax.get_ylim()

    # Sideways distribution sharing the trace's y-range.
    side_ax = fig.add_subplot(grid[0, -1])
    sns.distplot(fd_power, vertical=True, ax=side_ax)
    side_ax.set_ylim(trace_ylim)

    if mean_fd_dist:
        group_ax = fig.add_subplot(grid[1, :])
        sns.distplot(mean_fd_dist, ax=group_ax)
        group_ax.set_xlabel("Mean Frame Displacement (over all subjects) [mm]")
        mean_fd = fd_power.mean()
        label = r'$\overline{{\text{{FD}}}}$ = {0:g}'.format(mean_fd)
        plot_vline(mean_fd, label, ax=group_ax)

    return fig
def plot_distribution(data, title):
    """Show the distribution of ``data`` with a rug, under ``title``.

    ``data`` may be any iterable; it is materialised into a numpy array.
    """
    values = np.array(list(data))
    sns.distplot(values, rug=True)
    plt.title(title)
    plt.show()
def plot_filter_densities(densities, filename=None):
    """Draw a histogram of ``densities`` and close the figure.

    The figure is written to ``filename`` first when that is truthy.
    """
    sns.set(font_scale=1.3)

    fig, ax = plt.subplots()
    sns.distplot(densities, kde=False, ax=ax)
    ax.set_xlabel('Activation')

    if filename:
        fig.savefig(filename)
    plt.close()
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Produces univariate plots of a given set of columns. Barplots are used
    for categorical columns while histograms (with fitted density functions)
    are used for numerical columns.

    If use_target is true, then the variation of the given set of columns
    with respect to the response variable are used (e.g., 2d scatter
    plots, boxplots, etc).

    Parameters
    ----------
    col : a string of a column name, or a list of many columns names or
            None (default). If col is None, all columns will be used.
    use_target : bool, default False
        Whether to use the target column in the plots.
    **kwargs: additional arguments to be passed to seaborn's distplot or
        to pandas's plotting utilities.

    Raises
    ------
    TypeError : a column is neither numeric nor categorical.
    '''
    self._validate_params(params_list={'col': col},
                          expected_types={'col': [str, list, type(None)]})

    # Normalise ``col`` to a list of column names.
    if type(col) is str:
        col = [col]
    if col is None:
        col = self._get_all_features()

    if not (use_target == False):
        # use target variable
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y, **kwargs)
        return

    for column in col:
        if self.is_numeric(self.df[column]) == True:
            plt.figure(column)
            #sns.despine(left=True)
            sns.distplot(self.df[column], color="m", **kwargs)
            plt.title(column)
            plt.tight_layout()
            #plt.figure('boxplot')
            #sns.boxplot(x=self.df[col], palette="PRGn")
            #sns.despine(offset=10, trim=True)
        elif self.is_categorical(self.df[column]) == True:
            #print self.df[column].describe()
            plt.figure(column)
            #sns.despine(left=True)
            counts = self.df[column].value_counts()
            if len(self.df[column].unique()) > 30:
                # Too many levels: keep only the 20 most frequent.
                counts = counts[:20]
                #top = pd.DataFrame(data=top)
                #sns.barplot(y=top.index, x=top)
            # Reversed so the most frequent level ends up at the top.
            counts[::-1].plot.barh(**kwargs)
            #sns.countplot(y=self.df[column])
            plt.title(column)
            plt.tight_layout()
        else:
            raise TypeError('TYPE IS NOT SUPPORTED')
def plot_score_distribution(y_pred, so):
    """Plot scores of predicted values and save the histogram.

    The x-range always covers [0, 1] and widens to the smallest/largest
    score when those fall outside it. The output path is built from
    ``so['results_dir']``, ``so['model_name']``, ``so['timestamp']`` and
    ``so['break_window']``.
    """
    lower = min(min(y_pred), 0)
    upper = max(max(y_pred), 1)

    sns.distplot(y_pred, kde=False)
    plt.title("distribution of scores for {} model".format(so['model_name']))
    plt.xlabel("raw prediction score")
    plt.xlim([lower, upper])
    plt.ylabel("number of street segments")

    stem = (so['results_dir'] + so['model_name'] + "_"
            + str(so['timestamp']) + "_" + so['break_window'])
    plt.savefig(stem + '_score_distribution.png', bbox_inches='tight')
    plt.close()
def plot_pdf(df):
    """Draw a distribution plot for every float or integer column of ``df``.

    A column that cannot be plotted is reported on stdout and skipped
    (best effort); the remaining columns are still drawn.
    """
    # The builtins replace ``np.float``/``np.int``: those were only aliases
    # of ``float``/``int`` and no longer exist in current NumPy.
    df_num = df.select_dtypes(include=[float, int])
    # rows = df_num / 3
    # f, axes = plt.subplots(3, rows + 1)
    # print axes
    for index in df_num.columns:
        try:
            sns.distplot(df_num[index], color="m")
        except Exception:
            # Narrower than the former bare ``except:``, so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            print("{} error (probably Nan)".format(index))
def continuous_plots(dataFrame, continuous_factors):
    """Draw the distribution of each continuous factor on a subplot grid.

    A grid with one row per factor and two columns is created (8x12). Each
    factor is drawn, with a legend, on the axes at row 0 and column equal to
    the factor's position.
    """
    figure_and_axes = plt.subplots(len(continuous_factors), 2, figsize=(8, 12))
    axes_grid = figure_and_axes[1]

    for position, factor in enumerate(continuous_factors):
        # NOTE(review): [0][position] indexes row 0 by factor position, so
        # this only fits exactly two factors -- confirm the intended layout.
        target_axes = axes_grid[0][position]
        sns.distplot(dataFrame[factor], ax=target_axes, label=factor)
        target_axes.legend()

    plt.tight_layout()
def plot_dist(*args):
    """Draw each positional argument's distribution in its own figure.

    All figures are shown together after the last one has been drawn.
    """
    import seaborn as sns

    for sample in args:
        plt.figure()
        sns.distplot(sample)
    plt.show()
def plot_mean_bootstrap():
    """Overlay Bayesian and classical bootstrap distributions of the mean.

    Uses the fixed data ``[-1, 0, 1]`` and 10000 replications per method.
    """
    X = [-1, 0, 1]
    n_replications = 10000

    bayesian_means = mean(X, n_replications)
    sns.distplot(bayesian_means)

    resampled_means = [np.mean(resample(X)) for _ in range(n_replications)]
    sns.distplot(resampled_means)

    plt.show()
def plot_mean_resample_bootstrap():
    """Overlay ``bayesian_bootstrap`` and classical bootstrap means.

    Uses the fixed data ``[-1, 0, 1]``; ``bayesian_bootstrap`` is called with
    ``np.mean``, 10000 and 100, and the classical side uses 10000 resamples.
    """
    X = [-1, 0, 1]

    bayesian_means = bayesian_bootstrap(X, np.mean, 10000, 100)
    sns.distplot(bayesian_means)

    resampled_means = [np.mean(resample(X)) for _ in range(10000)]
    sns.distplot(resampled_means)

    plt.show()
def plot_median():
    """Overlay ``bayesian_bootstrap`` and classical bootstrap medians.

    The data are 10 uniform draws on [-1, 1); ``bayesian_bootstrap`` is
    called with ``np.median``, 10000 and 100, and the classical side uses
    10000 resamples.
    """
    X = np.random.uniform(-1, 1, 10)

    bayesian_medians = bayesian_bootstrap(X, np.median, 10000, 100)
    sns.distplot(bayesian_medians)

    resampled_medians = [np.median(resample(X)) for _ in range(10000)]
    sns.distplot(resampled_medians)

    plt.show()
def plot_var_bootstrap():
    """Overlay Bayesian and classical bootstrap distributions of the variance.

    The data are 100 uniform draws on [-1, 1); each method uses 10000
    replications.
    """
    X = np.random.uniform(-1, 1, 100)
    n_replications = 10000

    bayesian_variances = var(X, n_replications)
    sns.distplot(bayesian_variances)

    resampled_variances = [np.var(resample(X)) for _ in range(n_replications)]
    sns.distplot(resampled_variances)

    plt.show()
def plot_self_covar_bootstrap():
    """Show the distribution of ``covar(X, X, 10000)`` for uniform data.

    The data are 100 uniform draws on [-1, 1), passed as both arguments.
    """
    X = np.random.uniform(-1, 1, 100)
    self_covariances = covar(X, X, 10000)
    sns.distplot(self_covariances)
    plt.show()
def plot_var_resample_bootstrap():
    """Overlay ``bayesian_bootstrap`` and classical bootstrap variances.

    The data are 100 uniform draws on [-1, 1); ``bayesian_bootstrap`` is
    called with ``np.var``, 10000 and 500, and the classical side uses
    10000 resamples.
    """
    X = np.random.uniform(-1, 1, 100)

    bayesian_variances = bayesian_bootstrap(X, np.var, 10000, 500)
    sns.distplot(bayesian_variances)

    resampled_variances = [np.var(resample(X)) for _ in range(10000)]
    sns.distplot(resampled_variances)

    plt.show()