Python seaborn 模块,distplot() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用seaborn.distplot()

项目:Supply-demand-forecasting    作者:LevinJ    | 项目源码 | 文件源码
def traffic_districution(self):
    """Print summary statistics of the traffic column and plot its histogram.

    Reads the traffic dataframe via ``self.load_trafficdf`` from the training
    directory, prints ``describe()`` of the ``'traffic'`` column and draws a
    100-bin histogram with pandas.  The figure is neither shown nor saved
    here.
    """
    data_dir = g_singletonDataFilePath.getTrainDir()
    df = self.load_trafficdf(data_dir)
    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3 (the bare ``print x`` statement is not).
    print(df['traffic'].describe())
    df['traffic'].plot(kind='hist', bins=100)
    plt.xlabel('Traffic')
    plt.title('Histogram of Traffic')
#     def disp_gap_bydistrict(self, disp_ids = np.arange(34,67,1), cls1 = 'start_district_id', cls2 = 'time_id'):
# #         disp_ids = np.arange(1,34,1)
#         plt.figure()
#         by_district = self.gapdf.groupby(cls1)
#         size = len(disp_ids)
# #         size = len(by_district)
#         col_len = row_len = math.ceil(math.sqrt(size))
#         count = 1
#         for name, group in by_district:
#             if not name in disp_ids:
#                 continue
#             plt.subplot(row_len, col_len, count)
#             group.groupby(cls2)['gap'].mean().plot()
#             count += 1   
#         return
项目:Supply-demand-forecasting    作者:LevinJ    | 项目源码 | 文件源码
def disp_gap_bydate(self):
    """Show a bar chart of the mean ``gap`` for every ``time_date`` group."""
    mean_gap_per_date = self.gapdf.groupby('time_date')['gap'].mean()
    mean_gap_per_date.plot(kind='bar')
    plt.title('Date/Gap Correlation')
    plt.ylabel('Mean of gap')
    plt.show()

#     def drawGapDistribution(self):
#         self.gapdf[self.gapdf['gapdf'] < 10]['gapdf'].hist(bins=50)
# #         sns.distplot(self.gapdf['gapdf']);
# #         sns.distplot(self.gapdf['gapdf'], hist=True, kde=False, rug=False)
# #         plt.hist(self.gapdf['gapdf'])
#         plt.show()
#         return
#     def drawGapCorrelation(self):
#         _, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
#         res = self.gapdf.groupby('start_district_id')['gapdf'].sum()
#         ax1.bar(res.index, res.values)
#         res = self.gapdf.groupby('time_slotid')['gapdf'].sum()
#         ax2.bar(res.index.map(lambda x: x[11:]), res.values)
#         plt.show()
#         return
项目:guesswhat    作者:GuessWhatGame    | 项目源码 | 文件源码
def __init__(self, path, games, logger, suffix):
    """Plot the normalised histogram of the number of questions per game.

    ``logger`` is accepted but not used in this constructor.
    """
    super(QuestionVsDialogue, self).__init__(path, self.__class__.__name__, suffix)

    questions_per_game = [len(game.questions) for game in games]

    sns.set_style("whitegrid", {"axes.grid": False})

    # Unit-wide bins with edges at 0.5, 1.5, ..., 24.5.
    bin_edges = np.arange(0.5, 25.5, 1)
    axes = sns.distplot(questions_per_game, norm_hist=True, kde=False, bins=bin_edges)
    axes.set_xlim(0.5, 25.5)
    axes.set_ylim(bottom=0)

    axes.set_xlabel("Number of questions", {'size': '14'})
    axes.set_ylabel("Ratio of dialogues", {'size': '14'})
项目:guesswhat    作者:GuessWhatGame    | 项目源码 | 文件源码
def __init__(self, path, games, logger, suffix):
    """Plot the normalised histogram of the number of words per question.

    Question marks are stripped before the ``\\w+`` tokens of each question
    are counted.  ``logger`` is accepted but not used in this constructor.
    """
    super(WordVsQuestion, self).__init__(path, self.__class__.__name__, suffix)

    words_per_question = [
        len(re.findall(r'\w+', re.sub('[?]', '', question)))
        for game in games
        for question in game.questions
    ]

    sns.set_style("whitegrid", {"axes.grid": False})

    # Unit-wide bins with edges at 2.5, 3.5, ..., 14.5.
    bin_edges = np.arange(2.5, 15.5, 1)
    axes = sns.distplot(words_per_question, norm_hist=True, kde=False,
                        bins=bin_edges, color="g")

    axes.set_xlabel("Number of words", {'size': '14'})
    axes.set_ylabel("Ratio of questions", {'size': '14'})
    axes.set_xlim(2.5, 14.5)
    axes.set_ylim(bottom=0)
项目:tensorflow-deep-qa    作者:shuishen112    | 项目源码 | 文件源码
def overlap_visualize():
    """Visualise question/answer word overlap on the shuffled ``nlpcc`` test set.

    Adds length, word-count and ``word_share`` columns to the test frame,
    draws a violin plot of ``word_share`` per ``flag`` next to the
    ``word_share`` distributions for ``flag == 1.0`` (green) and
    ``flag == 0.0`` (red), prints the result of
    ``evaluation.evaluationBypandas`` and shows the figure.
    """
    train, test, dev = load("nlpcc", filter=True)
    test = test.reindex(np.random.permutation(test.index))
    # ``df`` is the same object as ``test``; the new columns land on both.
    df = test
    df['qlen'] = df['question'].str.len()
    df['alen'] = df['answer'].str.len()

    df['q_n_words'] = df['question'].apply(lambda row: len(row.split(' ')))
    df['a_n_words'] = df['answer'].apply(lambda row: len(row.split(' ')))

    def normalized_word_share(row):
        """Shared-word count divided by the summed vocabulary sizes."""
        w1 = set(map(lambda word: word.lower().strip(), row['question'].split(" ")))
        w2 = set(map(lambda word: word.lower().strip(), row['answer'].split(" ")))
        return 1.0 * len(w1 & w2) / (len(w1) + len(w2))
    df['word_share'] = df.apply(normalized_word_share, axis=1)

    plt.figure(figsize=(12, 8))
    plt.subplot(1, 2, 1)
    sns.violinplot(x='flag', y='word_share', data=df[0:50000])
    plt.subplot(1, 2, 2)
    sns.distplot(df[df['flag'] == 1.0]['word_share'][0:10000], color='green')
    sns.distplot(df[df['flag'] == 0.0]['word_share'][0:10000], color='red')

    # Parenthesised form works on both Python 2 and Python 3.
    print(evaluation.evaluationBypandas(test, df['word_share']))
    # The old positional 'hold' was interpreted as ``block`` and is rejected
    # by current Matplotlib, where ``block`` is keyword-only.
    plt.show()
项目:PolBotCheck    作者:codeforfrankfurt    | 项目源码 | 文件源码
def follower_botness(username):
    """Plot the botness histogram of a user's followers.

    Looks up every follower of ``username`` in ``db``, collects their
    ``['botness']['score']`` values, saves the distribution plot to
    ``testfig.png`` and returns the fraction of followers whose score exceeds
    the cutoff (currently hard-coded to 0.7).

    Returns ``None`` when the user has no followers.
    """
    cutoff = 0.7
    scorelist = []
    for f in db.getFollowers(toName=username):
        follower = f['_from'].split('/')[1]
        scorelist.append(db.getUser(follower)['botness']['score'])

    if not scorelist:
        return None

    scores = pd.Series(scorelist, name='probability of follower bot')
    ax = sns.distplot(scores)
    ax.get_figure().savefig('testfig.png')
    # The mean of the boolean mask is the fraction above the cutoff.  Unlike
    # ``count / len`` it cannot truncate to 0 under Python 2 integer division.
    return np.mean(np.array(scorelist) > cutoff)
项目:seinfeld-analysis    作者:christopherjenness    | 项目源码 | 文件源码
def plot_word_frequency(df, words, category):
    """Plot where the given words occur along the concatenated ``lines`` text.

    All entries of ``df['lines']`` are joined, lower-cased and split on
    whitespace; the token positions matching any of ``words`` are drawn as a
    KDE with a rug and saved under ``../plots/``.
    """
    plt.figure()
    transcript = df['lines'].str.cat(sep=' ')
    tokens = np.array([token.lower() for token in transcript.split()])

    positions = []
    for word in words:
        positions.extend(np.where(tokens == word)[0])

    sns.distplot(positions, rug=True, hist=False)
    plt.xlim(0, len(tokens))
    plt.ylabel("{category} word frequency".format(category=category))
    plt.xlabel("Time (words)")
    plt.title("{category} words over time".format(category=category),
              loc='left')
    plt.savefig("../plots/{category}_frequency".format(category=category))
项目:temci    作者:parttimenerd    | 项目源码 | 文件源码
def _barplot(self, first: RunData, second: RunData, property: str, size: int,
             filename: str = None, show_ticks: bool = True) -> str:
    """Save overlaid histograms of one property for two runs; return the path.

    Both series are truncated to the shorter length and share the same bin
    edges.  When ``filename`` is falsy a new figure filename is requested
    from ``self._get_new_figure_filename()``.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    filename = filename or self._get_new_figure_filename()
    self._set_fig_size(size)

    length = min(len(first[property]), len(second[property]))
    first_prop = first[property][0:length]
    second_prop = second[property][0:length]
    combined = first_prop + second_prop
    min_xval = min(combined)
    max_xval = max(combined)
    bins = np.linspace(min_xval, max_xval, math.floor(math.sqrt(length) * size))

    for run, values in ((first, first_prop), (second, second_prop)):
        sns.distplot(values, bins=bins, label=run.description(), kde=False)

    if not show_ticks:
        plt.xticks([])
        plt.yticks([])
    plt.xlim(min_xval, max_xval)
    plt.legend()
    plt.savefig(filename)
    plt.close()
    return filename
项目:deep-learning-for-genomics    作者:chgroenbech    | 项目源码 | 文件源码
def plotHistogram(series, x_label, scale = "linear", normed=False, name = None):
    """Draw a histogram of ``series`` and hand it to ``data.saveFigure``.

    The figure is named ``"histogram"``, prefixed with ``name + "_"`` when
    ``name`` is truthy.  ``scale`` is applied to the y axis.
    """
    figure_name = (name + "_" + "histogram") if name else "histogram"

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.distplot(series, kde=False, norm_hist=normed, ax=axis)
    axis.set_yscale(scale)
    axis.set_xlabel(x_label)

    data.saveFigure(figure, figure_name)
项目:mriqc    作者:poldracklab    | 项目源码 | 文件源码
def plot_dist(
        main_file, mask_file, xlabel, distribution=None, xlabel2=None,
        figsize=DINA4_LANDSCAPE):
    """Plot the in-mask value histogram above a reference distribution.

    The top panel is a 100-bin histogram of the values that
    ``_get_values_inside_a_mask(main_file, mask_file)`` returns.  The bottom
    panel shows ``distribution`` with a vertical line at the median of the
    in-mask values, labelled with that median.

    Returns the canvas-backed matplotlib ``Figure``.
    """
    data = _get_values_inside_a_mask(main_file, mask_file)

    fig = plt.Figure(figsize=figsize)
    FigureCanvas(fig)

    gsp = GridSpec(2, 1)
    ax = fig.add_subplot(gsp[0, 0])
    sns.distplot(data.astype(np.double), kde=False, bins=100, ax=ax)
    ax.set_xlabel(xlabel)

    ax = fig.add_subplot(gsp[1, 0])
    sns.distplot(np.array(distribution).astype(np.double), ax=ax)
    cur_val = np.median(data)
    # ``!g`` is not a valid conversion flag (only !r, !s, !a exist) and raised
    # ValueError; ``:g`` is the general-number format spec that was intended.
    label = "{0:g}".format(cur_val)
    plot_vline(cur_val, label, ax=ax)
    ax.set_xlabel(xlabel2)

    return fig
项目:PythonPackages    作者:wanhanwan    | 项目源码 | 文件源码
def cross_section_hist(data, factor_name, date):
    '''Plot the distribution of one factor across all rows of a single date.

    Parameters
    --------------------------------
    data:DataFrame(index:[Date,IDs],factor1,factor2,...)
    factor_name:str
    date:str

    Returns
    --------------------------------
    The axes object returned by ``sns.distplot``.
    '''
    # ``.ix`` was removed in pandas 1.0; ``.loc`` does the same label-based
    # selection of the ``date`` slice of the first index level.
    plot_data = data.loc[(date,), factor_name].reset_index(drop=True)

    ax = sns.distplot(plot_data)

    return ax

# ??3
# Quantile-Quantile?????????????
项目:syracuse_public    作者:dssg    | 项目源码 | 文件源码
def plot_predict_proba(y_pred_probs, clf, pdf=None):
    """Draw the distribution of predicted probabilities.

    ``clf`` is concatenated into the title.  When ``pdf`` is truthy the
    figure is stored via ``pdf.savefig()`` and closed; otherwise it is shown.
    """
    plt.subplots(1, figsize=(18, 8))
    sns.set_style("white")
    sns.set_context(
        "poster",
        font_scale=2.25,
        rc={"lines.linewidth": 1.25, "lines.markersize": 8},
    )
    sns.distplot(y_pred_probs)
    plt.xlabel('predict_proba')
    plt.ylabel('frequency')
    plt.title(clf + ' proba')

    if not pdf:
        plt.show()
        return
    pdf.savefig()
    plt.close()
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def plot_mean_bootstrap_exponential_readme():
    """Save the README figure comparing Bayesian and classical bootstrap means.

    Four exponential draws are bootstrapped both ways; each distribution gets
    its own subplot with the interval from ``highest_density_interval`` drawn
    on the x axis.  The result is written to ``readme_exponential.png``.
    """
    X = np.random.exponential(7, 4)
    classical_samples = [np.mean(resample(X)) for _ in range(10000)]
    posterior_samples = mean(X, 10000)
    bayes_low, bayes_high = highest_density_interval(posterior_samples)
    classic_low, classic_high = highest_density_interval(classical_samples)

    panels = (
        ('Bayesian Bootstrap of mean', posterior_samples,
         'Bayesian Bootstrap Samples', [bayes_low, bayes_high]),
        ('Classical Bootstrap of mean', classical_samples,
         'Classical Bootstrap Samples', [classic_low, classic_high]),
    )
    for row, (title, samples, label, interval) in enumerate(panels, start=1):
        plt.subplot(2, 1, row)
        plt.title(title)
        sns.distplot(samples, label=label)
        plt.plot(interval, [0, 0], linewidth=5.0, marker='o', label='95% HDI')
        plt.xlim(-1, 18)
        plt.legend()

    plt.savefig('readme_exponential.png', bbox_inches='tight')
项目:idea_relations    作者:Noahs-ARK    | 项目源码 | 文件源码
def joint_plot(x, y, xlabel=None,
               ylabel=None, xlim=None, ylim=None,
               loc="best", color='#0485d1',
               size=8, markersize=50, kind="kde",
               scatter_color="r"):
    """Build a ``SubsampleJointGrid`` with a shaded KDE and a scatter overlay.

    When both labels are given the data is wrapped in a ``DataFrame`` whose
    columns carry those labels.  ``loc``, ``color``, ``markersize`` and
    ``kind`` are accepted but not used by this function.

    Returns the grid object.
    """
    with sns.axes_style("darkgrid"):
        grid_options = dict(space=0.1, ratio=2, size=size, xlim=xlim, ylim=ylim)
        if xlabel and ylabel:
            frame = DataFrame(data={xlabel: x, ylabel: y})
            g = SubsampleJointGrid(xlabel, ylabel, data=frame, **grid_options)
        else:
            g = SubsampleJointGrid(x, y, **grid_options)

        g.plot_joint(sns.kdeplot, shade=True, cmap="Blues")
        g.plot_sub_joint(plt.scatter, 1000, s=20, c=scatter_color, alpha=0.3)
        g.plot_marginals(sns.distplot, kde=False, rug=False)
        g.annotate(ss.pearsonr, fontsize=25, template="{stat} = {val:.2g}\np = {p:.2g}")

        joint_axes = g.ax_joint
        joint_axes.set_yticklabels(joint_axes.get_yticks())
        joint_axes.set_xticklabels(joint_axes.get_xticks())
    return g
项目:flexCE    作者:bretthandrews    | 项目源码 | 文件源码
def joint_overplot(x, y, df, fig, color='r', marg_kws=None):
    """Draw a further data set onto an existing JointGrid.

    Args:
        x (str): Column of ``df`` assigned to ``fig.x``.
        y (str): Column of ``df`` assigned to ``fig.y``.
        df (DataFrame): Source of the new data.
        fig: seaborn JointGrid instance.
        color (str): Color used for the scatter and the marginals.
        marg_kws (dict): Keyword arguments forwarded to plot_marginals().
            When omitted, normalised histograms weighted by
            ``df.Survivors`` are used.

    Returns:
        fig: the same seaborn JointGrid instance.
    """
    marginal_options = marg_kws
    if marginal_options is None:
        marginal_options = dict(
            norm_hist=True,
            hist_kws=dict(weights=df.Survivors.values),
        )

    fig.x = df[x]
    fig.y = df[y]
    fig.plot_joint(plt.scatter, c=color)
    fig.plot_marginals(
        sns.distplot, color=color, kde=False, axlabel=False, **marginal_options
    )
    return fig
项目:sampling    作者:e-baumer    | 项目源码 | 文件源码
def display_covariate_dist(self, covariate_list, save_file=None):
    '''Plot each covariate's histogram, one facet per ``arm_assignment`` value.

    Parameters
    ----------
    covariate_list : iterable of column names in ``self.data``.
    save_file : optional path.  When truthy every grid is saved to it at
        450 dpi; when ``None`` the figures are shown instead.
    '''
    # ``sns.plt`` was removed in seaborn 0.8; use pyplot directly.
    import matplotlib.pyplot as plt

    for covariate in covariate_list:
        g = sns.FacetGrid(self.data, col="arm_assignment")
        # The original branched on the number of unique values but ran the
        # same call in both branches, so a single call is equivalent.
        g.map(sns.distplot, covariate, kde=False)
        if save_file:
            # NOTE(review): every covariate is written to the same path, so
            # only the last grid survives on disk -- confirm this is intended.
            g.savefig(save_file, dpi=450)

    if save_file is None:
        plt.show()
项目:paysage    作者:drckf    | 项目源码 | 文件源码
def weight_norm_histogram(rbm, show_plot=False, filename=None):
    """Overlay the distributions of scaled column norms of every weight layer.

    For each of the ``rbm.num_weights`` layers the norm along axis 0 of
    ``W()`` is divided by the square root of the layer's first shape
    dimension and drawn on a shared axes, labelled with the layer index.
    The figure is optionally shown and/or saved, then closed.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    fig, ax = plt.subplots()
    for l in range(rbm.num_weights):
        num_inputs = rbm.weights[l].shape[0]
        norm = be.to_numpy_array(be.norm(rbm.weights[l].W(), axis=0) / sqrt(num_inputs))
        sns.distplot(norm, ax=ax, label=str(l))
    ax.legend()

    if show_plot:
        # ``plt.show`` takes no figure argument: the old positional ``fig``
        # was interpreted as ``block`` and is rejected by current Matplotlib.
        plt.show()
    if filename is not None:
        fig.savefig(filename)
    plt.close(fig)
项目:crop-seq    作者:epigen    | 项目源码 | 文件源码
def screen_zscore(series, axis=None, z_score=False, plot=True):
    """
    Calculate screen z score (difference between positive and negative controls).

    Entries whose index label contains "Essential" are the positive controls,
    those containing "CTRL" the negative controls.  The score is
    ``1 - 3 * (std(pos) + std(neg)) / |mean(pos) - mean(neg)|``.

    :param series: pandas Series with a string index.
    :param axis: matplotlib axes to draw on; a new one is created when None.
    :param z_score: standardise ``series`` (mean 0, std 1) first.
    :param plot: draw both control distributions when True.
    :returns: the screen Z score.
    """
    def z_factor(pos, neg):
        return 1 - (3 * (np.std(pos) + np.std(neg)) / (abs(np.mean(pos) - np.mean(neg))))

    if z_score:
        series = (series - series.mean()) / series.std()

    # ``.ix`` was removed in pandas 1.0; ``.loc`` performs the same
    # label-based selection for a string index.
    pos = series.loc[series.index[series.index.str.contains("Essential")]]
    neg = series.loc[series.index[series.index.str.contains("CTRL")]]

    z = z_factor(pos, neg)

    # Plot
    if plot:
        # Clearing the names keeps seaborn from using them as axis labels.
        pos.name = None
        neg.name = None
        if axis is None:
            fig, axis = plt.subplots(1)
        sns.distplot(pos, ax=axis, label="positive controls")
        sns.distplot(neg, ax=axis, label="negative controls; screen Z-score = {}".format(z))

    return z
项目:guacml    作者:guacml    | 项目源码 | 文件源码
def target_plot(self):
    """Plot the target column according to its column type.

    Time-series runs delegate to ``time_series_target_plot``.  Otherwise a
    binary target gets a bar showing its rate and a numeric or ordinal
    target gets a distribution plot; other types draw nothing.
    """
    target_type = self.input_data.metadata.loc[self.target].type
    target_data = self.input_data.df[self.target]
    sns.set(style="white", color_codes=True)

    if self.run_time_config['is_time_series']:
        self.time_series_target_plot()
        return

    if target_type == ColType.BINARY:
        plt.figure(figsize=(6, 1))
        positive_rate = target_data.sum() / target_data.shape[0]
        sns.barplot(positive_rate)
        plt.xlim([0, 1])
        plt.title(target_data.name + ' rate')
    elif target_type == ColType.NUMERIC or target_type == ColType.ORDINAL:
        plt.figure(figsize=(6, 2))
        axes = sns.distplot(target_data, hist_kws=dict(edgecolor='black'))
        axes.set_xlim(target_data.min(), target_data.max())
        plt.title(target_data.name + ' histogram')
项目:kaggle-review    作者:daxiongshu    | 项目源码 | 文件源码
def distribution(data,xlabel="data",ylabel="percentage",name=None):
    """Show the distribution of ``data``; save it to ``name`` when given."""
    axes = plt.axes()
    axes.set(xlabel=xlabel, ylabel=ylabel)
    drawn = sns.distplot(data, ax=axes)
    plt.show()
    if name is None:
        return
    drawn.get_figure().savefig(name)
项目:Supply-demand-forecasting    作者:LevinJ    | 项目源码 | 文件源码
def weather_distribution(self):
    """Print summary statistics of the weather column and draw its count plot.

    The dataframe returned by ``self.load_weatherdf`` is stored on
    ``self.gapdf``.  The figure is neither shown nor saved here.
    """
    data_dir = g_singletonDataFilePath.getTrainDir()
    self.gapdf = self.load_weatherdf(data_dir)
    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3.
    print(self.gapdf['weather'].describe())

    sns.countplot(x="weather", data=self.gapdf, palette="Greens_d")
    plt.title('Countplot of Weather')
项目:Supply-demand-forecasting    作者:LevinJ    | 项目源码 | 文件源码
def gapdistricution(self):
    """Print summary statistics of the gap column and plot its histogram.

    The dataframe returned by ``self.load_gapdf`` is stored on ``self.gapdf``
    and its ``'gap'`` column is drawn as a 200-bin histogram.  The figure is
    neither shown nor saved here.
    """
    data_dir = g_singletonDataFilePath.getTrainDir()
    self.gapdf = self.load_gapdf(data_dir)
    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3.
    print(self.gapdf['gap'].describe())
    self.gapdf['gap'].plot(kind='hist', bins=200)
    plt.xlabel('Gaps')
    plt.title('Histogram of Gaps')
项目:krafters    作者:GianlucaBortoli    | 项目源码 | 文件源码
def generateDistributionPlot(test):
    """Draw one filled KDE curve per row of ``test``.

    Each row is a sequence whose first element is the curve label and whose
    remaining elements are convertible to ``float``.  The rows are no longer
    modified, so the same data can be plotted more than once.
    """
    sns.set(color_codes=True)
    for row in test:
        # Slice instead of ``row.pop(0)`` so the caller's rows keep their label.
        label, values = row[0], row[1:]
        d = [float(i) for i in values]
        # Plot a filled kernel density estimate
        sns.distplot(d, hist=False, kde_kws={"shade": True}, label=label)
    plt.xlim([-0.01, 0.1])
    plt.xlabel("time (s)")
    plt.ylabel("operations")
项目:QDREN    作者:andreamad8    | 项目源码 | 文件源码
def plot_dist(train_y,dev_y,test_y):
    """Plot normalised label histograms for the three data splits.

    The training, validation and test labels are drawn in three stacked
    subplots; the figure is written to ``checkpoints/label_dist.pdf`` and
    then shown.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt
    plt.rc('text', usetex=True)
    plt.rc('font', family='Times-Roman')
    sns.set_style(style='white')
    palette = sns.color_palette("Set2", 10)  # built but not used below
    fig = plt.figure(figsize=(8,12))

    splits = (
        (train_y, 'Training', "blue"),
        (dev_y, 'Validation', "green"),
        (test_y, 'Test', "red"),
    )
    for row, (answers, split_name, split_color) in enumerate(splits, start=1):
        axes = fig.add_subplot(3, 1, row)
        sns.distplot(answers, kde=False, label=split_name, hist=True,
                     norm_hist=True, color=split_color)
        axes.set_xlabel("Answer")
        axes.set_ylabel("Frequency")
        axes.set_xlim([0,500])
        plt.legend(loc='best')

    plt.savefig('checkpoints/label_dist.pdf', format='pdf', dpi=300)

    plt.show()
项目:WebAppEx    作者:karlafej    | 项目源码 | 文件源码
def getPlot(self, params):
    """Return a histogram of ``self.likes`` with ``params['bins']`` bins."""
    bin_count = int(params['bins'])
    bar_style = dict(alpha=0.75, edgecolor="k", linewidth=1)
    axes = sns.distplot(self.likes, bins=bin_count, rug=False, kde=False,
                        hist_kws=bar_style)
    axes.set_xlabel('Number of likes')
    axes.set_ylabel('Count')
    return axes
项目:pyflux    作者:RJT1990    | 项目源码 | 文件源码
def plot_z(self,indices=None,figsize=(15,5),loc=1):
    """Plot the estimated density of each latent variable in ``self.z_list``.

    Parameters
    ----------
    indices : container of int, optional
        Positions in ``self.z_list`` to plot; all are plotted when None.
    figsize : tuple
        Size of the matplotlib figure.
    loc : int or str
        Legend location, forwarded to ``plt.legend``.

    Raises
    ------
    ValueError
        If a selected latent variable has neither a ``sample`` nor both
        ``value`` and ``std``.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=figsize)
    for position, latent in enumerate(self.z_list):
        if indices is not None and position not in indices:
            continue

        has_sample = hasattr(latent, 'sample')
        has_moments = hasattr(latent, 'value') and hasattr(latent, 'std')
        if not (has_sample or has_moments):
            raise ValueError("No information on latent variable to plot!")

        label = latent.method + ' estimate of ' + latent.name
        if has_sample:
            sns.distplot(latent.prior.transform(latent.sample), rug=False, hist=False, label=label)
        elif latent.prior.transform_name is None:
            x = np.linspace(latent.value - latent.std*3.5, latent.value + latent.std*3.5, 100)
            # Normal pdf written out with numpy: ``matplotlib.mlab.normpdf``
            # has been removed from Matplotlib.
            density = np.exp(-0.5 * ((x - latent.value) / latent.std) ** 2) / (latent.std * np.sqrt(2 * np.pi))
            plt.plot(x, density, label=label)
        else:
            # Transformed prior: approximate the density by simulation.
            sims = latent.prior.transform(np.random.normal(latent.value, latent.std, 100000))
            sns.distplot(sims, rug=False, hist=False, label=label)

    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.title('Latent Variable Plot')
    # Honour the ``loc`` argument (it was previously ignored in favour of 1).
    plt.legend(loc=loc)
    plt.show()
项目:pyflux    作者:RJT1990    | 项目源码 | 文件源码
def trace_plot(self,figsize=(15,15)):
    """Plot sampling diagnostics for every latent variable.

    Each latent variable gets one row of four panels: density estimate,
    trace, cumulative average (all on the prior-transformed chain) and the
    ``acf`` values of the raw chain for lags 1 to 9.

    Raises
    ------
    ValueError
        If the first latent variable has no ``sample`` attribute.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    if not hasattr(self.z_list[0], 'sample'):
        raise ValueError("No samples to plot!")

    fig = plt.figure(figsize=figsize)
    n_rows = len(self.z_list)

    # Six base colours, repeated so that every row index has an entry.
    palette = [(0.2980392156862745, 0.4470588235294118, 0.6901960784313725),
               (0.3333333333333333, 0.6588235294117647, 0.40784313725490196),
               (0.7686274509803922, 0.3058823529411765, 0.3215686274509804),
               (0.5058823529411764, 0.4470588235294118, 0.6980392156862745),
               (0.8, 0.7254901960784313, 0.4549019607843137),
               (0.39215686274509803, 0.7098039215686275, 0.803921568627451)] * n_rows

    for j, latent in enumerate(self.z_list):
        chain = latent.sample
        transformed = latent.prior.transform(chain)
        first_row = (j == 0)  # column titles appear on the first row only

        ax = fig.add_subplot(n_rows, 4, j*4 + 1)
        sns.distplot(transformed, rug=False, hist=False, color=palette[j], ax=ax)
        ax.set_ylabel(latent.name)
        if first_row:
            ax.set_title('Density Estimate')

        ax = fig.add_subplot(n_rows, 4, j*4 + 2)
        ax.plot(transformed, color=palette[j])
        if first_row:
            ax.set_title('Trace Plot')

        ax = fig.add_subplot(n_rows, 4, j*4 + 3)
        ax.plot(np.cumsum(transformed)/np.array(range(1,len(chain)+1)), color=palette[j])
        if first_row:
            ax.set_title('Cumulative Average')

        ax = fig.add_subplot(n_rows, 4, j*4 + 4)
        ax.bar(range(1,10), [acf(chain,lag) for lag in range(1,10)], color=palette[j])
        if first_row:
            ax.set_title('ACF Plot')

    # ``sns.plt`` was removed in seaborn 0.8; call pyplot directly.
    plt.show()
项目:AlphaPy    作者:ScottFreeLLC    | 项目源码 | 文件源码
def plot_distribution(df, target, tag='eda', directory=None):
    r"""Draw and store the distribution plot of one dataframe column.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe holding the ``target`` column.
    target : str
        Name of the column whose distribution is plotted.
    tag : str
        Unique identifier passed on to ``write_plot``.
    directory : str, optional
        Plot location passed on to ``write_plot``.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.distplot.html

    """

    logger.info("Generating Distribution Plot")

    # Build the plot and pick up the figure that owns it.
    figure = sns.distplot(df[target]).get_figure()

    # Hand the figure over for saving.
    write_plot('seaborn', figure, 'distribution_plot', tag, directory)


#
# Function plot_box
#
项目:vinci    作者:Phylliade    | 项目源码 | 文件源码
def plot_action_distribution(actions, file="action_ditribution.png"):
    """Save a histogram of ``actions`` to ``file`` and close the figure."""
    plt.figure(figsize=(10, 10))
    sb.distplot(actions, kde=False)
    plt.title("Action distribution")
    plt.xlabel("action")
    plt.ylabel("probability")
    plt.savefig(file)
    plt.close()
项目:vinci    作者:Phylliade    | 项目源码 | 文件源码
def plot_value_distribution(values, file="value_distribution.png"):
    """Save the distribution plot of critic ``values`` to ``file``.

    The figure is written to ``file`` and closed, matching
    ``plot_action_distribution``.  Previously ``file`` was accepted but never
    used, so nothing was saved and the figure stayed open.
    """
    plt.figure(figsize=(10, 10))
    sb.distplot(values)
    plt.xlabel("critic value")
    plt.title("Value distribution")
    plt.savefig(file)
    plt.close()
项目:vinci    作者:Phylliade    | 项目源码 | 文件源码
def action_distribution(actions, ax=None, file="action_ditribution.png"):
    """Draw a histogram of ``actions`` and save the figure to ``file``.

    When ``ax`` is None a new 10x10 figure is created and closed after
    saving.  When ``ax`` is given, the histogram, labels and title all go
    onto that axes and its figure is saved but left open for the caller.
    Previously a new blank figure was created, labelled and saved instead.
    """
    owns_figure = ax is None
    if owns_figure:
        ax = plt.figure(figsize=(10, 10)).gca()

    sb.distplot(actions, kde=False, ax=ax)
    ax.set_ylabel("probability")
    ax.set_xlabel("action")
    ax.set_title("Action distribution")
    ax.figure.savefig(file)

    if owns_figure:
        plt.close(ax.figure)
项目:importance-sampling    作者:idiap    | 项目源码 | 文件源码
def update(data, ax, xlim, ylim, vl):
    """Redraw the distribution of ``data`` on ``ax`` and return ``ax``.

    Truthy ``xlim`` / ``ylim`` are applied as axis limits.  When ``vl`` is
    not None a dashed vertical line is drawn at that x position across the
    current y range.
    """
    ax.clear()
    sns.distplot(data, ax=ax)

    for limits, apply_limits in ((xlim, ax.set_xlim), (ylim, ax.set_ylim)):
        if limits:
            apply_limits(limits)

    if vl is not None:
        ax.plot([vl, vl], ax.get_ylim(), "k--")

    return ax
项目:data_utilities    作者:fmv1992    | 项目源码 | 文件源码
def generate_test_figures_2d_histogram(cls):
    """generate_test_figures_2d_histogram class method.

    Build ``cls.n_graphical_tests // 2`` histogram figures, each from a
    series produced by a randomly chosen generator, and return them as a
    tuple.

    """
    # Candidate generators: normal noise, a randomly scaled ramp and a
    # random constant, each of length ``cls.n_lines_test_pandas``.
    series_generators = (
        lambda: np.random.normal(size=cls.n_lines_test_pandas),
        lambda: np.random.randint(0, 99999) * np.arange(cls.n_lines_test_pandas),
        lambda: np.random.randint(0, 99999) * np.ones(cls.n_lines_test_pandas),
    )

    def plot_histogram(x):
        return sns.distplot(x, kde=False)

    figures = []
    for _ in range(cls.n_graphical_tests//2):
        series = pd.Series(np.random.choice(series_generators)())
        figures.append(cls.figure_from_plot_function(plot_histogram, series))

    return tuple(figures)
项目:data_utilities    作者:fmv1992    | 项目源码 | 文件源码
def histogram_of_floats(a,
                        *args,
                        **sns_distplot_kwargs):
    """Plot a histogram of floats with sane defaults.

    All positional and keyword arguments are forwarded unchanged to
    ``sns.distplot``.

    Arguments:
        a (pd.Series): Float series to create a histogram plot.

    Returns:
        matplotlib.axes.Axes: the plotted axes.

    Examples:
        >>> import pandas_utilities as pu
        >>> float_serie = pu.dummy_dataframe().float_0
        >>> fig = plt.figure()
        >>> axes = histogram_of_floats(float_serie, kde=False)
        >>> isinstance(axes, matplotlib.axes.Axes)
        True
        >>> fig.savefig('/tmp/doctest_{0}.png'.format(                        \
        'histogram_of_floats'), dpi=500)

    """
    return sns.distplot(a, *args, **sns_distplot_kwargs)
项目:tensorflow_kaggle_house_price    作者:Cuongvn08    | 项目源码 | 文件源码
def display_distrib(pd, feature):
    """Show the distribution of one column with a fitted normal curve.

    ``pd`` is the dataframe (the parameter name shadows the usual pandas
    alias inside this function).  Missing values of ``pd[feature]`` are
    dropped before plotting and fitting; the fitted mean and standard
    deviation appear in the legend.
    """
    plt.figure()
    values = pd[feature].dropna()
    sns.distplot(values, fit=norm)
    (mu, sigma) = norm.fit(values)

    # Raw string: ``\m`` and ``\s`` are invalid escape sequences in a normal
    # literal.  The resulting text is unchanged.
    plt.legend([r'Normal dist. ($\mu=$ {:.2f} and $\sigma=$ {:.2f} )'.format(mu, sigma)], loc='best')
    plt.ylabel('Frequency')
    plt.title('SalePrice distribution')
    plt.show()
项目:mriqc    作者:poldracklab    | 项目源码 | 文件源码
def plot_fd(fd_file, fd_radius, mean_fd_dist=None, figsize=DINA4_LANDSCAPE):
    """Plot the frame displacement trace beside its vertical distribution.

    When ``mean_fd_dist`` is truthy a second row shows that distribution
    with a vertical line at the mean of this run's displacement values.

    Returns the canvas-backed matplotlib ``Figure``.
    """
    fd_power = _calc_fd(fd_file, fd_radius)

    fig = plt.Figure(figsize=figsize)
    FigureCanvas(fig)

    with_group_panel = bool(mean_fd_dist)
    if with_group_panel:
        grid = GridSpec(2, 4)
    else:
        grid = GridSpec(1, 2, width_ratios=[3, 1])
        grid.update(hspace=1.0, right=0.95, left=0.1, bottom=0.2)

    # Time course, in every column of the first row except the last.
    trace_ax = fig.add_subplot(grid[0, :-1])
    trace_ax.plot(fd_power)
    trace_ax.set_xlim((0, len(fd_power)))
    trace_ax.set_ylabel("Frame Displacement [mm]")
    trace_ax.set_xlabel("Frame number")
    ylim = trace_ax.get_ylim()

    # Vertical distribution in the last column, on the trace's y range.
    side_ax = fig.add_subplot(grid[0, -1])
    sns.distplot(fd_power, vertical=True, ax=side_ax)
    side_ax.set_ylim(ylim)

    if with_group_panel:
        group_ax = fig.add_subplot(grid[1, :])
        sns.distplot(mean_fd_dist, ax=group_ax)
        group_ax.set_xlabel("Mean Frame Displacement (over all subjects) [mm]")
        mean_fd = fd_power.mean()
        label = r'$\overline{{\text{{FD}}}}$ = {0:g}'.format(mean_fd)
        plot_vline(mean_fd, label, ax=group_ax)

    return fig
项目:eqnet    作者:mast-group    | 项目源码 | 文件源码
def plot_distribution(data, title):
    """Show the distribution of ``data`` (with a rug) under ``title``."""
    values = np.array(list(data))
    sns.distplot(values, rug=True)
    plt.title(title)
    plt.show()
项目:deepcpg    作者:cangermueller    | 项目源码 | 文件源码
def plot_filter_densities(densities, filename=None):
    """Draw a histogram of ``densities``.

    When ``filename`` is truthy the figure is saved there and the current
    figure is closed; otherwise it is left open.
    """
    sns.set(font_scale=1.3)
    figure, axes = plt.subplots()
    sns.distplot(densities, kde=False, ax=axes)
    axes.set_xlabel('Activation')
    if not filename:
        return
    figure.savefig(filename)
    plt.close()
项目:xplore    作者:fahd09    | 项目源码 | 文件源码
def explore_feature_variation(self, col=None, use_target=False, **kwargs):
    '''
    Draw one univariate plot per requested column.

    Numeric columns get a histogram with a fitted density; categorical
    columns get a horizontal bar chart of value counts, limited to the first
    20 entries of ``value_counts()`` when the column has more than 30 unique
    values.

    If use_target is true, each column is instead passed together with the
    response variable to ``explore_features_covariation``.

    Parameters
    ----------
    col : a string of a column name, or a list of many columns names or
            None (default). If col is None, all columns will be used.
    use_target : bool, default False
        Whether to use the target column in the plots.
    **kwargs: additional arguments to be passed to seaborn's distplot or
        to pandas's plotting utilities..

    Raises
    ------
    TypeError
        If a column is neither numeric nor categorical.
    '''
    self._validate_params(params_list   = {'col':col},
                          expected_types= {'col':[str,list,type(None)]})

    if type(col) is str:
        col = [col]
    if col is None:
        col = self._get_all_features()

    if use_target == False:
        for column in col:
            values = self.df[column]
            numeric = self.is_numeric(values) == True
            # The categorical test only runs for non-numeric columns.
            if not numeric and not (self.is_categorical(values) == True):
                raise TypeError('TYPE IS NOT SUPPORTED')

            plt.figure(column)
            if numeric:
                sns.distplot(values, color="m", **kwargs)
            elif len(values.unique()) > 30:
                values.value_counts()[:20][::-1].plot.barh(**kwargs)
            else:
                values.value_counts()[::-1].plot.barh(**kwargs)
            plt.title(column)
            plt.tight_layout()
    else:
        # Pair every column with the target variable instead.
        for column in col:
            self.explore_features_covariation(col1=column, col2=self.y, **kwargs)
项目:syracuse_public    作者:dssg    | 项目源码 | 文件源码
def plot_score_distribution(y_pred, so):
    """Save a histogram of the raw prediction scores.

    The x range always covers at least [0, 1].  The output path is built
    from ``so['results_dir']``, ``so['model_name']``, ``so['timestamp']``
    and ``so['break_window']``.
    """
    lower_bound = min(min(y_pred), 0)
    upper_bound = max(max(y_pred), 1)

    sns.distplot(y_pred, kde=False)
    plt.title("distribution of scores for {} model".format(so['model_name']))
    plt.xlabel("raw prediction score")
    plt.xlim([lower_bound, upper_bound])
    plt.ylabel("number of street segments")

    path_prefix = "_".join([
        so['results_dir'] + so['model_name'],
        str(so['timestamp']),
        so['break_window'],
    ])
    plt.savefig(path_prefix + '_score_distribution.png', bbox_inches='tight')
    plt.close()
项目:sentisignal    作者:jonathanmanfield    | 项目源码 | 文件源码
def plot_pdf(df):
    """Draw the distribution of every float or int column of ``df``.

    Plotting is best effort: a column that cannot be plotted is reported on
    stdout and skipped.
    """
    # ``np.float`` and ``np.int`` were plain aliases of the builtins and have
    # been removed from NumPy; the builtins select the same dtypes.
    df_num = df.select_dtypes(include=[float, int])

    for index in df_num.columns:
        try:
            sns.distplot(df_num[index], color="m")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            print("{} error (probably Nan)".format(index))
项目:eezzy    作者:3Blades    | 项目源码 | 文件源码
def continuous_plots(dataFrame, continuous_factors):
    """Draw the distribution of each continuous factor on its own axes.

    A grid of ``len(continuous_factors)`` rows by 2 columns is created and
    filled in row-major order, one factor per axes.

    The previous indexing (``axes[0][column]``) raised IndexError from the
    third factor on and TypeError for a single factor.  Row-major filling
    keeps the placement it produced for two factors.
    """
    # ``squeeze=False`` always returns a 2-D axes array, even for one row.
    _, axes = plt.subplots(len(continuous_factors), 2, figsize=(8, 12),
                           squeeze=False)
    for factor, ax in zip(continuous_factors, axes.flat):
        sns.distplot(dataFrame[factor], ax=ax, label=factor)
        ax.legend()
    plt.tight_layout()
项目:SCFGP    作者:MaxInGaussian    | 项目源码 | 文件源码
def plot_dist(*args):
    """Draw each positional argument's distribution in its own figure, then show."""
    import seaborn as sns
    for samples in args:
        plt.figure()
        sns.distplot(samples)
    plt.show()
项目:SCFGP    作者:MaxInGaussian    | 项目源码 | 文件源码
def plot_dist(*args):
    """Open one figure per argument, plot its distribution, then show them all."""
    import seaborn as sns
    for values in args:
        plt.figure()
        sns.distplot(values)
    plt.show()
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def plot_mean_bootstrap():
    """Overlay Bayesian and classical bootstrap distributions of the mean of [-1, 0, 1]."""
    data = [-1, 0, 1]
    # Each sampler is drawn and plotted before the next one runs.
    samplers = (
        lambda: mean(data, 10000),
        lambda: [np.mean(resample(data)) for _ in range(10000)],
    )
    for draw in samplers:
        sns.distplot(draw())
    plt.show()
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def plot_mean_resample_bootstrap():
    """Overlay resampling-based Bayesian and classical bootstrap means of [-1, 0, 1]."""
    data = [-1, 0, 1]
    # Each sampler is drawn and plotted before the next one runs.
    samplers = (
        lambda: bayesian_bootstrap(data, np.mean, 10000, 100),
        lambda: [np.mean(resample(data)) for _ in range(10000)],
    )
    for draw in samplers:
        sns.distplot(draw())
    plt.show()
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def plot_median():
    """Overlay Bayesian and classical bootstrap medians of 10 uniform draws."""
    data = np.random.uniform(-1, 1, 10)
    # Each sampler is drawn and plotted before the next one runs.
    samplers = (
        lambda: bayesian_bootstrap(data, np.median, 10000, 100),
        lambda: [np.median(resample(data)) for _ in range(10000)],
    )
    for draw in samplers:
        sns.distplot(draw())
    plt.show()
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def plot_var_bootstrap():
    """Overlay Bayesian and classical bootstrap variances of 100 uniform draws."""
    data = np.random.uniform(-1, 1, 100)
    # Each sampler is drawn and plotted before the next one runs.
    samplers = (
        lambda: var(data, 10000),
        lambda: [np.var(resample(data)) for _ in range(10000)],
    )
    for draw in samplers:
        sns.distplot(draw())
    plt.show()
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def plot_self_covar_bootstrap():
    """Show the ``covar`` bootstrap distribution of 100 uniform draws against themselves."""
    data = np.random.uniform(-1, 1, 100)
    sns.distplot(covar(data, data, 10000))
    plt.show()
项目:bayesian_bootstrap    作者:lmc2179    | 项目源码 | 文件源码
def plot_var_resample_bootstrap():
    """Overlay resampling-based Bayesian and classical bootstrap variances."""
    data = np.random.uniform(-1, 1, 100)
    # Each sampler is drawn and plotted before the next one runs.
    samplers = (
        lambda: bayesian_bootstrap(data, np.var, 10000, 500),
        lambda: [np.var(resample(data)) for _ in range(10000)],
    )
    for draw in samplers:
        sns.distplot(draw())
    plt.show()