The following 13 code examples, extracted from open-source Python projects, illustrate how to use sklearn.utils.resample().
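Before the project examples, here is a minimal sketch of the basic API (the variable names are illustrative, not from any of the projects below): resample draws one bootstrap replicate by sampling the given arrays with replacement, keeping the rows of all arrays aligned.

from sklearn.utils import resample
import numpy as np

X = np.arange(10).reshape(5, 2)   # toy feature matrix
y = np.array([0, 0, 1, 1, 1])     # toy labels

# One bootstrap replicate: rows are drawn with replacement,
# and X and y stay aligned row-for-row.
X_bs, y_bs = resample(X, y, replace=True, n_samples=5, random_state=0)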
def dispersion_test(yhat, y, k=100):
    """Implement the regression-based dispersion test with k re-samplings.

    Args:
        yhat (np.array): predicted mutation count
        y (np.array): observed mutation count
        k (int): number of bootstrap re-samplings

    Returns:
        float, float: p-value, theta
    """
    theta = 0
    pval = 0
    for i in range(k):
        y_sub, yhat_sub = resample(y, yhat, random_state=i)
        # (np.power((y - yhat), 2) - y) / yhat for Poisson regression
        aux = (np.power((y_sub - yhat_sub), 2) - yhat_sub) / yhat_sub
        mod = sm.OLS(aux, yhat_sub)
        res = mod.fit()
        theta += res.params[0]
        pval += res.pvalues[0]
    theta = theta / k
    pval = pval / k
    return pval, theta
def plot_mean_bootstrap_exponential_readme():
    X = np.random.exponential(7, 4)
    classical_samples = [np.mean(resample(X)) for _ in range(10000)]
    posterior_samples = mean(X, 10000)
    l, r = highest_density_interval(posterior_samples)
    classical_l, classical_r = highest_density_interval(classical_samples)
    plt.subplot(2, 1, 1)
    plt.title('Bayesian Bootstrap of mean')
    sns.distplot(posterior_samples, label='Bayesian Bootstrap Samples')
    plt.plot([l, r], [0, 0], linewidth=5.0, marker='o', label='95% HDI')
    plt.xlim(-1, 18)
    plt.legend()
    plt.subplot(2, 1, 2)
    plt.title('Classical Bootstrap of mean')
    sns.distplot(classical_samples, label='Classical Bootstrap Samples')
    plt.plot([classical_l, classical_r], [0, 0], linewidth=5.0, marker='o', label='95% HDI')
    plt.xlim(-1, 18)
    plt.legend()
    plt.savefig('readme_exponential.png', bbox_inches='tight')
def _fit_one_bootstrap(self, i):
    m = clone(self.model)
    m._ensemble = True
    X, y = self.X_, self.y_
    n = X.shape[0]
    n_samples = math.ceil(0.8 * n)
    # Get bootstrap set
    X_bs, y_bs = resample(X, y, replace=True, n_samples=n_samples,
                          random_state=self.bs_seed + i)
    m.fit(X_bs, y_bs)
    if self.model.shadow_features:
        return m.interval_, m._omegas, m._biase, m._shadowintervals
    else:
        return m.interval_, m._omegas, m._biase
def plot_mean_bootstrap():
    X = [-1, 0, 1]
    posterior_samples = mean(X, 10000)
    sns.distplot(posterior_samples)
    classical_samples = [np.mean(resample(X)) for _ in range(10000)]
    sns.distplot(classical_samples)
    plt.show()
def plot_mean_resample_bootstrap():
    X = [-1, 0, 1]
    posterior_samples = bayesian_bootstrap(X, np.mean, 10000, 100)
    sns.distplot(posterior_samples)
    classical_samples = [np.mean(resample(X)) for _ in range(10000)]
    sns.distplot(classical_samples)
    plt.show()
def plot_median():
    X = np.random.uniform(-1, 1, 10)
    posterior_samples = bayesian_bootstrap(X, np.median, 10000, 100)
    sns.distplot(posterior_samples)
    classical_samples = [np.median(resample(X)) for _ in range(10000)]
    sns.distplot(classical_samples)
    plt.show()
def plot_var_bootstrap():
    X = np.random.uniform(-1, 1, 100)
    posterior_samples = var(X, 10000)
    sns.distplot(posterior_samples)
    classical_samples = [np.var(resample(X)) for _ in range(10000)]
    sns.distplot(classical_samples)
    plt.show()
def plot_var_resample_bootstrap():
    X = np.random.uniform(-1, 1, 100)
    posterior_samples = bayesian_bootstrap(X, np.var, 10000, 500)
    sns.distplot(posterior_samples)
    classical_samples = [np.var(resample(X)) for _ in range(10000)]
    sns.distplot(classical_samples)
    plt.show()
def plot_regression_bootstrap():
    X = np.array([[0], [1], [2], [3]])
    y = np.array([0, 1, 2, 3]) + np.random.normal(0, 1, 4)
    classical_samples = [LinearRegression().fit(*resample(X, y)).coef_
                         for _ in tqdm(range(10000))]
    posterior_samples = bayesian_bootstrap_regression(
        X, y, lambda X, y: LinearRegression().fit(X, y).coef_, 10000, 1000)
    plt.scatter(X.reshape(-1, 1), y)
    plt.show()
    sns.distplot(classical_samples)
    sns.distplot(posterior_samples)
    plt.show()
def bootstrap_sample(test_x, test_y, model, n):
    """Stratified bootstrap sampling of test data to generate confidence intervals.

    Arguments
    ----------
    test_x (pandas DataFrame): test data features.
    test_y (pandas Series): test outcome.
    model: fitted classifier exposing predict_proba.
    n (int): number of bootstrap samples.

    Returns
    -------
    CI (tuple): tuple with lower and upper limit of 95% confidence interval
    """
    aucs = []
    for sample in range(n):
        ind_pos = np.where(test_y.values > 0)
        ind_neg = np.where(test_y.values <= 0)
        pos_x = test_x[ind_pos[0], ]
        neg_x = test_x[ind_neg[0], ]
        pos_y = test_y.iloc[ind_pos[0]]
        neg_y = test_y.iloc[ind_neg[0]]
        resampled_pos_x, resampled_pos_y = resample(pos_x, pos_y)
        resampled_neg_x, resampled_neg_y = resample(neg_x, neg_y)
        resampled_x = scipy.sparse.vstack((resampled_pos_x, resampled_neg_x))
        resampled_y = pd.concat((resampled_pos_y, resampled_neg_y), axis=0)
        probs = model.predict_proba(resampled_x)
        aucs.append(roc_auc_score(resampled_y.replace(to_replace=-1, value=0),
                                  probs[:, 1]))
    # Return 95% confidence interval
    CI = (np.percentile(aucs, 2.5), np.percentile(aucs, 97.5))
    return CI
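The example above stratifies by hand, resampling the positive and negative rows separately before stacking them back together. As a side note, newer scikit-learn releases (0.21 and later) let resample do this directly via its stratify argument; a minimal sketch with illustrative variable names, not taken from the project above:

from sklearn.utils import resample
import numpy as np

X = np.random.rand(100, 3)            # toy features
y = np.array([0] * 80 + [1] * 20)     # imbalanced toy labels

# One stratified bootstrap replicate: the 80/20 class ratio of y
# is preserved in (X_bs, y_bs). Requires scikit-learn >= 0.21.
X_bs, y_bs = resample(X, y, replace=True, stratify=y, random_state=0)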
def bootstrap_auc(df, col, pred_col, n_bootstrap=1000):
    """Calculate the bootstrapped AUC for a given col trying to predict a pred_col.

    Parameters
    ----------
    df : pandas.DataFrame
    col : str
        column to retrieve the values from
    pred_col : str
        the column we're trying to predict
    n_bootstrap : int
        the number of bootstrap samples

    Returns
    -------
    list : AUCs for each sampling
    """
    scores = np.zeros(n_bootstrap)
    old_len = len(df)
    df.dropna(subset=[col], inplace=True)
    new_len = len(df)
    if new_len < old_len:
        logger.info("Dropping NaN values in %s to go from %d to %d rows" %
                    (col, old_len, new_len))
    preds = df[pred_col].astype(int)
    for i in range(n_bootstrap):
        sampled_counts, sampled_pred = resample(df[col], preds)
        if is_single_class(sampled_pred, col=pred_col):
            continue
        scores[i] = roc_auc_score(sampled_pred, sampled_counts)
    return scores
def test_resample_noarg():
    # Border case not worth mentioning in doctests
    assert_true(resample() is None)
def test_resample_value_errors():
    # Check that invalid arguments yield ValueError
    assert_raises(ValueError, resample, [0], [0, 1])
    assert_raises(ValueError, resample, [0, 1], [0, 1], n_samples=3)
    assert_raises(ValueError, resample, [0, 1], [0, 1], meaning_of_life=42)