我们从Python开源项目中,提取了以下17个代码示例,用于说明如何使用scipy.stats.binom()。
def test_beta_binomial_two_identical_models(db_path, sampler): binomial_n = 5 def model_fun(args): return {"result": st.binom(binomial_n, args.theta).rvs()} models = [model_fun for _ in range(2)] models = list(map(SimpleModel, models)) population_size = ConstantPopulationSize(800) parameter_given_model_prior_distribution = [Distribution(theta=st.beta( 1, 1)) for _ in range(2)] abc = ABCSMC(models, parameter_given_model_prior_distribution, MinMaxDistanceFunction(measures_to_use=["result"]), population_size, eps=MedianEpsilon(.1), sampler=sampler) abc.new(db_path, {"result": 2}) minimum_epsilon = .2 history = abc.run(minimum_epsilon, max_nr_populations=3) mp = history.get_model_probabilities(history.max_t) assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
def test_beta_binomial_two_identical_models_adaptive(db_path, sampler): binomial_n = 5 def model_fun(args): return {"result": st.binom(binomial_n, args.theta).rvs()} models = [model_fun for _ in range(2)] models = list(map(SimpleModel, models)) population_size = AdaptivePopulationSize(800) parameter_given_model_prior_distribution = [ Distribution(theta=st.beta(1, 1)) for _ in range(2)] abc = ABCSMC(models, parameter_given_model_prior_distribution, MinMaxDistanceFunction(measures_to_use=["result"]), population_size, eps=MedianEpsilon(.1), sampler=sampler) abc.new(db_path, {"result": 2}) minimum_epsilon = .2 history = abc.run(minimum_epsilon, max_nr_populations=3) mp = history.get_model_probabilities(history.max_t) assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
def test_pmf_pb_binom(): """Compare the probability mass function with the binomial limit case.""" # For equal probabilites p_j, the Poisson Binomial distribution reduces to # the Binomial one: p = [0.5, 0.5] pb = PoiBin(p) bn = binom(n=2, p=p[0]) # Compare to four digits behind the comma assert int(bn.pmf(0) * 10000) == int(pb.pmf(0) * 10000) # For different probabilities p_j, the Poisson Binomial distribution and # the Binomial distribution are different: pb = PoiBin([0.5, 0.8]) bn = binom(2, p=0.5) assert int(bn.pmf(0) * 10000) != int(pb.pmf(0) * 10000)
def test_cdf_pb_binom(): """Compare the cumulative distribution function with the binomial limit case. """ # For equal probabilites p_j, the Poisson Binomial distribution reduces # to the Binomial one: p = [0.5, 0.5] pb = PoiBin(p) bn = binom(n=2, p=p[0]) # Compare to four digits behind the comma assert int(bn.cdf(0) * 10000) == int(pb.cdf(0) * 10000) # For different probabilities p_j, the Poisson Binomial distribution and # the Binomial distribution are different: pb = PoiBin([0.5, 0.8]) bn = binom(2, p=0.5) assert int(bn.cdf(0) * 10000) != int(pb.cdf(0) * 10000)
def test_pval_pb_binom(): """Compare the p-values with the binomial limit case. Test that the p-values of the Poisson Binomial distribution are the same as the ones of the Binomial distribution when all the probabilities are equal. """ pi = np.around(np.random.random_sample(), decimals=2) ni = np.random.randint(5, 500) pp = [pi for i in range(ni)] bn = binom(n=ni, p=pi) k = np.random.randint(0, ni) pval_bn = 1 - bn.cdf(k) + bn.pmf(k) pb = PoiBin(pp) pval_pb = pb.pval(k) assert np.all(np.around(pval_bn, decimals=10) == np.around(pval_pb, decimals=10)) # PoiBin.get_cdf ---------------------------------------------------------------
def score_sub(self, sub_id, given_sums, means, smoothing=0): dist = stats.binom(given_sums.sum(), means[sub_id]) mu = dist.mean() sd = dist.std() dist = (given_sums[sub_id] - smoothing - mu) / sd return dist
def score_sub(sub_id, given_sums, means, smoothing=0): dist = stats.binom(given_sums.sum(), means[sub_id]) mu = dist.mean() sd = dist.std() dist = (given_sums[sub_id] - mu - smoothing) / sd return dist
def value(self,samples=1): """ Samples number of values given from the specific distribution. ------------------------------------------------------------------------ - samples: number of values that will be returned. """ value=0 try: for item in self.__params: if item==0: break if item==0: value=[0]*samples else: if self.__name=="b": value=self.binom(samples) if self.__name=="e": value=self.exponential(samples) if self.__name=="f": value=self.fixed(samples) if self.__name=="g": value=self.gamma(samples) if self.__name=="g1": value=self.gamma1(samples) if self.__name=="ln": value=self.lognormal(samples) if self.__name=="n": value=self.normal(samples) if self.__name=="nb": value=self.nbinom(samples) if self.__name=="p": value=self.poisson(samples) if self.__name=="u": value=self.uniform(samples) except Exception as ex: exc_type, exc_obj, exc_tb = sys.exc_info() fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1] message="\n\tUnexpected: {0} | {1} - File: {2} - Line:{3}".format(\ ex,exc_type, fname, exc_tb.tb_lineno) status=False raise Exception(message) # self.appLogger.error(message) # sys.exit() return value
def binom(self,samples): """ Sampling from a binomial distribution ------------------------------------------------------------------------ - samples: number of values that will be returned. """ #n=number of times tested #p=probability n=self.__params[0] p=self.__params[1] distro=binom(n,p) f=distro.rvs(size=samples) return f
def test_beta_binomial_different_priors(db_path, sampler): binomial_n = 5 def model(args): return {"result": st.binom(binomial_n, args['theta']).rvs()} models = [model for _ in range(2)] models = list(map(SimpleModel, models)) population_size = ConstantPopulationSize(800) a1, b1 = 1, 1 a2, b2 = 10, 1 parameter_given_model_prior_distribution = [Distribution(theta=RV("beta", a1, b1)), Distribution(theta=RV("beta", a2, b2))] abc = ABCSMC(models, parameter_given_model_prior_distribution, MinMaxDistanceFunction(measures_to_use=["result"]), population_size, eps=MedianEpsilon(.1), sampler=sampler) n1 = 2 abc.new(db_path, {"result": n1}) minimum_epsilon = .2 history = abc.run(minimum_epsilon, max_nr_populations=3) mp = history.get_model_probabilities(history.max_t) def B(a, b): return gamma(a) * gamma(b) / gamma(a + b) def expected_p(a, b, n1): return binom(binomial_n, n1) * B(a + n1, b + binomial_n - n1) / B(a, b) p1_expected_unnormalized = expected_p(a1, b1, n1) p2_expected_unnormalized = expected_p(a2, b2, n1) p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized + p2_expected_unnormalized) p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized + p2_expected_unnormalized) assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .08
def test_pmf_accuracy(): """Compare accuracy of the probability mass function. Compare the results with the accuracy check proposed in [Hong2013]_, equation (15). """ [p1, p2, p3] = np.around(np.random.random_sample(size=3), decimals=2) [n1, n2, n3] = np.random.random_integers(1, 10, size=3) nn = n1 + n2 + n3 l1 = [p1 for i in range(n1)] l2 = [p2 for i in range(n2)] l3 = [p3 for i in range(n3)] p = l1 + l2 + l3 b1 = binom(n=n1, p=p1) b2 = binom(n=n2, p=p2) b3 = binom(n=n3, p=p3) k = np.random.randint(0, nn + 1) chi_bn = 0 for j in range(0, k+1): for i in range(0, j+1): chi_bn += b1.pmf(i) * b2.pmf(j - i) * b3.pmf(k - j) pb = PoiBin(p) chi_pb = pb.pmf(k) assert np.all(np.around(chi_bn, decimals=10) == np.around(chi_pb, decimals=10)) # PoiBin.cdf ------------------------------------------------------------------
def test_posterior(self): samples = 20 frames = 250 bins = 10 X = np.ndarray(shape=(samples, bins), dtype='uint16') for i, a in enumerate(np.linspace(0.0, 1.0, num=samples)): dist = stats.binom(bins - 1, a) X[i] = np.bincount(dist.rvs(size = frames), minlength=bins) grid_size = 200 parameter_grid = np.linspace(0.0, 0.25, num=grid_size) parameter_grid_deltas = parameter_grid[1:] - parameter_grid[:-1] parameter_distribution = np.ones(grid_size) / grid_size pmf = np.ndarray(shape=(grid_size, bins)) for i, p in enumerate(parameter_grid): pmf[i] = stats.binom(bins-1, p).pmf(np.arange(bins)) self.assertAlmostEqual(np.sum(pmf[i]), 1.0, delta=1.0e-6) p = posterior(X, parameter_grid_deltas, parameter_distribution, pmf) print p n = samples / 2 tol = 1.0e-200 self.assertTrue( np.allclose(p[-n:], 0.0, atol=tol) )
def test_separable(self): bins = 10 frames = 100 comp1 = compound_distribution( parameter_distribution=stats.uniform(0.0, 0.25), signal_family=lambda p: stats.binom(bins - 1, p), binarize_signal=False, bins = bins ) comp2 = compound_distribution( parameter_distribution=stats.uniform(0.5, 1.0), signal_family=lambda p: stats.binom(bins - 1, p), binarize_signal=False, bins=bins ) grid1 = np.linspace(0.0, 0.25, num=200) grid2 = np.linspace(0.5, 1.0, num=200) prior1, prior2 = 0.5, 0.5 gen = CompoundMC( category_priors=[prior1, prior2], compounds=[comp1, comp2], n_pixels=100, n_frames=frames ) cats, params, X = gen.rvs(size=1) clf = FastBayesianClassifier( priors=[prior1, prior2], compounds=[comp1, comp2], parameter_grids=[grid1, grid2] ) y = clf.predict_proba(X) print np.sum(np.argmax(cats, axis=1) != y)
def fit(samples, is_continuous): ''' Fits a distribution to the given samples. Parameters ---------- samples : array_like Array of samples. is_continuous : bool If `True` then a continuous distribution is fitted. Otherwise, a discrete distribution is fitted. Returns ------- best_marginal : Marginal The distribution fitted to `samples`. ''' # Mean and variance mean = np.mean(samples) var = np.var(samples) # Set suitable distributions if is_continuous: if np.any(samples <= 0): options = [norm] else: options = [norm, gamma] else: if var > mean: options = [poisson, binom, nbinom] else: options = [poisson, binom] params = np.empty(len(options), dtype=object) marginals = np.empty(len(options), dtype=object) # Fit parameters and construct marginals for i, dist in enumerate(options): if dist == poisson: params[i] = [mean] elif dist == binom: param_n = np.max(samples) param_p = np.sum(samples) / (param_n * len(samples)) params[i] = [param_n, param_p] elif dist == nbinom: param_n = mean * mean / (var - mean) param_p = mean / var params[i] = [param_n, param_p] else: params[i] = dist.fit(samples) rv_mixed = dist(*params[i]) marginals[i] = Marginal(rv_mixed) # Calculate Akaike information criterion aic = np.zeros(len(options)) for i, marginal in enumerate(marginals): aic[i] = 2 * len(params[i]) \ - 2 * np.sum(marginal.logpdf(samples)) best_marginal = marginals[np.argmin(aic)] return best_marginal
def test_beta_binomial_different_priors_initial_epsilon_from_sample(db_path, sampler): binomial_n = 5 def model(args): return {"result": st.binom(binomial_n, args.theta).rvs()} models = [model for _ in range(2)] models = list(map(SimpleModel, models)) population_size = ConstantPopulationSize(800) a1, b1 = 1, 1 a2, b2 = 10, 1 parameter_given_model_prior_distribution = [Distribution(theta=RV("beta", a1, b1)), Distribution(theta=RV("beta", a2, b2))] abc = ABCSMC(models, parameter_given_model_prior_distribution, MinMaxDistanceFunction(measures_to_use=["result"]), population_size, eps=MedianEpsilon(median_multiplier=.9), sampler=sampler) n1 = 2 abc.new(db_path, {"result": n1}) minimum_epsilon = -1 history = abc.run(minimum_epsilon, max_nr_populations=5) mp = history.get_model_probabilities(history.max_t) def B(a, b): return gamma(a) * gamma(b) / gamma(a + b) def expected_p(a, b, n1): return binom(binomial_n, n1) * B(a + n1, b + binomial_n - n1) / B(a, b) p1_expected_unnormalized = expected_p(a1, b1, n1) p2_expected_unnormalized = expected_p(a2, b2, n1) p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized + p2_expected_unnormalized) p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized + p2_expected_unnormalized) assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .08
def test_stratified_shuffle_split_even(): # Test the StratifiedShuffleSplit, indices are drawn with a # equal chance n_folds = 5 n_iter = 1000 def assert_counts_are_ok(idx_counts, p): # Here we test that the distribution of the counts # per index is close enough to a binomial threshold = 0.05 / n_splits bf = stats.binom(n_splits, p) for count in idx_counts: p = bf.pmf(count) assert_true(p > threshold, "An index is not drawn with chance corresponding " "to even draws") for n_samples in (6, 22): labels = np.array((n_samples // 2) * [0, 1]) splits = StratifiedShuffleSplit(n_iter=n_iter, test_size=1. / n_folds, random_state=0) train_counts = [0] * n_samples test_counts = [0] * n_samples n_splits = 0 for train, test in splits.split(X=np.ones(n_samples), y=labels): n_splits += 1 for counter, ids in [(train_counts, train), (test_counts, test)]: for id in ids: counter[id] += 1 assert_equal(n_splits, n_iter) n_train, n_test = _validate_shuffle_split(n_samples, test_size=1./n_folds, train_size=1.-(1./n_folds)) assert_equal(len(train), n_train) assert_equal(len(test), n_test) assert_equal(len(set(train).intersection(test)), 0) label_counts = np.unique(labels) assert_equal(splits.test_size, 1.0 / n_folds) assert_equal(n_train + n_test, len(labels)) assert_equal(len(label_counts), 2) ex_test_p = float(n_test) / n_samples ex_train_p = float(n_train) / n_samples assert_counts_are_ok(train_counts, ex_train_p) assert_counts_are_ok(test_counts, ex_test_p)
def test_stratified_shuffle_split_even(): # Test the StratifiedShuffleSplit, indices are drawn with a # equal chance n_folds = 5 n_iter = 1000 def assert_counts_are_ok(idx_counts, p): # Here we test that the distribution of the counts # per index is close enough to a binomial threshold = 0.05 / n_splits bf = stats.binom(n_splits, p) for count in idx_counts: p = bf.pmf(count) assert_true(p > threshold, "An index is not drawn with chance corresponding " "to even draws") for n_samples in (6, 22): labels = np.array((n_samples // 2) * [0, 1]) splits = cval.StratifiedShuffleSplit(labels, n_iter=n_iter, test_size=1. / n_folds, random_state=0) train_counts = [0] * n_samples test_counts = [0] * n_samples n_splits = 0 for train, test in splits: n_splits += 1 for counter, ids in [(train_counts, train), (test_counts, test)]: for id in ids: counter[id] += 1 assert_equal(n_splits, n_iter) assert_equal(len(train), splits.n_train) assert_equal(len(test), splits.n_test) assert_equal(len(set(train).intersection(test)), 0) label_counts = np.unique(labels) assert_equal(splits.test_size, 1.0 / n_folds) assert_equal(splits.n_train + splits.n_test, len(labels)) assert_equal(len(label_counts), 2) ex_test_p = float(splits.n_test) / n_samples ex_train_p = float(splits.n_train) / n_samples assert_counts_are_ok(train_counts, ex_train_p) assert_counts_are_ok(test_counts, ex_test_p)