The following 48 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.norm.ppf().
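As a quick orientation before the project examples (a minimal sketch, not taken from any of the projects below): norm.ppf is the percent-point function, i.e. the inverse of norm.cdf, mapping a probability in (0, 1) to the corresponding quantile of a normal distribution.

from scipy.stats import norm

# Standard normal: ppf is the inverse of cdf
norm.ppf(0.975)             # ~1.96, the familiar 97.5% quantile
norm.cdf(norm.ppf(0.975))   # ~0.975, round-trips back

# loc/scale shift and stretch the quantiles
norm.ppf(0.5, loc=10.0, scale=2.0)  # 10.0, the median equals the mean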
def _logcdf(self, samples):
    lower = np.full(2, -np.inf)
    upper = norm.ppf(samples)
    limit_flags = np.zeros(2)
    if upper.shape[0] > 0:

        def func1d(upper1d):
            '''
            Calculates the multivariate normal cumulative distribution
            function of a single sample.
            '''
            return mvn.mvndst(lower, upper1d, limit_flags, self.theta)[1]

        vals = np.apply_along_axis(func1d, -1, upper)
    else:
        vals = np.empty((0, ))
    old_settings = np.seterr(divide='ignore')
    vals = np.log(vals)
    np.seterr(**old_settings)
    vals[np.any(samples == 0.0, axis=1)] = -np.inf
    vals[samples[:, 0] == 1.0] = np.log(samples[samples[:, 0] == 1.0, 1])
    vals[samples[:, 1] == 1.0] = np.log(samples[samples[:, 1] == 1.0, 0])
    return vals
def bias_corrected_ci(true_coeff, samples, conf=95):
    """
    Return the bias-corrected bootstrap confidence interval, using the method from the book.
    :param true_coeff: The estimates in the original sample
    :param samples: The bootstrapped estimates
    :param conf: The level of the desired confidence interval
    :return: The bias-corrected LLCI and ULCI for the bootstrapped estimates.
    """
    ptilde = (samples < true_coeff).mean()
    Z = norm.ppf(ptilde)
    Zci = z_score(conf)
    Zlow, Zhigh = -Zci + 2 * Z, Zci + 2 * Z
    # Public norm.cdf instead of the private norm._cdf; same result for the standard normal
    plow, phigh = norm.cdf(Zlow), norm.cdf(Zhigh)
    llci = np.percentile(samples, plow * 100, interpolation='lower')
    ulci = np.percentile(samples, phigh * 100, interpolation='higher')
    return llci, ulci
def dprime(confusion_matrix):
    """
    Function takes in a 2x2 confusion matrix and returns the d-prime value for the predictions.
    d' = z(hit rate) - z(false alarm rate)
    http://en.wikipedia.org/wiki/D'
    """
    if max(confusion_matrix.shape) > 2:
        return False
    else:
        hit_rate = confusion_matrix[0, 0] / confusion_matrix[0, :].sum()
        fa_rate = confusion_matrix[1, 0] / confusion_matrix[1, :].sum()

        nudge = 0.0001
        if (hit_rate >= 1): hit_rate = 1 - nudge
        if (hit_rate <= 0): hit_rate = 0 + nudge
        if (fa_rate >= 1): fa_rate = 1 - nudge
        if (fa_rate <= 0): fa_rate = 0 + nudge

        dp = norm.ppf(hit_rate) - norm.ppf(fa_rate)
        return dp

# accuracy (% correct)
def faureF(dim, nsims):
    array = np.empty((dim, nsims))
    p = 2
    Prime = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
             59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
             127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181,
             191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251,
             257, 263, 269, 271, 277, 281]
    for i in range(0, nsims, 1):
        array[0, i] = sumDigits(i, 2)
    if dim == 1:
        # If the dimension equals 1, use 2 as the base.
        return array
    else:
        # Find the smallest listed prime >= dim to use as the base.
        # (The original loop never advanced i, which made it spin forever for dim > 3.)
        i = 0
        while p < dim:
            p = Prime[i + 1]
            i += 1
        for s in range(1, dim, 1):
            for j in range(0, nsims, 1):
                array[s, j] = np.sum(toDigits3(j, p, s + 1))
    return norm.ppf(array)
def backward_transform(self, X):
    assert len(self.data["cols"]) == X.shape[1], "Backward Transform Error"
    if(self.algo == "min-max"):
        return X*(self.data["max"]-self.data["min"])+self.data["min"]
    elif(self.algo == "normal"):
        return X*self.data["std"]+self.data["mu"]
    elif(self.algo == "inv-normal"):
        return (norm.ppf(X)-self.data["mu"])/self.data["std"]
    elif(self.algo == "auto-normal"):
        lm = self.data['boxcox'][None, :]
        inv_boxcox = lambda x: np.sign(x*lm+1)*np.abs(x*lm+1)**(1./lm)
        tX = X*self.data["std"]+self.data["mu"]
        return inv_boxcox(tX)*(self.data["max"]-self.data["min"])+self.data["min"]
    elif(self.algo == "auto-inv-normal"):
        lm = self.data['boxcox'][None, :]
        inv_boxcox = lambda x: np.sign(x*lm+1)*np.abs(x*lm+1)**(1./lm)
        tX = norm.ppf(X, self.data["mu"], self.data["std"])
        return inv_boxcox(tX)*(self.data["max"]-self.data["min"])+self.data["min"]
def frequency_test(generator, n_bits, sig_level=None, misc=None, n1=None):
    if n1 is None:
        n0, n1 = _calculate_n0_n1(generator, n_bits)
    else:
        n0 = n_bits - n1

    x1 = ((n0 - n1) ** 2) / n_bits
    p_value = erfc(sqrt(x1 / 2))

    if type(misc) is dict:
        misc.update(n0=n0, n1=n1, p_value=p_value)

    if sig_level is None:
        return x1
    else:
        limit = chi2.ppf(1 - sig_level, 1)
        if type(misc) is dict:
            misc.update(x=x1, limit=limit)
        return x1 <= limit
def norm_std_from_iqr(mu, iqr):
    """Computes the standard deviation of a normal distribution with mean mu and IQR iqr."""
    # Assuming a normal distribution (so symmetric), Q1 = mu - (iqr / 2)
    Q1 = mu - (iqr / 2.0)
    # Assuming a normal distribution with mean mu, std s, and first quartile Q1,
    # we have (Q1 - mu)/s = ppf(0.25)
    return (Q1 - mu) / norm.ppf(0.25)
def tick_values(self, vmin, vmax):
    eps = 0.
    return norm.cdf(AutoLocator.tick_values(self,
                                            norm.ppf(max(eps, vmin)),
                                            norm.ppf(min(1 - eps, vmax))))
def transform_non_affine(self, a):
    res = np.zeros(len(a))
    goodId = np.where((a <= 1.) & (a >= 0.))[0]
    res[goodId] = norm.ppf(a[goodId])
    return res
def rvs(self, size=1):
    '''
    Generates random variates from the mixed vine.  Currently assumes a
    c-vine structure.

    Parameters
    ----------
    size : integer, optional
        The number of samples to generate.  (Default: 1)

    Returns
    -------
    array_like
        n-by-d matrix of samples where n is the number of samples and d
        is the number of marginals.
    '''
    if self.is_root_layer():
        # Determine distribution dimension
        layer = self
        while not layer.is_marginal_layer():
            layer = layer.input_layer
        dim = len(layer.marginals)
        samples = np.random.random(size=[size, dim])
        (samples, _) = self.make_dependent(samples)
        # Use marginals to transform dependent uniform samples
        for i, marginal in enumerate(layer.marginals):
            samples[:, i] = marginal.ppf(samples[:, i])
        return samples
    else:
        return self.output_layer.rvs(size)
def entropy(self, alpha=0.05, sem_tol=1e-3, mc_size=1000):
    '''
    Estimates the entropy of the mixed vine.

    Parameters
    ----------
    alpha : float, optional
        Significance level of the entropy estimate.  (Default: 0.05)
    sem_tol : float, optional
        Maximum standard error as a stopping criterion.  (Default: 1e-3)
    mc_size : integer, optional
        Number of samples that are drawn in each iteration of the Monte
        Carlo estimation.  (Default: 1000)

    Returns
    -------
    ent : float
        Estimate of the mixed vine entropy in bits.
    sem : float
        Standard error of the mixed vine entropy estimate in bits.
    '''
    # Gaussian confidence interval for sem_tol and level alpha
    conf = norm.ppf(1 - alpha)
    sem = np.inf
    ent = 0.0
    var_sum = 0.0
    k = 0
    while sem >= sem_tol:
        # Generate samples
        samples = self.rvs(mc_size)
        logp = self.logpdf(samples)
        log2p = logp[np.isfinite(logp)] / np.log(2)
        k += 1
        # Monte-Carlo estimate of entropy
        ent += (-np.mean(log2p) - ent) / k
        # Estimate standard error
        var_sum += np.sum((-log2p - ent) ** 2)
        sem = conf * np.sqrt(var_sum / (k * mc_size * (k * mc_size - 1)))
    return ent, sem
def _logpdf(self, samples):
    if self.theta >= 1.0:
        vals = np.zeros(samples.shape[0])
        vals[samples[:, 0] == samples[:, 1]] = np.inf
    elif self.theta <= -1.0:
        vals = np.zeros(samples.shape[0])
        vals[samples[:, 0] == 1 - samples[:, 1]] = np.inf
    else:
        nrvs = norm.ppf(samples)
        vals = 2 * self.theta * nrvs[:, 0] * nrvs[:, 1] - self.theta**2 \
            * (nrvs[:, 0]**2 + nrvs[:, 1]**2)
        vals /= 2 * (1 - self.theta**2)
        vals -= np.log(1 - self.theta**2) / 2
    return vals
def _ccdf(self, samples):
    vals = np.zeros(samples.shape[0])
    # Avoid subtraction of infinities
    neqz = np.bitwise_and(np.any(samples > 0.0, axis=1),
                          np.any(samples < 1.0, axis=1))
    nrvs = norm.ppf(samples[neqz, :])
    vals[neqz] = norm.cdf((nrvs[:, 0] - self.theta * nrvs[:, 1])
                          / np.sqrt(1 - self.theta**2))
    vals[np.invert(neqz)] = norm.cdf(0.0)
    return vals
def preprocess(self, x, fit=False):
    """Transform each marginal to be as close to a standard Gaussian as possible.
    'standard' (default) just subtracts the mean and scales by the std.
    'empirical' does an empirical gaussianization (but this cannot be inverted).
    'outliers' tries to squeeze in the outliers.
    Any other choice will skip the transformation."""
    if self.missing_values is not None:
        x, self.n_obs = mean_impute(x, self.missing_values)  # Creates a copy
    else:
        self.n_obs = len(x)
    if self.gaussianize == 'none':
        pass
    elif self.gaussianize == 'standard':
        if fit:
            mean = np.mean(x, axis=0)
            # std = np.std(x, axis=0, ddof=0).clip(1e-10)
            std = np.sqrt(np.sum((x - mean)**2, axis=0) / self.n_obs).clip(1e-10)
            self.theta = (mean, std)
        x = ((x - self.theta[0]) / self.theta[1])
        if np.max(np.abs(x)) > 6 and self.verbose:
            print("Warning: outliers more than 6 stds away from mean. Consider using gaussianize='outliers'")
    elif self.gaussianize == 'outliers':
        if fit:
            mean = np.mean(x, axis=0)
            std = np.std(x, axis=0, ddof=0).clip(1e-10)
            self.theta = (mean, std)
        x = g((x - self.theta[0]) / self.theta[1])  # g truncates long tails
    elif self.gaussianize == 'empirical':
        print("Warning: correct inversion/transform of empirical gauss transform not implemented.")
        x = np.array([norm.ppf((rankdata(x_i) - 0.5) / len(x_i)) for x_i in x.T]).T
    if self.gpu and fit:  # Don't return GPU matrices when only transforming
        x = cm.CUDAMatrix(x)
    return x
def obrien_fleming(information_fraction, alpha=0.05):
    """Calculate an approximation of the O'Brien-Fleming alpha spending function.

    Args:
        information_fraction (scalar or array_like): share of the information
            amount at the point of evaluation, e.g. the share of the maximum sample size
        alpha: type-I error rate

    Returns:
        float: redistributed alpha value at the time point with the given information fraction
    """
    return (1 - norm.cdf(norm.ppf(1 - alpha / 2) / np.sqrt(information_fraction))) * 2
def compute_margin_of_error(array_1d, confidence=0.95):
    """
    Compute margin of error.

    Arguments:
        array_1d (array):
        confidence (float): 0 <= confidence <= 1

    Returns:
        float:
    """
    return norm.ppf(q=confidence) * array_1d.std() / sqrt(array_1d.size)
def _predict(self, steps, exog, alpha):
    assert 0 < alpha < 1
    y = (exog if exog is not None else self._endog)[-self.results.k_ar:]
    forecast = self.results.forecast(y, steps)
    # FIXME: The following is adapted from statsmodels's
    # VAR.forecast_interval() as the original doesn't work
    q = norm.ppf(1 - alpha / 2)
    sigma = np.sqrt(np.abs(np.diagonal(self.results.mse(steps), axis1=2)))
    err = q * sigma
    return np.asarray([forecast, forecast - err, forecast + err])
def z_score(conf):
    """
    :param conf: The level of confidence desired
    :return: The Z-score corresponding to the level of confidence desired.
    """
    return norm.ppf((100 - (100 - conf) / 2) / 100)
def bs_delta_to_strike(fwd, delta, vol, texp):
    ys = norm.ppf(delta)
    return fwd / math.exp((ys - 0.5 * vol * math.sqrt(texp)) * vol * math.sqrt(texp))
def bachelier_delta_to_strike(fwd, delta, vol, texp):
    return fwd - norm.ppf(delta) * vol * math.sqrt(texp)
def generate(self, n_samples=100, batch_size=32, **kwargs):
    """
    Sample new data from the generator network.

    Args:
        n_samples: int, the number of samples to be generated
        batch_size: int, number of samples generated at once

    Keyword Args:
        return_probs: bool, whether the output generations should be raw probabilities or sampled Bernoulli outcomes
        latent_samples: ndarray, alternative source of latent encoding, otherwise sampling will be applied

    Returns:
        The generated data as ndarray of shape (n_samples, data_dim)
    """
    return_probs = kwargs.get('return_probs', True)
    latent_samples = kwargs.get('latent_samples', None)

    if latent_samples is not None:
        data_iterator, n_iters = self.data_iterator.iter(latent_samples, batch_size=batch_size, mode='generation')
        data_probs = self.generative_model.predict_generator(data_iterator, steps=n_iters)
    else:
        if self.latent_dim == 2:
            # perform 2d grid search
            n_samples_per_axis = complex(int(np.sqrt(n_samples)))
            uniform_grid = np.mgrid[0.01:0.99:n_samples_per_axis, 0.01:0.99:n_samples_per_axis].reshape(2, -1).T
            latent_samples = standard_gaussian.ppf(uniform_grid)
        else:
            latent_samples = np.random.standard_normal(size=(n_samples, self.latent_dim))
        data_iterator, n_iters = self.data_iterator.iter(latent_samples, batch_size=batch_size, mode='generation')
        data_probs = self.generative_model.predict_generator(data_iterator, steps=n_iters)

    if return_probs:
        return data_probs
    sampled_data = np.random.binomial(1, p=data_probs)
    return sampled_data
def _transform_col(self, x, col):
    """Normalize one numerical column.

    Args:
        x (numpy.array): a numerical column to normalize
        col (int): column index

    Returns:
        A normalized feature vector.
    """
    return norm.ppf(self.ecdfs[col](x) * .998 + .001)
def vdc(n, base):
    """
    Compute the n-th number of the base-b Van der Corput sequence.
    """
    vdc, denom = 0, 1
    while n:
        denom *= base
        n, remainder = divmod(n, base)
        vdc += remainder / denom
    return norm.ppf(vdc)
def halton2(dim, nsims):
    """
    The function no longer crashes.  Version 2 of the Halton sequence,
    without using the existing Python library.
    """
    h = np.empty(nsims * dim)
    h.fill(np.nan)
    p = np.empty(nsims)
    p.fill(np.nan)
    Base = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
    lognsims = log(nsims + 1)
    for i in range(dim):
        base = Base[i]
        n = int(ceil(lognsims / log(base)))
        for t in range(n):
            p[t] = pow(base, -(t + 1))
        for j in range(nsims):
            d = j + 1
            somme = fmod(d, base) * p[0]
            for t in range(1, n):
                d = floor(d / base)
                somme += fmod(d, base) * p[t]
            h[j * dim + i] = somme
    return norm.ppf(h.reshape(dim, nsims))
def stratified_sampling1(M, nsims):
    array = np.empty(nsims)
    array = lh.sample(M, nsims)
    array = norm.ppf(array)
    return array.reshape(M, nsims)
def stratified_sampling2(nnoises, nsims):
    """Divide the interval [0, 1] into several strata (m strata)."""
    noises = np.empty((nnoises, nsims))
    m = int(nnoises / nsims)
    # For each stratum, draw l uniform samples such that nsims = m * l,
    # then map them through the inverse cdf (ppf).
    # Note: the original loop used range(m + 1, 1), which is empty and left
    # `noises` uninitialized; iterating over the m strata appears to be the intent.
    for i in range(m):
        noises[i, :] = norm.ppf(np.random.uniform(i / m, (i + 1) / m, nsims))
    return noises
def stratified_sampling4(M, nsims):
    L = nsims / M
    noises = np.empty((M, nsims))
    for i in range(M):
        noises[i, :] = norm.ppf(np.random.uniform(i / M, (i + 1) / M, nsims))
    return noises
def stratified_samplingF(dim, nsims):
    """Latin Hypercube Sample: an efficient form of multi-dimensional stratification."""
    lhd = np.empty((dim, nsims))
    lhd = lhs(dim, samples=nsims)
    lhd = norm.ppf(lhd)
    lhd = np.transpose(lhd)
    return lhd
def __StandardQTFun__(data):
    data0 = data.reset_index(level=0, drop=True)
    NotNAN = pd.notnull(data0)
    quantile = data0.rank(method='min') / NotNAN.sum()
    quantile.loc[quantile[data.columns[0]] == 1, :] = 1 - 10 ** (-6)
    data_after_standard = norm.ppf(quantile)
    return pd.DataFrame(data_after_standard, index=data.index, columns=data.columns)
def estimateMonteCarloSampleCount(self, TOL):
    theta = self._calcTheta(TOL, self.bias)
    V = self.Vl_estimate[self.last_itr.lvls_find([])]
    if np.isnan(V):
        return np.nan
    Ca = norm.ppf(self.params.confidence)
    return np.maximum(np.reshape(self.params.M0, (1,))[-1],
                      int(np.ceil((theta * TOL / Ca)**-2 * V)))

################## Bayesian specific functions
def _estimateAll(self):
    self._estimateQParams()
    self.iters[-1].Vl_estimate = self.fn.Norm(self.all_itr.calcDeltaVl()) \
                                 if not self.params.bayesian \
                                 else self._estimateBayesianVl()
    self.iters[-1].Wl_estimate = self.fn.WorkModel(lvls=self.last_itr.get_lvls())
    self.iters[-1].bias = self._estimateBias()
    Ca = norm.ppf(self.params.confidence)
    self.iters[-1].stat_error = np.inf if np.any(self.last_itr.M == 0) \
                                else Ca * \
                                np.sqrt(np.sum(self.Vl_estimate / self.last_itr.M))
def _calcTheoryM(self, TOL, theta, Vl, Wl, ceil=True, minM=1):
    Ca = norm.ppf(self.params.confidence)
    M = (theta * TOL / Ca)**-2 *\
        np.sum(np.sqrt(Wl * Vl)) * np.sqrt(Vl / Wl)
    M = np.maximum(M, minM)
    M[np.isnan(M)] = minM
    if ceil:
        M = np.ceil(M).astype(int)  # np.int is deprecated; the builtin int is equivalent here
    return M
def _confidence_interval_by_alpha(cls, p_hat, n, alpha, method='wald'):
    """Compute confidence interval for estimate of Bernoulli parameter p.

    Args:
        p_hat: maximum likelihood estimate of p
        n: samples observed
        alpha: the probability that the true p falls outside the CI

    Returns:
        left, right
    """
    prob = 1 - 0.5 * alpha
    z = norm.ppf(prob)

    compute_ci = cls._confidence_interval_by_z_wald if method == 'wald' else cls._confidence_interval_by_z_wilson
    return compute_ci(p_hat, n, z)
def rz_ci(r, n, conf_level=0.95):
    zr_se = pow(1 / (n - 3), .5)
    moe = norm.ppf(1 - (1 - conf_level) / float(2)) * zr_se
    zu = atanh(r) + moe
    zl = atanh(r) - moe
    return tanh((zl, zu))
def get_AQ(score):
    score = float(score)
    percentage = get_percentage(score)
    z_score = norm.ppf(percentage)
    return int(100 + (z_score * 24))
def _breakpoints(n_bins, scale=1.):
    """Compute breakpoints for a given number of SAX symbols and a given Gaussian scale.

    Example
    -------
    >>> _breakpoints(n_bins=2)
    array([ 0.])
    """
    return norm.ppf([float(a) / n_bins for a in range(1, n_bins)], scale=scale)
def _bin_medians(n_bins, scale=1.):
    """Compute median value corresponding to SAX symbols for a given Gaussian scale.

    Example
    -------
    >>> _bin_medians(n_bins=2)
    array([-0.67448975,  0.67448975])
    """
    return norm.ppf([float(a) / (2 * n_bins) for a in range(1, 2 * n_bins, 2)], scale=scale)
def sample_manifold2d(vae, N):
    image = np.zeros((N*28, N*28))
    for z1 in xrange(N):
        for z2 in xrange(N):
            z = [np.array([norm.ppf(z1*(1/N) + 1/(2*N)),
                           norm.ppf(z2*(1/N) + 1/(2*N))])]
            sample = vae.sample(z).reshape((28, 28))
            image[z1*28:(z1 + 1)*28, z2*28:(z2 + 1)*28] = sample
    image *= 255.
    plt.imshow(image.astype(np.uint8), cmap="gray")
    plt.show()
def serial_test(generator, n_bits, n1=None, sig_level=None, misc=None):
    if n1 is None:
        n0, n1 = _calculate_n0_n1(generator, n_bits)
    else:
        n0 = n_bits - n1

    n_xx = [0] * 4  # number of occurrences of 00, 01, 10, 11
    bi0 = generator.random_bit()  # bit b[i]
    for i in range(n_bits - 1):
        bi1 = generator.random_bit()  # bit b[i+1]
        n_xx[bi0 << 1 | bi1] += 1
        bi0 = bi1

    # Calculate the statistic
    x2 = 4 / (n_bits - 1) * (n_xx[0b00] ** 2 + n_xx[0b01] ** 2 +
                             n_xx[0b10] ** 2 + n_xx[0b11] ** 2)
    x2 += -2 / n_bits * (n0 ** 2 + n1 ** 2) + 1

    if type(misc) is dict:
        misc.update(n0=n0, n1=n1, n00=n_xx[0b00], n01=n_xx[0b01], n10=n_xx[0b10], n11=n_xx[0b11])

    if sig_level is None:
        return x2
    else:
        limit = chi2.ppf(1 - sig_level, 2)
        if type(misc) is dict:
            misc.update(x=x2, limit=limit)
        return x2 <= limit
def poker_test(generator, n_bits, m=None, sig_level=None, misc=None):
    if m is not None:
        if n_bits // m < 5 * (2 ** m):
            raise ValueError("Value m must satisfy requirement [n/m]>=5*2^m")
    else:
        # find the highest suitable m value
        m = int(log2(n_bits / 5))
        while n_bits // m < 5 * (2 ** m):
            m -= 1

    k = n_bits // m
    # Divide the sequence into k non-overlapping parts each of length m
    # and let ni be the number of occurrences of the ith type of sequence of length m, 1 <= i <= 2^m.
    ni = [0] * (2 ** m)
    for i in range(0, k * m, m):
        t = concat_chunks([generator.random_bit() for _ in range(m)], bits=1)
        ni[t] += 1

    x3 = (2 ** m) / k * sum(map(lambda x: x ** 2, ni)) - k

    if type(misc) is dict:
        misc.update(m=m, k=k, ni=ni)

    if sig_level is None:
        return x3
    else:
        limit = chi2.ppf(1 - sig_level, (2 ** m) - 1)
        if type(misc) is dict:
            misc.update(x=x3, limit=limit)
        return x3 <= limit
def autocorrelation_test(generator, n_bits, d, sig_level=None, misc=None):
    if not (1 <= d <= n_bits // 2):
        raise ValueError("Parameter d must be between 1 and [n/2]=%d" % (n_bits // 2))

    # random bits from i to i+d
    generated_bits = deque([generator.random_bit() for _ in range(d)], maxlen=d)
    a = 0
    for i in range(n_bits - d):
        # a += sequence[i] ^ sequence[i + d]
        s_i_d = generator.random_bit()
        s_i_0 = generated_bits.popleft()
        generated_bits.append(s_i_d)
        a += s_i_0 ^ s_i_d

    # Calculate the statistic
    x5 = 2 * (a - (n_bits - d) / 2) / sqrt(n_bits - d)

    if type(misc) is dict:
        misc.update(a=a)

    if sig_level is None:
        return x5
    else:
        limit = -norm.ppf(sig_level / 2)
        if type(misc) is dict:
            misc.update(x=x5, limit=limit)
        return -limit <= x5 <= limit
def generate(estimator):
    from scipy.stats import norm

    n = 15  # Figure row size
    figure = np.zeros((28 * n, 28 * n))
    # Random normal distributions to feed network with
    x_axis = norm.ppf(np.linspace(0.05, 0.95, n))
    y_axis = norm.ppf(np.linspace(0.05, 0.95, n))

    samples = []
    for i, x in enumerate(x_axis):
        for j, y in enumerate(y_axis):
            samples.append(np.array([x, y], dtype=np.float32))

    samples = np.array(samples)
    x_reconstructed = estimator.generate(
        plx.processing.numpy_input_fn({'samples': samples}, batch_size=n * n, shuffle=False))

    results = [x['results'] for x in x_reconstructed]
    for i, x in enumerate(x_axis):
        for j, y in enumerate(y_axis):
            digit = results[i * n + j].reshape(28, 28)
            figure[i * 28: (i + 1) * 28, j * 28: (j + 1) * 28] = digit

    try:
        import matplotlib.pyplot as plt

        plt.figure(figsize=(10, 10))
        plt.imshow(figure, cmap='Greys_r')
        plt.show()
    except ImportError:
        pass
def Z(x):
    return norm.ppf(x)
def saxBreakpoints(cardinality):
    return norm.ppf(quantiles(cardinality))
def _make_latent_exploration_op(self):
    """
    Code adapted from https://github.com/fastforwardlabs/vae-tf/blob/master/plot.py
    """
    # ops for exploration of latent space
    nx = 30
    ny = nx
    z_dim = self.target_dist.dim

    if self.latent_dist.dim == 2:
        range_ = (0, 1)
        min_, max_ = range_
        zs = np.rollaxis(np.mgrid[max_:min_:ny*1j, min_:max_:nx*1j], 0, 3)
        if isinstance(self.target_dist, Gaussian):
            from scipy.stats import norm
            DELTA = 1E-8  # delta to avoid +/- inf at 0, 1 boundaries
            zs = np.array([norm.ppf(np.clip(z, TINY, 1 - TINY),
                                    scale=self.target_dist.stddev)
                           for z in zs])
        else:
            raise NotImplementedError
    zs = tf.constant(zs.reshape((nx*ny, z_dim)),
                     dtype=tf.float32)

    self.zs = tf.placeholder_with_default(zs,
                                          shape=[None, z_dim],
                                          name="zs")

    hs_decoded = self.decoder_template.construct(z_in=self.zs,
                                                 phase=pt.Phase.test).tensor
    xs_dist_info = self.output_dist.activate_dist(hs_decoded)
    xs = self.output_dist.sample(xs_dist_info)

    imgs = tf.reshape(xs, [nx, ny] + list(self.dataset.image_shape))
    stacked_img = []
    for row in xrange(nx):
        row_img = []
        for col in xrange(ny):
            row_img.append(imgs[row, col, :, :, :])
        stacked_img.append(tf.concat(axis=1, values=row_img))
    self.latent_exploration_op = tf.concat(axis=0, values=stacked_img)
def toy_data_quantile(n_samples=50, probs=0.5, pattern=SinePattern(),
                      noise=(1., .2, 0., 1.5), random_state=None):
    """Sine wave toy dataset.

    The target y is computed as a sine curve modulated by a sine envelope with
    some period (default 1/3 Hz) and mean (default 1).  Moreover, this pattern
    is distorted with a random Gaussian noise with mean 0 and a linearly
    decreasing standard deviation (default from 1.2 at X = 0 to 0.2 at X = 1.5).

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.

    probs : list or float, shape = [n_quantiles], default=0.5
        Probabilities (quantiles levels).

    pattern : callable, default = SinePattern()
        Callable which generates n_sample 1D data (inputs and targets).

    noise : :rtype: (float, float, float, float)
        Noise parameters (variance, shift, support_min, support_max).

    Returns
    -------
    X : array, shape = [n_samples, 1]
        Input data.

    y : array, shape = [n_sample, 1]
        Targets.

    quantiles : array, shape = [n_samples x n_quantiles]
        True conditional quantiles.
    """
    probs = array(probs, ndmin=1)
    noise_var, noise_shift, noise_min, noise_max = noise

    random_state = check_random_state(random_state)
    pattern.random_state = random_state
    inputs, targets = pattern(n_samples)

    # Noise decreasing std (from noise+0.2 to 0.2)
    noise_std = noise_shift + (noise_var * (noise_max - inputs) /
                               (noise_max - noise_min))
    # Gaussian noise with decreasing std
    add_noise = noise_std * random_state.randn(n_samples, 1)
    observations = targets + add_noise
    quantiles = [targets + array([norm.ppf(p, loc=0., scale=abs(noise_c))
                                  for noise_c in noise_std]) for p in probs]
    return inputs, observations.ravel(), quantiles
def runs_test(generator, n_bits, sig_level=None, fips_style=False, misc=None):
    # The expected number of gaps (or blocks) of length i in a random sequence of length n is
    # e[i] = (n-i+3)/2^(i+2).  Let k be equal to the largest integer i for which e[i] >= 5.
    def f_e(j):
        return (n_bits - j + 3) / 2 ** (j + 2)

    if fips_style:
        k = 6
    else:
        k = 1
        while f_e(k + 1) >= 5:
            k += 1

    # Let B[i], G[i] be the number of blocks and gaps, respectively, of length i in s for each i,
    # 1 <= i <= k.
    run_bit = None
    run_length = 0
    max_run_length = 0
    b = [0] * k  # zero-indexed
    g = [0] * k
    for i in range(n_bits + 1):
        bit = generator.random_bit() if i < n_bits else None

        # ongoing run
        if run_bit == bit:
            run_length += 1

        # run ended (or it's the beginning or end)
        if run_bit != bit:
            if run_length > max_run_length:
                max_run_length = run_length
            if fips_style and run_length > 6:
                run_length = 6
            if 1 <= run_length <= k:
                if run_bit == 0:
                    g[run_length - 1] += 1  # zero-indexed!
                elif run_bit == 1:
                    b[run_length - 1] += 1
            # reset counter
            run_bit = bit
            run_length = 1

    # Calculate the statistic:
    e = [f_e(i) for i in range(1, k + 1)]
    x4 = sum([(x - e[i]) ** 2 / e[i] for i, x in list(enumerate(b)) + list(enumerate(g))])

    if type(misc) is dict:
        misc.update(k=k, b=b, g=g, e=e, max_run=max_run_length)

    if sig_level is None:
        return x4
    else:
        limit = chi2.ppf(1 - sig_level, 2 * (k - 1))
        if type(misc) is dict:
            misc.update(x=x4, limit=limit)
        return x4 <= limit
def mk_ts(df, const, group1, orderby='year', alpha=0.05):
    """
    df = dataframe
    const = variable tested for trend
    group1 = variable to group by
    orderby = variable to order by (typically a date)
    """

    def zcalc(Sp, Varp):
        if Sp > 0:
            return (Sp - 1) / Varp**0.5
        elif Sp < 0:
            return (Sp + 1) / Varp**0.5
        else:
            return 0

    df.is_copy = False

    # .loc replaces the long-deprecated .ix indexer; const is a column label here
    df[const] = pd.to_numeric(df.loc[:, const])
    # remove null values
    df[const].dropna(inplace=True)
    # remove index
    df.reset_index(inplace=True, drop=True)
    # sort by groups, then time
    df.sort_values(by=[group1, orderby], axis=0, inplace=True)

    # group by group and apply mk_test
    dg = df.groupby(group1).apply(lambda x: mk_test(x.loc[:, const].dropna().values, alpha))
    Var_S = dg.loc[:, 'varS'].sum()
    S = dg.loc[:, 's'].sum()
    N = dg.loc[:, 'n'].sum()
    Z = zcalc(S, Var_S)
    P = 2 * (1 - norm.cdf(abs(Z)))
    group_n = len(dg)
    h = abs(Z) > norm.ppf(1 - alpha / 2)
    tau = S / dg.loc[:, 'ta'].sum()

    if (Z < 0) and h:
        trend = 'decreasing'
    elif (Z > 0) and h:
        trend = 'increasing'
    else:
        trend = 'no trend'

    return pd.Series({'S': S, 'Z': round(Z, 2), 'p': P, 'trend': trend,
                      'group_n': group_n, 'sample_n': N, 'Var_S': Var_S,
                      'tau': round(tau, 2)})