The following 50 code examples, extracted from open-source Python projects, show how to use scipy.stats.norm.cdf().
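Before the project examples, a minimal sketch of the call signature may help (this snippet is not taken from any of the projects below): norm.cdf(x) evaluates the standard normal CDF, and norm.cdf(x, loc, scale) evaluates the CDF of a normal distribution with the given mean and standard deviation.

from scipy.stats import norm
import numpy as np

# Standard normal CDF: P(Z <= 1.96) is about 0.975
print(norm.cdf(1.96))

# Location/scale parameters: P(X <= 110) for X ~ Normal(mean=100, sd=15)
print(norm.cdf(110, loc=100, scale=15))

# The call is vectorized over array inputs
print(norm.cdf(np.array([-1.0, 0.0, 1.0])))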
def cov_ellipse(cov, q=None, nsig=None, **kwargs):
    """
    Code is slightly modified, but essentially borrowed from:
    https://stackoverflow.com/questions/18764814/make-contour-of-scatter
    """
    if q is not None:
        q = np.asarray(q)
    elif nsig is not None:
        q = 2 * norm.cdf(nsig) - 1
    else:
        raise ValueError('Either `q` or `nsig` should be specified')

    r2 = chi2.ppf(q, 2)
    val, vec = np.linalg.eigh(cov)
    width, height = 2 * np.sqrt(val[:, None] * r2)
    rotation = np.degrees(np.arctan2(*vec[::-1, 0]))

    return width, height, rotation
def _erpac(xp, xa, n_perm, n_jobs):
    """Sub erpac function
    [xp] = [xa] = (npts, ntrials)
    """
    npts, ntrials = xp.shape
    # Compute ERPAC
    xerpac = np.zeros((npts,))
    for t in range(npts):
        xerpac[t] = circ_corrcc(xp[t, :], xa[t, :])[0]

    # Compute surrogates:
    data = Parallel(n_jobs=n_jobs)(delayed(_erpacSuro)(
        xp, xa, npts, ntrials) for pe in range(n_perm))
    suro = np.array(data)

    # Normalize erpac:
    xerpac = (xerpac - suro.mean(0)) / suro.std(0)

    # Get p-value:
    pvalue = norm.cdf(-np.abs(xerpac)) * 2

    return xerpac, pvalue
def _mu(distr, z, eta):
    """The non-linearity (inverse link)."""
    if distr in ['softplus', 'gamma']:
        mu = np.log1p(np.exp(z))
    elif distr == 'poisson':
        mu = z.copy()
        intercept = (1 - eta) * np.exp(eta)
        mu[z > eta] = z[z > eta] * np.exp(eta) + intercept
        mu[z <= eta] = np.exp(z[z <= eta])
    elif distr == 'gaussian':
        mu = z
    elif distr == 'binomial':
        mu = expit(z)
    elif distr == 'probit':
        mu = norm.cdf(z)
    return mu
def ProbabilityImprovement(self, tau, mean, std):
    """
    Probability of Improvement acquisition function.

    Parameters
    ----------
    tau: float
        Best observed function evaluation.
    mean: float
        Point mean of the posterior process.
    std: float
        Point std of the posterior process.

    Returns
    -------
    float
        Probability of improvement.
    """
    z = (mean - tau - self.eps) / (std + self.eps)
    return norm.cdf(z)
def ExpectedImprovement(self, tau, mean, std):
    """
    Expected Improvement acquisition function.

    Parameters
    ----------
    tau: float
        Best observed function evaluation.
    mean: float
        Point mean of the posterior process.
    std: float
        Point std of the posterior process.

    Returns
    -------
    float
        Expected improvement.
    """
    z = (mean - tau - self.eps) / (std + self.eps)
    return (mean - tau) * norm.cdf(z) + std * norm.pdf(z)[0]
def tExpectedImprovement(self, tau, mean, std, nu=3.0):
    """
    Expected Improvement acquisition function. Only to be used with
    `tStudentProcess` surrogate.

    Parameters
    ----------
    tau: float
        Best observed function evaluation.
    mean: float
        Point mean of the posterior process.
    std: float
        Point std of the posterior process.

    Returns
    -------
    float
        Expected improvement.
    """
    gamma = (mean - tau - self.eps) / (std + self.eps)
    return gamma * std * t.cdf(gamma, df=nu) + std * (1 + (gamma ** 2 - 1) / (nu - 1)) * t.pdf(gamma, df=nu)
def EI(params, means, stand_devi, parms_done, y, n_iterations, k):
    """
    Expected Improvement acquisition function
    :param params: test data
    :param means: GP posterior mean
    :param stand_devi: standard deviation
    :param parms_done: training data
    :param y: training targets
    :return: next point that need to pick up
    """
    s = 0.0005  # small value
    max_mean = np.max(y)
    f_max = max_mean + s
    z = (means - f_max) / stand_devi
    EI_vector = (means - f_max) * norm.cdf(z) + stand_devi * norm.pdf(z)
    max_index = np.where(EI_vector == np.max(EI_vector))
    next_point = params[max_index]
    if k == n_iterations - 1:
        plt.subplot(2, 1, 2)
        plt.plot(params, EI_vector, label='EI')
        plt.legend(loc=3)
    return next_point
def task(seasons, parameters):
    ##seasonIndexList = getSeasonIndexList(leagueId)
    seasons = filterSeasonsByParams(seasons, parameters)
    printTitleBox(parameters.getLevelId())
    predictedTotal = 0
    actualTotal = 0
    for season in seasons:
        print "%s %i teams," % (season.getSeasonId(), season.getTotalNumberOfTeams())
        print "          AWQI, APQI, AGCI, AOQI, ADQI, CPQI, Points, Points Pct \nAverages: %2.2f, %2.2f, %2.2f, %2.2f, %2.2f, %2.2f, %2.2f, %2.2f\nSigmas:   %2.2f, %2.2f, %2.2f, %2.2f, %2.2f, %2.2f, %2.2f, %2.2f" % (
            season.getTeamStatAverage(team.Team.getAWQI),
            season.getTeamStatAverage(team.Team.getAPQI),
            season.getTeamStatAverage(team.Team.getAGCI),
            season.getTeamStatAverage(team.Team.getAOQI),
            season.getTeamStatAverage(team.Team.getADQI),
            season.getTeamStatAverage(team.Team.getCPQI),
            season.getTeamStatAverage(team.Team.getSeasonPointsTotal),
            season.getTeamStatAverage(team.Team.getPointsPercentage),
            standardDeviation(season.getTeamStatList(team.Team.getAWQI)),
            standardDeviation(season.getTeamStatList(team.Team.getAPQI)),
            standardDeviation(season.getTeamStatList(team.Team.getAGCI)),
            standardDeviation(season.getTeamStatList(team.Team.getAOQI)),
            standardDeviation(season.getTeamStatList(team.Team.getADQI)),
            standardDeviation(season.getTeamStatList(team.Team.getCPQI)),
            standardDeviation(season.getTeamStatList(team.Team.getSeasonPointsTotal)),
            standardDeviation(season.getTeamStatList(team.Team.getPointsPercentage)))
        oneSigma = standardDeviation(season.getTeamStatList(team.Team.getCPQI))
        oneGoalSigma = 0.987 / oneSigma
        prediction = (1.000 - norm.cdf(oneGoalSigma)) * season.getTotalNumberOfTeams()
        actual = season.getTeamsAboveOneGoalCutoff()
        print "One goal cutoff: %f sigma, above teams predicted, %.3f, actual %i\n" % (norm.cdf(oneGoalSigma), prediction, actual)
        predictedTotal += prediction
        actualTotal += actual
    print "%s oneGoal Team totals: predicted %i, actual %i" % (parameters.getLevelId(), predictedTotal, actualTotal)
def get_p_vals(self, X):
    '''
    Imputes p-values from the Z-scores of `ScaledFScore` scores.  Assuming
    incorrectly that the scaled f-scores are normally distributed.

    Parameters
    ----------
    X : np.array
        Array of word counts, shape (N, 2) where N is the vocab size.  X[:,0] is
        the positive class, while X[:,1] is the negative class.

    Returns
    -------
    np.array of p-values
    '''
    f_scores = ScaledFScore.get_scores(X[:, 0], X[:, 1], self.scaler_algo, self.beta)
    z_scores = (f_scores - np.mean(f_scores)) / (np.std(f_scores) / np.sqrt(len(f_scores)))
    return norm.cdf(z_scores)
def forward_transform(self, X):
    tX = X[:, self.data["cols"]]
    if(self.algo == "min-max"):
        return (tX - self.data["min"]) / (self.data["max"] - self.data["min"])
    elif(self.algo == "normal"):
        return (tX - self.data["mu"]) / self.data["std"]
    elif(self.algo == "inv-normal"):
        return norm.cdf((tX - self.data["mu"]) / self.data["std"])
    elif(self.algo == "auto-normal"):
        tX = (tX - self.data["min"]) / (self.data["max"] - self.data["min"])
        lm = self.data['boxcox'][None, :]
        boxcox = lambda x: (np.sign(x) * np.abs(x) ** lm - 1) / lm
        return (boxcox(tX) - self.data["mu"]) / self.data["std"]
    elif(self.algo == "auto-inv-normal"):
        tX = (tX - self.data["min"]) / (self.data["max"] - self.data["min"])
        lm = self.data['boxcox'][None, :]
        boxcox = lambda x: (np.sign(x) * np.abs(x) ** lm - 1) / lm
        return norm.cdf(boxcox(tX), self.data["mu"], self.data["std"])
def updateInterface1D(self, solver, bbox, history, bounds):
    x = np.linspace(0, 1, 80)
    z = map(lambda y: bbox.queryAt([y]), x)
    xx = np.atleast_2d(x).T
    z_pred, sigma2_pred = solver.gp.predict(xx, eval_MSE=True)

    self.ax_scat.clear()
    self.ax_approx.clear()
    self.ax_eimap.clear()

    self.ax_scat.plot(x, np.array(z))
    self.ax_scat.scatter(np.array(history)[:, 0], np.array(history)[:, 1])
    self.ax_approx.plot(x, np.array(z_pred))
    self.ax_approx.fill_between(x, np.array(z_pred) + np.array(np.sqrt(sigma2_pred)),
                                np.array(z_pred) - np.array(np.sqrt(sigma2_pred)), alpha=0.2)
    self.ax_approx.plot(x)

    target = min(np.array(history)[:, 1])
    mean, variance = solver.gp.predict(xx, eval_MSE=True)
    z = (target - mean) / np.sqrt(variance)
    self.ax_approx.plot(x, np.sqrt(variance) * (z * norm.cdf(z) + norm.pdf(z)))
def update_interface_1D(ax, ax2, solver, bbox, history):
    x = np.linspace(0, 1, 80)
    z = map(lambda y: bbox.queryAt([y]), x)
    xx = np.atleast_2d(x).T
    z_pred, sigma2_pred = solver.gp.predict(xx, eval_MSE=True)

    ax.clear()
    ax2.clear()

    ax.plot(x, np.array(z))
    ax.scatter(np.array(history)[:, 0], np.array(history)[:, 1])
    ax2.plot(x, np.array(z_pred))
    ax2.fill_between(x, np.array(z_pred) + np.array(np.sqrt(sigma2_pred)),
                     np.array(z_pred) - np.array(np.sqrt(sigma2_pred)), alpha=0.2)
    ax.set_xlim([0, 1])
    ax2.set_xlim([0, 1])
    ax2.plot(x)

    target = min(np.array(history)[:, 1])
    mean, variance = solver.gp.predict(xx, eval_MSE=True)
    z = (target - mean) / np.sqrt(variance)
    ax2.plot(x, np.sqrt(variance) * (z * norm.cdf(z) + norm.pdf(z)))
def update_interface_2D(ax, ax2, ax3, solver, bbox, history):
    x = np.linspace(0, 1, 200)
    y = np.linspace(0, 1, 200)
    x, y = np.meshgrid(x, y)
    xx = x.ravel()
    yy = y.ravel()

    #z=map(lambda x:bbox.queryAt(x),[np.array(i) for i in zip(xx,yy)])
    points_pred = np.array(map(lambda s: np.array(s), zip(xx, yy)))
    z_pred, sigma2_pred = solver.gp.predict(points_pred, eval_MSE=True)

    #target=min(map(lambda x:bbox.queryAt(x),[np.array(i) for i in history]))
    target = min(list(np.array(history)[:, 1]))
    u = (target - z_pred) / np.sqrt(sigma2_pred)

    ax.clear()
    ax2.clear()

    #c1=ax.contourf(x,y,np.array(z).reshape(-1,len(x[0])))
    tt = np.array(map(np.asarray, np.array(history).reshape(-1, 2)[:, 0]))
    ax.scatter(tt[:, 0], tt[:, 1])
    c2 = ax2.contourf(x, y, np.array(z_pred).reshape(-1, len(x[0])))
    c3 = ax3.contourf(x, y, np.array(np.sqrt(sigma2_pred) * (u * norm.cdf(u) + norm.pdf(u))).reshape(-1, len(x[0])))
    ax2.scatter(tt[:, 0], tt[:, 1])
    ax3.scatter(tt[:, 0], tt[:, 1])
    #c1.set_clim(min(z),max(z))
    #c2.set_clim(min(z),max(z))
def cumulative_neg_binom(x, n, p):
    """
    Get the cumulative probability distribution for a negative
    binomial, over an array of points x that are log-scaled.

    :param x: Points at which to calculate probability density
    :type x: :class:`~numpy.ndarray`
    :param int n: Number of trials (see :data:`~scipy.stats.nbinom`)
    :param int p: Probability of success (see :data:`~scipy.stats.nbinom`)
    :returns: Cumulative probability distribution over x
    """
    # x is in log, so transform it back
    x = list(10 ** x[1:])
    # Add the point 0.0
    x = [0.0] + x
    return nbinom.cdf(x, n, p)
def normal(x, loc, scale):
    """
    Get the probability density for a normal distribution.

    :param x: Edges of bins within which to calculate probability density
    :type x: :class:`~numpy.ndarray`
    :param int loc: Mean of the distribution (see :data:`~scipy.stats.norm`)
    :param int scale: Standard deviation of the distribution \
        (see :data:`~scipy.stats.norm`)
    :returns: Probability density over x. Return \
        array has one less value than x, as the first value of \
        the return array is the probability density between x[0] \
        and x[1].
    """
    norm_y = norm.cdf(x, loc, scale)
    norm_y = un_cumulative(norm_y)
    return sum_to_1(norm_y)
def _BlackScholesCall(S, K, T, sigma, r, q):
    d1 = (log(S / K) + (r - q + (sigma**2) / 2) * T) / (sigma * sqrt(T))
    d2 = d1 - sigma * sqrt(T)
    return S * exp(-q * T) * norm.cdf(d1) - K * exp(-r * T) * norm.cdf(d2)
def _BlackScholesPut(S, K, T, sigma, r, q):
    d1 = (log(S / K) + (r - q + (sigma**2) / 2) * T) / (sigma * sqrt(T))
    d2 = d1 - sigma * sqrt(T)
    return K * exp(-r * T) * norm.cdf(-d2) - S * exp(-q * T) * norm.cdf(-d1)
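As a usage sketch (not part of the original project), the two Black-Scholes helpers above can be exercised with purely illustrative market parameters; the imports below are an assumption about what the helpers expect to be in scope:

from math import log, sqrt, exp   # the helpers above call log, sqrt and exp directly
from scipy.stats import norm

# Illustrative inputs: spot 100, strike 105, one year to expiry,
# 20% volatility, 5% risk-free rate, no dividend yield.
call = _BlackScholesCall(S=100.0, K=105.0, T=1.0, sigma=0.2, r=0.05, q=0.0)
put = _BlackScholesPut(S=100.0, K=105.0, T=1.0, sigma=0.2, r=0.05, q=0.0)

# Put-call parity is a quick sanity check: C - P = S*exp(-q*T) - K*exp(-r*T)
assert abs((call - put) - (100.0 - 105.0 * exp(-0.05))) < 1e-9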
def expected_improvement(self, optimiser, x):
    mu, std = optimiser.predict([x], return_std=True)
    current_best = max([score for score, params in self.hyperparam_history])
    gamma = (current_best - mu[0]) / std[0]
    exp_improv = std[0] * (gamma * norm.cdf(gamma) + norm.pdf(gamma))
    return -1 * exp_improv
def probability_of_improvement(self, optimiser, x):
    mu, std = optimiser.predict([x], return_std=True)
    current_best = max([score for score, params in self.hyperparam_history])
    gamma = (current_best - mu[0]) / std[0]
    return -1 * norm.cdf(gamma)
def tick_values(self, vmin, vmax):
    eps = 0.
    return norm.cdf(AutoLocator.tick_values(self, norm.ppf(max(eps, vmin)),
                                            norm.ppf(min(1 - eps, vmax))))
def transform_non_affine(self, a):
    return norm.cdf(a)
def cdf(self, samples):
    '''
    Calculates the cumulative distribution function.

    Parameters
    ----------
    samples : array_like
        n-by-2 matrix of samples where n is the number of samples.

    Returns
    -------
    ndarray
        Cumulative distribution function evaluated at `samples`.
    '''
    return np.exp(self.logcdf(samples))
def _ccdf(self, samples):
    vals = np.zeros(samples.shape[0])
    # Avoid subtraction of infinities
    neqz = np.bitwise_and(np.any(samples > 0.0, axis=1),
                          np.any(samples < 1.0, axis=1))
    nrvs = norm.ppf(samples[neqz, :])
    vals[neqz] = norm.cdf((nrvs[:, 0] - self.theta * nrvs[:, 1])
                          / np.sqrt(1 - self.theta**2))
    vals[np.invert(neqz)] = norm.cdf(0.0)
    return vals
def _ppcf(self, samples):
    nrvs = norm.ppf(samples)
    vals = norm.cdf(nrvs[:, 0] * np.sqrt(1 - self.theta**2)
                    + self.theta * nrvs[:, 1])
    return vals
def itransform(self, y_transformed):
    # FIXME: Interpolate rather than solve for speed?
    ycdf = norm.cdf(y_transformed)
    y = [brentq(self._obj, a=self.lb, b=self.ub, args=(yi,))
         for yi in ycdf]
    return np.array(y)
def _get_levels(self):
    sigma2d = self.parent.config["sigma2d"]
    if sigma2d:
        levels = 1.0 - np.exp(-0.5 * self.parent.config["sigmas"] ** 2)
    else:
        levels = 2 * norm.cdf(self.parent.config["sigmas"]) - 1.0
    return levels
def obrien_fleming(information_fraction, alpha=0.05):
    """
    Calculate an approximation of the O'Brien-Fleming alpha spending function.

    Args:
        information_fraction (scalar or array_like): share of the information
            amount at the point of evaluation, e.g. the share of the maximum
            sample size
        alpha: type-I error rate

    Returns:
        float: redistributed alpha value at the time point with the given
            information fraction
    """
    return (1 - norm.cdf(norm.ppf(1 - alpha / 2) / np.sqrt(information_fraction))) * 2
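A short usage sketch for the spending function above (the interim information fractions are illustrative, not from the original project):

import numpy as np
from scipy.stats import norm

# Alpha spent at 25%, 50%, 75% and 100% of the planned sample size.
fractions = np.array([0.25, 0.5, 0.75, 1.0])
print(obrien_fleming(fractions))   # very little alpha early, growing toward 0.05
print(obrien_fleming(1.0))         # at full information this returns alpha = 0.05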
def calculate_prob(self, multiplicity: int, avg_coverage: float):
    mu = self.mu * multiplicity
    sigma = self.sigma * multiplicity
    difference = math.fabs(mu - avg_coverage)
    return 2 * norm.cdf(mu - difference, loc=mu, scale=sigma)
def testCumulative(self):
    from scipy.stats import norm
    x = numpy.linspace(-5., 5., 200)
    samples = numpy.random.normal(size=100000)
    cumula = Cumulative(samples)
    c = cumula(x)
    cx = norm.cdf(x)
    for i in range(199):
        self.assertAlmostEqual(cx[i], c[i], delta=1e-2)
def CalcSupervDelta(self, Superv_Vol=None):
    if not Superv_Vol:
        return 1 if self.BuySell == "Buy" else -1
    if (self.UnderlyingPrice * self.StrikePrice < 0):
        num = self.UnderlyingPrice - self.StrikePrice
    else:
        num = (log(self.UnderlyingPrice / self.StrikePrice) + 0.5 * Superv_Vol**2 * self.Si)
    den = Superv_Vol * self.Si**0.5
    temp = num / den
    flip = 1 if self.BuySell == "Buy" else -1
    return flip * norm.cdf(temp) if self.OptionType == 'Call' else flip * -norm.cdf(-temp)
def compute_log_lik_exp(self, m, v, y):
    if m.ndim == 2:
        gh_x, gh_w = self._gh_points(GH_DEGREE)
        gh_x = gh_x[:, np.newaxis, np.newaxis]
        gh_w = gh_w[:, np.newaxis, np.newaxis] / np.sqrt(np.pi)

        v_expand = v[np.newaxis, :, :]
        m_expand = m[np.newaxis, :, :]
        ts = gh_x * np.sqrt(2 * v_expand) + m_expand
        logcdfs = norm.logcdf(ts * y)
        prods = gh_w * logcdfs
        loglik = np.sum(prods)

        pdfs = norm.pdf(ts * y)
        cdfs = norm.cdf(ts * y)
        grad_cdfs = y * gh_w * pdfs / cdfs
        dts_dm = 1
        dts_dv = 0.5 * gh_x * np.sqrt(2 / v_expand)
        dm = np.sum(grad_cdfs * dts_dm, axis=0)
        dv = np.sum(grad_cdfs * dts_dv, axis=0)
    else:
        gh_x, gh_w = self._gh_points(GH_DEGREE)
        gh_x = gh_x[:, np.newaxis, np.newaxis, np.newaxis]
        gh_w = gh_w[:, np.newaxis, np.newaxis, np.newaxis] / np.sqrt(np.pi)

        v_expand = v[np.newaxis, :, :, :]
        m_expand = m[np.newaxis, :, :, :]
        ts = gh_x * np.sqrt(2 * v_expand) + m_expand
        logcdfs = norm.logcdf(ts * y)
        prods = gh_w * logcdfs
        prods_mean = np.mean(prods, axis=1)
        loglik = np.sum(prods_mean)

        pdfs = norm.pdf(ts * y)
        cdfs = norm.cdf(ts * y)
        grad_cdfs = y * gh_w * pdfs / cdfs
        dts_dm = 1
        dts_dv = 0.5 * gh_x * np.sqrt(2 / v_expand)
        dm = np.sum(grad_cdfs * dts_dm, axis=0) / m.shape[0]
        dv = np.sum(grad_cdfs * dts_dv, axis=0) / m.shape[0]
    return loglik, dm, dv
def calculate_dv01(forward, strike, implied_sigma, annuity, days_to_maturity, pay_rec):
    from scipy.stats import norm

    if pay_rec == "Payer":
        extra_addend = 0
    else:
        if pay_rec == "Receiver":
            extra_addend = -1
        else:
            raise NotImplementedError()

    d1 = calculate_d1(forward, strike, implied_sigma, days_to_maturity)
    dv01 = annuity * (norm.cdf(d1) + extra_addend)
    return dv01
def __calculate_price(underlying, strike, annuity, domestic_short_rate, foreign_short_rate,
                      implied_sigma, days_to_maturity, sign):
    """
    P_t = A_t * (S_t * N(d_1) - k * e^(- (r_d - r_f) * T) * N(d_2))

    :param underlying:
    :param strike:
    :param annuity:
    :param implied_sigma:
    :param days_to_maturity:
    :param sign:
    :return:
    """
    from scipy.stats import norm
    from math import exp

    if sign != 1 and sign != -1:
        raise ValueError("Invalid Sign")

    d1 = calculate_d1(underlying, strike, domestic_short_rate, foreign_short_rate, implied_sigma, days_to_maturity)
    d2 = calculate_d2(underlying, strike, domestic_short_rate, foreign_short_rate, implied_sigma, days_to_maturity)

    r = domestic_short_rate - foreign_short_rate
    year_fraction = float(days_to_maturity) / 365
    df = exp(-r * year_fraction)

    price = annuity * sign * (underlying * norm.cdf(sign * d1) - strike * df * norm.cdf(sign * d2))
    return price
def phi(self, x):
    """Normalized Downwind and Upwind Distribution.

    In order to predict upwind fallout and at the same time preserve
    normalization, a function phi is empirically inserted."""
    w = (self.L_0 / self.L) * (x / (self.s_x * self.a_1))
    return norm.cdf(w)
def D_Hplus1(self, x, y, dunits='km', doseunits='Sv'):
    """Returns dose rate at x, y at 1 hour after burst.

    This value includes dose rate from all activity that WILL be deposited
    at that location, not just that that has arrived by H+1 hr."""
    rx, ry = self.translation * (convert_units(x, dunits, 'mi'),
                                 convert_units(y, dunits, 'mi')) * ~Affine.rotation(-self.wd + 270)
    f_x = self.yld * 2e6 * self.phi(rx) * self.g(rx) * self.ff
    s_y = np.sqrt(self.s_02 + ((8 * abs(rx + 2 * self.s_x) * self.s_02) / self.L) +
                  (2 * (self.s_x * self.T_c * self.s_h * self.shear)**2 / self.L_2) +
                  (((rx + 2 * self.s_x) * self.L_0 * self.T_c * self.s_h * self.shear)**2 / self.L**4))
    a_2 = 1 / (1 + ((0.001 * self.H_c * self.wind) / self.s_0) * (1 - norm.cdf(2 * x / self.wind)))
    f_y = np.exp(-0.5 * (ry / (a_2 * s_y))**2) / (2.5066282746310002 * s_y)
    return convert_units(f_x * f_y, 'Roentgen', doseunits)
def theta_fn(self, proj, libors, paydate):
    # theta of one coupon period
    ts, te, cov = self.__parse_libors(libors)
    eta = (te - paydate.t) / (te - ts)
    p = 1.0 + np.array([l.forward(proj) for l in libors]) * cov
    #p = proj(ts) / proj(te)  # = (1 + libor * cov)
    xi = self.__xi(proj.t0, ts, te)  # proj.t0 is spotdate of curve

    def capfloor(k, s):  # forward/undiscounted caplet(s=1)/floorlet(s=-1) price
        zstar = np.log((1 + k) / p) / xi - xi / 2
        return s * (p * norm.cdf(-zstar * s) - (1 + k) * norm.cdf(-(zstar + xi) * s))

    def digital(k, s):  # s=1 ==> 1 for above k; s=-1 ==> 1 for below k
        km = cov * (k - self.digi_gap * s)
        kp = cov * (k + self.digi_gap * s)
        return (1 + eta * kp) * capfloor(km, s) - (1 + eta * km) * capfloor(kp, s)

    denom = 2 * self.digi_gap * cov  #* (1 + eta * (p - 1))  # p - 1 = libor * cov
    if self.rng_type == 'between':  # inside; == (digital(rng_high,-1) - digital(rng_low,-1)) / denom
        ratio = (digital(self.rng_low, 1) - digital(self.rng_high, 1)) / denom
    elif self.rng_type == 'outside':  # outside
        ratio = (digital(self.rng_low, -1) + digital(self.rng_high, 1)) / denom
    elif self.rng_type == 'above':  # above rate
        ratio = digital(self.rng_rate, 1) / denom
    elif self.rng_type == 'below':  # below rate
        ratio = digital(self.rng_rate, -1) / denom

    if self.lk_type == 'skipped':
        ratio = ratio[:self.lk_days]
    elif self.lk_type == 'crystallized':
        ratio[self.lk_days:] = ratio[self.lk_days]
    return np.mean(ratio)
def value(self, opt=Option.Call, strike=None):
    if strike is None:
        strike = self.forward
    d1 = self.__d1(strike)
    asset_value = opt * self.forward * norm.cdf(opt * d1)
    cash_value = opt * strike * norm.cdf(opt * (d1 - self.stdev))
    return self.discount * (asset_value - cash_value)
def value(self, opt=Option.Call, strike=None):
    if strike is None:
        strike = self.forward
    dr = opt * (self.forward - strike)
    d1 = dr / self.stdev
    d2 = self.stdev * norm.pdf(d1)
    return self.discount * (dr * norm.cdf(d1) + d2)
def compute_EI(ni, alpha, lr, lr_time, X, y, ei_xi, x):
    var = np.var(lr.predict(X) - y)
    m = np.dot(X.T, X)
    inv = np.linalg.inv(m + alpha * np.eye(sum(ni)))

    x_flat = np.array(x)
    mu_x = lr.predict([x_flat])
    var_x = var * (1 + np.dot(np.dot(x_flat, inv), x_flat.T))
    sigma_x = np.sqrt(var_x)
    u = (np.min(y) - ei_xi - mu_x) / sigma_x
    EI = sigma_x * (u * norm.cdf(u) + norm.pdf(u))
    estimated_time = lr_time.predict([x_flat])[0]
    EIPS = EI / estimated_time
    return EIPS
def get_next_by_EI(ni, alpha, lr, lr_time, X, y, ei_xi):
    '''
    Args:
        ni: number of units in the each layer
        alpha: lambda for Ridge regression
        lr: fitted performance model in burning period
        lr_time: fitted time model in burning period
        X: all previous inputs x
        y: all previous observations corresponding to X
        ei_xi: parameter for EI exploitation-exploration trade-off

    Returns:
        x_next: a nested list [[0,1,0], [1,0,0,0], ...] as the next input x to run
            a specified pipeline
    '''
    var = np.var(lr.predict(X) - y)
    m = np.dot(X.T, X)
    inv = np.linalg.inv(m + alpha * np.eye(sum(ni)))
    maxEI = float('-inf')
    x_next = None
    for i in range(np.prod(ni)):
        x = [[0] * n for n in ni]
        x_flat = []
        pipeline = get_pipeline_by_flatten_index(ni, i)
        for layer in range(len(ni)):
            x[layer][pipeline[layer]] = 1
            x_flat += x[layer]
        x_flat = np.array(x_flat)
        mu_x = lr.predict([x_flat])
        var_x = var * (1 + np.dot(np.dot(x_flat, inv), x_flat.T))
        sigma_x = np.sqrt(var_x)
        u = (np.min(y) - ei_xi - mu_x) / sigma_x
        EI = sigma_x * (u * norm.cdf(u) + norm.pdf(u))
        estimated_time = lr_time.predict([x_flat])[0]
        EIPS = EI / estimated_time
        if EIPS > maxEI:
            maxEI = EIPS
            x_next = x
    return x_next
def _get_eis(self, points, score_optimum):
    """
    Calculate the expected improvements for all points.

    Parameters
    ----------
    points: list
        List of parameter settings for the GP to predict on

    score_optimum: float
        The score optimum value to use for calculating the difference against the
        expected value

    Returns
    -------
    Returns the Expected Improvement
    """
    # Predict mu's and sigmas for each point
    mu, sigma = self.score_regressor.predict(points, return_std=True)

    # Subtract each item in list by score_optimum
    # We subtract 0.01 because http://haikufactory.com/files/bayopt.pdf
    # (2.3.2 Exploration-exploitation trade-of)
    diff = mu - (score_optimum - 0.01)

    # Divide each diff by each sigma
    Z = diff / sigma

    # Calculate EI's
    ei = diff * norm.cdf(Z) + sigma * norm.pdf(Z)

    # Make EI zero when sigma is zero (but then -1 when sigma <= 1e-05 to be more
    # sure that everything goes well)
    for index, value in enumerate(sigma):
        if value <= 1e-05:
            ei[index] = -1

    return ei
def calc_log_normalizer(mu, sigma, l, h):
    return np.log(norm.cdf(h, loc=mu, scale=sigma) -
                  norm.cdf(l, loc=mu, scale=sigma))
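A small usage sketch for the helper above (values chosen purely for illustration): it returns the log of the probability mass that a Normal(mu, sigma) places on the interval [l, h].

import numpy as np
from scipy.stats import norm

# log P(-1 <= X <= 1) for X ~ Normal(0, 1): about log(0.6827) ≈ -0.3817
print(calc_log_normalizer(mu=0.0, sigma=1.0, l=-1.0, h=1.0))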
def PI(params, means, stand_devi, parms_done, y, n_iterations, k):
    """
    Probability of Improvement acquisition function
    :param params: test data
    :param means: GP posterior mean
    :param stand_devi: standard deviation
    :param parms_done: training data
    :param y: training targets
    :return: next point that need to pick up
    """
    s = 0.0005  # small value
    stop_threshold = 0.001
    max_mean = np.max(y)
    f_max = max_mean + s
    z = (means - f_max) / stand_devi
    cumu_gaussian = norm.cdf(z)
    # early stop criterion, if there are less than 1% to get the greater cumulative Gaussian, then stop.
    if cumu_gaussian.sum() <= stop_threshold or np.max(cumu_gaussian) <= stop_threshold:
        print "all elements of cumulative are alost zeros!!!"
        return True
    indices = np.where(cumu_gaussian == np.max(cumu_gaussian))[0]
    indices = np.asarray(indices)
    # since there are several 1, random pick one of them as the next point except for parms_done
    rand_index = random.randint(0, len(indices) - 1)
    next_point = params[indices[rand_index]]
    condition = next_point in parms_done.tolist()
    # early stop criterion, if there is no other point that can maximize the objective then stop
    while condition:
        rand_index = random.randint(0, len(indices) - 1)
        next_point = params[indices[rand_index]]
        condition = next_point in parms_done.tolist()
        if len(next_point) == 1 and condition:
            return True
    if k == n_iterations - 1:
        plt.subplot(2, 1, 2)
        plt.plot(params, cumu_gaussian, label='PI')
    return next_point
def _get_scaler_function(self, scaler_algo):
    scaler = None
    if scaler_algo == 'percentile':
        scaler = lambda x: rankdata(x).astype(np.float64) / len(x)
    elif scaler_algo == 'normcdf':
        # scaler = lambda x: ECDF(x[cat_word_counts != 0])(x)
        scaler = lambda x: norm.cdf(x, x.mean(), x.std())
    elif scaler_algo == 'none':
        scaler = lambda x: x
    else:
        raise InvalidScalerException("Invalid scaler alogrithm. Must be either percentile or normcdf.")
    return scaler
def _get_scaler_function(scaler_algo):
    scaler = None
    if scaler_algo == 'normcdf':
        scaler = lambda x: norm.cdf(x, x.mean(), x.std())
    elif scaler_algo == 'percentile':
        scaler = lambda x: rankdata(x).astype(np.float64) / len(x)
    elif scaler_algo == 'none':
        scaler = lambda x: x
    else:
        raise InvalidScalerException("Invalid scaler alogrithm. Must be either percentile or normcdf.")
    return scaler
def EI(self, x, gp, ymax):
    mean, var = gp.predict(x, eval_MSE=True)
    if var == 0:
        return 0
    else:
        Z = (mean - ymax) / sqrt(var)
        return (mean - ymax) * norm.cdf(Z) + sqrt(var) * norm.pdf(Z)
def PoI(self, x, gp, ymax):
    mean, var = gp.predict(x, eval_MSE=True)
    if var == 0:
        return 1
    else:
        Z = (mean - ymax) / sqrt(var)
        return norm.cdf(Z)

################################################################################
################################## Print Info ##################################
################################################################################
def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
    """
    Calculates the statistic significance between two dependent correlation coefficients
    @param xy: correlation coefficient between x and y
    @param xz: correlation coefficient between x and z
    @param yz: correlation coefficient between y and z
    @param n: number of elements in x, y and z
    @param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method
    @param conf_level: confidence level, only works for 'zou' method
    @param method: defines the method uses, 'steiger' or 'zou'
    @return: t and p-val
    """
    if method == 'steiger':
        d = xy - xz
        determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
        av = (xy + xz) / 2
        cube = (1 - yz) * (1 - yz) * (1 - yz)

        t2 = d * np.sqrt((n - 1) * (1 + yz) / (((2 * (n - 1) / (n - 3)) * determin + av * av * cube)))
        p = 1 - t.cdf(abs(t2), n - 3)

        if twotailed:
            p *= 2

        return t2, p
    elif method == 'zou':
        L1 = rz_ci(xy, n, conf_level=conf_level)[0]
        U1 = rz_ci(xy, n, conf_level=conf_level)[1]
        L2 = rz_ci(xz, n, conf_level=conf_level)[0]
        U2 = rz_ci(xz, n, conf_level=conf_level)[1]
        rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
        lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
        upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
        return lower, upper
    else:
        raise Exception('Wrong method!')
def _ei(x, gp, y_max, xi):
    """
    Expected Improvement, (Mockus, 1978)
    """
    mean, var = gp.predict(x, eval_MSE=True)
    # Clip the predictive variance away from zero to avoid division by zero below
    var = np.maximum(var, 1e-9 + 0 * var)
    z = (mean - y_max - xi) / np.sqrt(var)
    return (mean - y_max - xi) * norm.cdf(z) + np.sqrt(var) * norm.pdf(z)
def _poi(x, gp, y_max, xi):
    """
    Probability of Improvement, (Kushner, 1964)
    """
    mean, var = gp.predict(x, eval_MSE=True)
    # Clip the predictive variance away from zero to avoid division by zero below
    var = np.maximum(var, 1e-9 + 0 * var)
    z = (mean - y_max - xi) / np.sqrt(var)
    return norm.cdf(z)