我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.var()。
def alpha(self): # Cronbach Alpha alpha = pd.DataFrame(0, index=np.arange(1), columns=self.latent) for i in range(self.lenlatent): block = self.data_[self.Variables['measurement'] [self.Variables['latent'] == self.latent[i]]] p = len(block.columns) if(p != 1): p_ = len(block) correction = np.sqrt((p_ - 1) / p_) soma = np.var(np.sum(block, axis=1)) cor_ = pd.DataFrame.corr(block) denominador = soma * correction**2 numerador = 2 * np.sum(np.tril(cor_) - np.diag(np.diag(cor_))) alpha_ = (numerador / denominador) * (p / (p - 1)) alpha[self.latent[i]] = alpha_ else: alpha[self.latent[i]] = 1 return alpha.T
def __call__(self, *inputvals): assert len(inputvals) == len(self.nondata_inputs) + len(self.data_inputs) nondata_vals = inputvals[0:len(self.nondata_inputs)] data_vals = inputvals[len(self.nondata_inputs):] feed_dict = dict(zip(self.nondata_inputs, nondata_vals)) n = data_vals[0].shape[0] for v in data_vals[1:]: assert v.shape[0] == n for i_start in range(0, n, self.batch_size): slice_vals = [v[i_start:min(i_start+self.batch_size, n)] for v in data_vals] for (var,val) in zip(self.data_inputs, slice_vals): feed_dict[var]=val results = tf.get_default_session().run(self.outputs, feed_dict=feed_dict) if i_start==0: sum_results = results else: for i in range(len(results)): sum_results[i] = sum_results[i] + results[i] for i in range(len(results)): sum_results[i] = sum_results[i] / n return sum_results # ================================================================ # Modules # ================================================================
def effective_sample_size(x, mu, var, logger): """ Calculate the effective sample size of sequence generated by MCMC. :param x: :param mu: mean of the variable :param var: variance of the variable :param logger: logg :return: effective sample size of the sequence Make sure that `mu` and `var` are correct! """ # batch size, time, dimension b, t, d = x.shape ess_ = np.ones([d]) for s in range(1, t): p = auto_correlation_time(x, s, mu, var) if np.sum(p > 0.05) == 0: break else: for j in range(0, d): if p[j] > 0.05: ess_[j] += 2.0 * p[j] * (1.0 - float(s) / t) logger.info('ESS: max [%f] min [%f] / [%d]' % (t / np.min(ess_), t / np.max(ess_), t)) return t / ess_
def bn_hat_z_layers(self, hat_z_layers, z_pre_layers): # TODO: Calculate batchnorm using GPU Tensors. assert len(hat_z_layers) == len(z_pre_layers) hat_z_layers_normalized = [] for i, (hat_z, z_pre) in enumerate(zip(hat_z_layers, z_pre_layers)): if self.use_cuda: ones = Variable(torch.ones(z_pre.size()[0], 1).cuda()) else: ones = Variable(torch.ones(z_pre.size()[0], 1)) mean = torch.mean(z_pre, 0) noise_var = np.random.normal(loc=0.0, scale=1 - 1e-10, size=z_pre.size()) if self.use_cuda: var = np.var(z_pre.data.cpu().numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1]) else: var = np.var(z_pre.data.numpy() + noise_var, axis=0).reshape(1, z_pre.size()[1]) var = Variable(torch.FloatTensor(var)) if self.use_cuda: hat_z = hat_z.cpu() ones = ones.cpu() mean = mean.cpu() hat_z_normalized = torch.div(hat_z - ones.mm(mean), ones.mm(torch.sqrt(var + 1e-10))) if self.use_cuda: hat_z_normalized = hat_z_normalized.cuda() hat_z_layers_normalized.append(hat_z_normalized) return hat_z_layers_normalized
def test_ddof_too_big(self): nanfuncs = [np.nanvar, np.nanstd] stdfuncs = [np.var, np.std] dsize = [len(d) for d in _rdat] for nf, rf in zip(nanfuncs, stdfuncs): for ddof in range(5): with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') tgt = [ddof >= d for d in dsize] res = nf(_ndat, axis=1, ddof=ddof) assert_equal(np.isnan(res), tgt) if any(tgt): assert_(len(w) == 1) assert_(issubclass(w[0].category, RuntimeWarning)) else: assert_(len(w) == 0)
def update(self, x): batch_mean = np.mean(x, axis=0) batch_var = np.var(x, axis=0) batch_count = x.shape[0] delta = batch_mean - self.mean tot_count = self.count + batch_count new_mean = self.mean + delta * batch_count / tot_count m_a = self.var * (self.count) m_b = batch_var * (batch_count) M2 = m_a + m_b + np.square(delta) * self.count * batch_count / (self.count + batch_count) new_var = M2 / (self.count + batch_count) new_count = batch_count + self.count self.mean = new_mean self.var = new_var self.count = new_count
def test_runningmeanstd(): for (x1, x2, x3) in [ (np.random.randn(3), np.random.randn(4), np.random.randn(5)), (np.random.randn(3,2), np.random.randn(4,2), np.random.randn(5,2)), ]: rms = RunningMeanStd(epsilon=0.0, shape=x1.shape[1:]) x = np.concatenate([x1, x2, x3], axis=0) ms1 = [x.mean(axis=0), x.var(axis=0)] rms.update(x1) rms.update(x2) rms.update(x3) ms2 = [rms.mean, rms.var] assert np.allclose(ms1, ms2)
def autocorrelate(signal, lag=1): """Gives the correlation coefficient for the signal's correlation with itself. Args: signal: The signal on which to compute the autocorrelation. Can be a list. lag: The offset at which to correlate the signal with itself. E.g. if lag is 1, will compute the correlation between the signal and itself 1 beat later. Returns: Correlation coefficient. """ n = len(signal) x = np.asarray(signal) - np.mean(signal) c0 = np.var(signal) return (x[lag:] * x[:n - lag]).sum() / float(n) / c0
def effective_sample_size_1d(samples): """ Compute the effective sample size of a chain of scalar samples. :param samples: A 1-D numpy array. The chain of samples. :return: A float. The effective sample size. """ n = samples.shape[0] mu_hat = np.mean(samples) var = np.var(samples) * n / (n - 1) var_plus = var * (n - 1) / n def auto_covariance(lag): return np.mean((samples[:n - lag] - mu_hat) * (samples[lag:] - mu_hat)) sum_rho = 0 for t in range(0, n): rho = 1 - (var - auto_covariance(t)) / var_plus if rho < 0: break sum_rho += rho ess = n / (1 + 2 * sum_rho) return ess
def __init__(self, kernel='rbf', nbases=50, lenscale=1., var=1., regulariser=1., ard=True, maxiter=3000, batch_size=10, alpha=0.01, beta1=0.9, beta2=0.99, epsilon=1e-8, random_state=None, nstarts=500): super().__init__(likelihood=Gaussian(Parameter(var, Positive())), basis=None, maxiter=maxiter, batch_size=batch_size, updater=Adam(alpha, beta1, beta2, epsilon), random_state=random_state, nstarts=nstarts ) self._store_params(kernel, regulariser, nbases, lenscale, ard) # Bespoke regressor for basin-depth problems
def __init__(self, kernel='rbf', nbases=50, lenscale=1., var=1., falloff=1., regulariser=1., ard=True, indicator_field='censored', maxiter=3000, batch_size=10, alpha=0.01, beta1=0.9, beta2=0.99, epsilon=1e-8, random_state=None): lhood = Switching(lenscale=falloff, var_init=Parameter(var, Positive())) super().__init__(likelihood=lhood, basis=None, maxiter=maxiter, batch_size=batch_size, updater=Adam(alpha, beta1, beta2, epsilon), random_state=random_state ) self.indicator_field = indicator_field self._store_params(kernel, regulariser, nbases, lenscale, ard)
def test_final_variance_runs(self): exp = VarianceExperiment() printer_final = Print(name="Final") avgr = Averager('repeats', name="TestAverager") var_buff = DataBuffer(name='Variance Buffer') mean_buff = DataBuffer(name='Mean Buffer') edges = [(exp.chan1, avgr.sink), (avgr.final_variance, printer_final.sink), (avgr.final_variance, var_buff.sink), (avgr.final_average, mean_buff.sink)] exp.set_graph(edges) exp.run_sweeps() var_data = var_buff.get_data()['Variance'].reshape(var_buff.descriptor.data_dims()) mean_data = mean_buff.get_data()['chan1'].reshape(mean_buff.descriptor.data_dims()) orig_data = exp.vals.reshape(exp.chan1.descriptor.data_dims()) self.assertTrue(np.abs(np.sum(mean_data - np.mean(orig_data, axis=0))) <= 1e-3) self.assertTrue(np.abs(np.sum(var_data - np.var(orig_data, axis=0, ddof=1))) <= 1e-3)
def regularize(features): #regularize per column for i in range(0,len(features[0])): try: #take evary column feat=features[:,i] #mean and variance of every column mean=np.mean(feat) var=np.var(feat) if(var!=0): features[:,i]=(features[:,i]-mean)/float(3*var) else : features[:,i]=0 except: pass features[features>1]=1 features[features<-1]=-1 return features #reguralize features to [-1,1] horizontally, yi=yi/norm(yi,2)
def test_variance_wgrad(input_tensor): inputs = input_tensor targets = ng.placeholder(inputs.axes) inp_stat = ng.variance(inputs, reduction_axes=inputs.axes.batch_axes()) err = ng.sum(inp_stat - targets, out_axes=()) d_inputs = ng.deriv(err, inputs) with executor([err, d_inputs], inputs, targets) as comp_func: input_value = rng.uniform(-0.1, 0.1, inputs.axes) target_value = rng.uniform(-0.1, 0.1, targets.axes) ng_f_res, ng_b_res = comp_func(input_value, target_value) np_f_res = np.sum(np.var(input_value, axis=1, keepdims=True) - target_value) ng.testing.assert_allclose(np_f_res, ng_f_res, atol=1e-4, rtol=1e-4) np_b_res = 2 * (input_value - np.mean(input_value, axis=1, keepdims=True)) ng.testing.assert_allclose(np_b_res, ng_b_res, atol=1e-4, rtol=1e-4)
def calculateParamGV(feat_file_list, feat_dim=32): data = numpy.empty((1, feat_dim)) for file_index in range(len(feat_file_list)): file_name = feat_file_list[file_index] (junk, ext) = feat_file_list[file_index].split('.') features = readBottleneckFeatures(file_name, feat_dim) if ext == 'lf0': #remove unvoiced values features = features[numpy.where(features != -1.*(10**(10)))[0]] features = numpy.exp(features) #convert to linear scale if file_index==0: data=features else: data=numpy.concatenate((data,features),0) gv = numpy.var(data, 0) return gv
def bn_forward(X, gamma, beta, cache, momentum=.9, train=True): running_mean, running_var = cache if train: mu = np.mean(X, axis=0) var = np.var(X, axis=0) X_norm = (X - mu) / np.sqrt(var + c.eps) out = gamma * X_norm + beta cache = (X, X_norm, mu, var, gamma, beta) running_mean = util.exp_running_avg(running_mean, mu, momentum) running_var = util.exp_running_avg(running_var, var, momentum) else: X_norm = (X - running_mean) / np.sqrt(running_var + c.eps) out = gamma * X_norm + beta cache = None return out, cache, running_mean, running_var
def bn_backward(dout, cache): X, X_norm, mu, var, gamma, beta = cache N, D = X.shape X_mu = X - mu std_inv = 1. / np.sqrt(var + c.eps) dX_norm = dout * gamma dvar = np.sum(dX_norm * X_mu, axis=0) * -.5 * std_inv**3 dmu = np.sum(dX_norm * -std_inv, axis=0) + dvar * np.mean(-2. * X_mu, axis=0) dX = (dX_norm * std_inv) + (dvar * 2 * X_mu / N) + (dmu / N) dgamma = np.sum(dout * X_norm, axis=0) dbeta = np.sum(dout, axis=0) return dX, dgamma, dbeta
def cv_gradient(self,z): """ The control variate augmented Monte Carlo gradient estimate """ gradient = np.zeros(np.sum(self.approx_param_no)) z_t = z.T log_q = self.normal_log_q(z.T) log_p = self.log_p(z.T) grad_log_q = self.grad_log_q(z) gradient = grad_log_q*(log_p-log_q) alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no)) vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T return np.mean(vectorized,axis=1)
def cv_gradient_initial(self,z): """ The control variate augmented Monte Carlo gradient estimate """ gradient = np.zeros(np.sum(self.approx_param_no)) z_t = z.T log_q = self.normal_log_q_initial(z.T) log_p = self.log_p(z.T) grad_log_q = self.grad_log_q(z) gradient = grad_log_q*(log_p-log_q) alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no)) vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T return np.mean(vectorized,axis=1)
def cv_gradient(self, z): """ The control variate augmented Monte Carlo gradient estimate RAO-BLACKWELLIZED! """ z_t = np.transpose(z) log_q = self.normal_log_q(z_t) log_p = self.log_p(z_t) grad_log_q = self.grad_log_q(z) gradient = grad_log_q*np.repeat((log_p - log_q).T,2,axis=0) alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no)) vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T return np.mean(vectorized,axis=1)
def cv_gradient_initial(self,z): """ The control variate augmented Monte Carlo gradient estimate RAO-BLACKWELLIZED! """ z_t = np.transpose(z) log_q = self.normal_log_q_initial(z_t) log_p = self.log_p(z_t) grad_log_q = self.grad_log_q(z) gradient = grad_log_q*np.repeat((log_p - log_q).T,2,axis=0) alpha0 = alpha_recursion(np.zeros(np.sum(self.approx_param_no)), grad_log_q, gradient, np.sum(self.approx_param_no)) vectorized = gradient - ((alpha0/np.var(grad_log_q,axis=1))*grad_log_q.T).T return np.mean(vectorized,axis=1)
def testRandom(self): ig = InverseGaussian(1., 1.) samples = ig.random(1000000) mu = numpy.mean(samples) var = numpy.var(samples) self.assertAlmostEqual(ig.mu, mu, delta=1e-1) self.assertAlmostEqual(ig.mu ** 3 / ig.shape, var, delta=1e-1) ig = InverseGaussian(3., 6.) samples = ig.random(1000000) mu = numpy.mean(samples) var = numpy.var(samples) self.assertAlmostEqual(ig.mu, mu, delta=1e-1) self.assertAlmostEqual(ig.mu ** 3 / ig.shape, var, delta=5e-1)
def testRandom(self): from scipy.special import kv from numpy import sqrt a = 2. b = 1. p = 1 gig = GeneralizedInverseGaussian(a, b, p) samples = gig.random(10000) mu_analytical = sqrt(b) * kv(p + 1, sqrt(a * b)) / (sqrt(a) * kv(p, sqrt(a * b))) var_analytical = b * kv(p + 2, sqrt(a * b)) / a / kv(p, sqrt(a * b)) - mu_analytical ** 2 mu = numpy.mean(samples) var = numpy.var(samples) self.assertAlmostEqual(mu_analytical, mu, delta=1e-1) self.assertAlmostEqual(var_analytical, var, delta=1e-1)
def calculate_aggregate(values): agg_measures = { 'avg': np.mean(values), 'std': np.std(values), 'var': np.var(values), 'med': np.median(values), '10p': np.percentile(values, 10), '25p': np.percentile(values, 25), '50p': np.percentile(values, 50), '75p': np.percentile(values, 75), '90p': np.percentile(values, 90), 'iqr': np.percentile(values, 75) - np.percentile(values, 25), 'iqm': interquartile_range_mean(values), 'mad': mean_absolute_deviation(values), 'cov': 1.0 * np.mean(values) / np.std(values), 'gin': gini_coefficient(values), 'skw': stats.skew(values), 'kur': stats.kurtosis(values), 'sum': np.sum(values) } return agg_measures
def test_sample_contexts_from_distribution(): env = Catapult(segments=[(0, 0), (20, 0)], context_interval=(0, 20), context_distribution=uniform(5, 10), random_state=0) env.init() contexts = np.empty(1000) for i in range(contexts.shape[0]): context = env.request_context(None) contexts[i] = context[0] norm_dist = uniform(0.25, 0.5) assert_true(np.all(0.25 <= contexts)) assert_true(np.all(contexts <= 0.75)) mean, var = norm_dist.stats("mv") assert_almost_equal(np.mean(contexts), mean, places=1) assert_almost_equal(np.var(contexts), var, places=1)
def displayDataset(self, dataset): eps = 0.00001 linewidth = dataset.linewidth if np.var(dataset.values) < eps: linewidth += 2 mean = np.mean(dataset.values) x = np.arange(0, 1, 0.1) x = np.sort(np.append(x, [mean, mean-eps, mean+eps])) density = [1 if v == mean else 0 for v in x] else: self.kde.fit(np.asarray([[x] for x in dataset.values])) ## Computes the x axis x_max = np.amax(dataset.values) x_min = np.amin(dataset.values) delta = x_max - x_min density_delta = 1.1 * delta x = np.arange(x_min, x_max, density_delta / self.num_points) x_density = [[y] for y in x] ## kde.score_samples returns the 'log' of the density log_density = self.kde.score_samples(x_density).tolist() density = map(math.exp, log_density) self.ax.plot(x, density, label = dataset.label, color = dataset.color, linewidth = linewidth, linestyle = dataset.linestyle)
def snr(vref, vcmp): """ Compute Signal to Noise Ratio (SNR) of two images. Parameters ---------- vref : array_like Reference image vcmp : array_like Comparison image Returns ------- x : float SNR of `vcmp` with respect to `vref` """ dv = np.var(vref) with np.errstate(divide='ignore'): rt = dv/mse(vref, vcmp) return 10.0*np.log10(rt)
def bsnr(vblr, vnsy): """ Compute Blurred Signal to Noise Ratio (BSNR) for a blurred and noisy image. Parameters ---------- vblr : array_like Blurred noise free image vnsy : array_like Blurred image with additive noise Returns ------- x : float BSNR of `vnsy` with respect to `vblr` and `vdeg` """ blrvar = np.var(vblr) nsevar = np.var(vnsy - vblr) with np.errstate(divide='ignore'): rt = blrvar/nsevar return 10.0*np.log10(rt)
def forward_pass(self, X, training=True): # Initialize running mean and variance if first run if self.running_mean is None: self.running_mean = np.mean(X, axis=0) self.running_var = np.var(X, axis=0) if training: mean = np.mean(X, axis=0) var = np.var(X, axis=0) self.X_centered = X - mean self.stddev_inv = 1 / np.sqrt(var + self.eps) self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var else: mean = self.running_mean var = self.running_var X_norm = (X - mean) / np.sqrt(var + self.eps) output = self.gamma * X_norm + self.beta return output
def Gelman_Rubin(sampled_parameters): nsamples = len(sampled_parameters[0]) nchains = len(sampled_parameters) nburnin = nsamples//2 chain_var = [np.var(sampled_parameters[chain][nburnin:,:], axis=0) for chain in range(nchains)] W = np.mean(chain_var, axis=0) chain_means = [np.mean(sampled_parameters[chain][nburnin:,:], axis=0) for chain in range(nchains)] B = np.var(chain_means, axis=0) var_est = (W*(1-(1./nsamples))) + B Rhat = np.sqrt(np.divide(var_est, W)) return Rhat
def testRelativeToJackknife(self): data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6, 7, 8, 9], "Y": [0, 0, 0, 1, 1, 1, 2, 2, 2]}) metric = metrics.Sum("X") comparison = comparisons.AbsoluteDifference("Y", 0) se_method = standard_errors.Jackknife() output = core.Analyze(data).relative_to(comparison).with_standard_errors( se_method).calculate(metric).run() rowindex = pd.Index([1, 2], name="Y") correct = pd.DataFrame( np.array([[9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))], [18.0, np.sqrt(5 * np.var([21, 20, 19, 11, 10, 9]))]]), columns=("sum(X) Absolute Difference", "sum(X) Absolute Difference Jackknife SE"), index=rowindex) self.assertTrue(output.equals(correct))
def testRelativeToJackknifeIncludeBaseline(self): data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6, 7, 8, 9], "Y": [0, 0, 0, 1, 1, 1, 2, 2, 2]}) metric = metrics.Sum("X") comparison = comparisons.AbsoluteDifference("Y", 0, include_base=True) se_method = standard_errors.Jackknife() output = core.Analyze(data).relative_to(comparison).with_standard_errors( se_method).calculate(metric).run() rowindex = pd.Index([0, 1, 2], name="Y") correct = pd.DataFrame( np.array([[0.0, 0.0], [9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))], [18.0, np.sqrt(5 * np.var([21, 20, 19, 11, 10, 9]))]]), columns=("sum(X) Absolute Difference", "sum(X) Absolute Difference Jackknife SE"), index=rowindex) self.assertTrue(output.equals(correct))
def testRelativeToJackknifeSingleComparisonBaselineFirst(self): data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6], "Y": [0, 0, 0, 1, 1, 1]}) metric = metrics.Sum("X") comparison = comparisons.AbsoluteDifference("Y", 0) se_method = standard_errors.Jackknife() output = core.Analyze(data).relative_to(comparison).with_standard_errors( se_method).calculate(metric).run() rowindex = pd.Index([1], name="Y") correct = pd.DataFrame( np.array([[9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))]]), columns=("sum(X) Absolute Difference", "sum(X) Absolute Difference Jackknife SE"), index=rowindex) self.assertTrue(output.equals(correct))
def testRelativeToJackknifeSingleComparisonBaselineSecond(self): data = pd.DataFrame({"X": [1, 2, 3, 4, 5, 6], "Y": [0, 0, 0, 1, 1, 1]}) metric = metrics.Sum("X") comparison = comparisons.AbsoluteDifference("Y", 1) se_method = standard_errors.Jackknife() output = core.Analyze(data).relative_to(comparison).with_standard_errors( se_method).calculate(metric).run() rowindex = pd.Index([0], name="Y") correct = pd.DataFrame( np.array([[-9.0, np.sqrt(5 * np.var([12, 11, 10, 5, 4, 3]))]]), columns=("sum(X) Absolute Difference", "sum(X) Absolute Difference Jackknife SE"), index=rowindex) self.assertTrue(output.equals(correct))
def testRelativeToSplitJackknife(self): data = pd.DataFrame( {"X": [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8], "Y": [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3], "Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]}) metric = metrics.Sum("X") comparison = comparisons.AbsoluteDifference("Z", 0) se_method = standard_errors.Jackknife() output = core.Analyze(data).split_by("Y").relative_to( comparison).with_standard_errors(se_method).calculate(metric).run() rowindex = pd.MultiIndex( levels=[[1, 2, 3], [1]], labels=[[0, 1, 2], [0, 0, 0]], names=["Y", "Z"]) correct = pd.DataFrame( np.array([[-3.0, np.sqrt(5 * np.var([0, -1, -2, -3, -4, -5]))], [-3.0, np.sqrt(5 * np.var([3, 2, 1, -8, -7, -6]))], [-3.0, np.sqrt(5 * np.var([6, 5, 4, -11, -10, -9]))]]), columns=("sum(X) Absolute Difference", "sum(X) Absolute Difference Jackknife SE"), index=rowindex) self.assertTrue(output.equals(correct))
def mainHmmGeneralClf(): isTrain = 1 # 1 for train, 0 for test isOutlierRemoval = 1 # 1 for outlier removal, 0 otherwise performance = 0 normalizedPerformance = 0 clf = ClassificationHmmGeneralize(isTrain) normPerforms = [] for i in range(12): print "Route: {}".format(i) [perfor, normaPefor] = clf.evaluateGeneral(clf.routes_general[i]) normPerforms.append(normaPefor) performance += perfor normalizedPerformance += normaPefor performance = round(performance/8, 2) normalizedPerformance = round(normalizedPerformance/8, 2) print "\nAverage Performance: {}%".format(performance) print "Average Normalized Performance: {}%".format(normalizedPerformance) print "Normalized Performance Variance: {}".format(np.var(normPerforms))
def mainUniformGeneralClf(): isTrain = 1 # 1 for train, 0 for test isOutlierRemoval = 1 # 1 for outlier removal, 0 otherwise performance = 0 normalizedPerformance = 0 clf = ClassificationUniformGeneralize(isTrain) print clf.X_general.shape normPerforms = [] for i in range(12): print "Route: {}".format(i) [perfor, normaPefor] = clf.evaluateGeneral(clf.routes_general[i]) normPerforms.append(normaPefor) performance += perfor normalizedPerformance += normaPefor performance = round(performance/8, 2) normalizedPerformance = round(normalizedPerformance/8, 2) print "\nAverage Performance: {}%".format(performance) print "Average Normalized Performance: {}%".format(normalizedPerformance) print "Normalized Performance Variance: {}".format(np.var(normPerforms))
def vif(self): vif = [] totalmanifests = range(len(self.data_.columns)) for i in range(len(totalmanifests)): independent = [x for j, x in enumerate(totalmanifests) if j != i] coef, resid = np.linalg.lstsq( self.data_.ix[:, independent], self.data_.ix[:, i])[:2] r2 = 1 - resid / \ (self.data_.ix[:, i].size * self.data_.ix[:, i].var()) vif.append(1 / (1 - r2)) vif = pd.DataFrame(vif, index=self.manifests) return vif
def explained_variance_1d(ypred,y): """ Var[ypred - y] / var[y]. https://www.quora.com/What-is-the-meaning-proportion-of-variance-explained-in-linear-regression """ assert y.ndim == 1 and ypred.ndim == 1 vary = np.var(y) return np.nan if vary==0 else 1 - np.var(y-ypred)/vary
def gauss_prob(mu, logstd, x): std = tf.exp(logstd) var = tf.square(std) gp = tf.exp(-(x - mu)/(2*var)) / ((2*np.pi)**.5 * std) return tf.reduce_prod(gp, [1])
def gauss_log_prob(mu, logstd, x): var = tf.exp(2*logstd) gp = -tf.square(x - mu)/(2*var) - .5*tf.log(tf.constant(2*np.pi)) - logstd return tf.reduce_sum(gp, [1])
def var(x, axis=None, keepdims=False): meanx = mean(x, axis=axis, keepdims=keepdims) return mean(tf.square(x - meanx), axis=axis, keepdims=keepdims)
def std(x, axis=None, keepdims=False): return tf.sqrt(var(x, axis=axis, keepdims=keepdims))
def minimize_and_clip(optimizer, objective, var_list, clip_val=10): """Minimized `objective` using `optimizer` w.r.t. variables in `var_list` while ensure the norm of the gradients for each variable is clipped to `clip_val` """ gradients = optimizer.compute_gradients(objective, var_list=var_list) for i, (grad, var) in enumerate(gradients): if grad is not None: gradients[i] = (tf.clip_by_norm(grad, clip_val), var) return optimizer.apply_gradients(gradients)
def test_output_shape(self): """ Test that the axis parameter is handled correctly """ stream = [np.random.random((16, 7, 3)) for _ in range(5)] stack = np.stack(stream, axis = -1) for axis in (0, 1, 2, None): with self.subTest('axis = {}'.format(axis)): from_numpy = np.var(stack, axis = axis) from_ivar = last(ivar(stream, axis = axis)) self.assertSequenceEqual(from_numpy.shape, from_ivar.shape) self.assertTrue(np.allclose(from_ivar, from_numpy))
def test_ddof(self): """ Test that the ddof parameter is equivalent to numpy's """ stream = [np.random.random((16, 7, 3)) for _ in range(10)] stack = np.stack(stream, axis = -1) with catch_warnings(): simplefilter('ignore') for axis in (0, 1, 2, None): for ddof in range(4): with self.subTest('axis = {}, ddof = {}'.format(axis, ddof)): from_numpy = np.var(stack, axis = axis, ddof = ddof) from_ivar = last(ivar(stream, axis = axis, ddof = ddof)) self.assertSequenceEqual(from_numpy.shape, from_ivar.shape) self.assertTrue(np.allclose(from_ivar, from_numpy))
def summarize_bootstrapped_top_n(top_n_boot): top_n_bcs_mean = np.mean(top_n_boot) top_n_bcs_sd = np.std(top_n_boot) top_n_bcs_var = np.var(top_n_boot) result = {} result['filtered_bcs_var'] = top_n_bcs_var result['filtered_bcs_cv'] = tk_stats.robust_divide(top_n_bcs_sd, top_n_bcs_mean) result['filtered_bcs_lb'] = round(scipy.stats.norm.ppf(0.025, top_n_bcs_mean, top_n_bcs_sd)) result['filtered_bcs_ub'] = round(scipy.stats.norm.ppf(0.975, top_n_bcs_mean, top_n_bcs_sd)) result['filtered_bcs'] = round(top_n_bcs_mean) return result
def auto_correlation_time(x, s, mu, var): b, t, d = x.shape act_ = np.zeros([d]) for i in range(0, b): y = x[i] - mu p, n = y[:-s], y[s:] act_ += np.mean(p * n, axis=0) / var act_ = act_ / b return act_
def gelman_rubin_diagnostic(x, logger, mu=None): m, n = x.shape[0], x.shape[1] theta = np.mean(x, axis=1) sigma = np.var(x, axis=1) # theta_m = np.mean(theta, axis=0) theta_m = mu if mu else np.mean(theta, axis=0) b = float(n) / float(m-1) * np.sum((theta - theta_m) ** 2) w = 1. / float(m) * np.sum(sigma, axis=0) v = float(n-1) / float(n) * w + float(m+1) / float(m * n) * b r_hat = np.sqrt(v / w) logger.info('R: max [%f] min [%f]' % (np.max(r_hat), np.min(r_hat))) return r_hat