The following 16 code examples, extracted from open source Python projects, illustrate how to use scipy.optimize.check_grad().
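As a point of reference before the project code, here is a minimal, self-contained sketch (the names func, grad and the test point x0 are illustrative, not drawn from any of the projects below): check_grad(func, grad, x0, *args) compares an analytical gradient against a finite-difference approximation built from the objective and returns the 2-norm of the difference, which should be close to zero when the gradient is correct.

import numpy as np
from scipy.optimize import check_grad


def func(x):
    # objective: f(x) = sum(x_i ** 2)
    return np.sum(x ** 2)


def grad(x):
    # analytical gradient: df/dx_i = 2 * x_i
    return 2 * x


x0 = np.array([1.5, -0.3, 2.0])
err = check_grad(func, grad, x0)  # 2-norm of (analytical - finite-difference) gradient
print(err)  # close to 0.0 when grad is consistent with func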
def test_huber_gradient():
    # Test that the gradient calculated by _huber_loss_and_gradient is correct
    rng = np.random.RandomState(1)
    X, y = make_regression_with_outliers()
    sample_weight = rng.randint(1, 3, (y.shape[0]))
    loss_func = lambda x, *args: _huber_loss_and_gradient(x, *args)[0]
    grad_func = lambda x, *args: _huber_loss_and_gradient(x, *args)[1]

    # Check using optimize.check_grad that the gradients are equal.
    for _ in range(5):
        # Check for both fit_intercept and otherwise.
        for n_features in [X.shape[1] + 1, X.shape[1] + 2]:
            w = rng.randn(n_features)
            w[-1] = np.abs(w[-1])
            grad_same = optimize.check_grad(
                loss_func, grad_func, w, X, y, 0.01, 0.1, sample_weight)
            assert_almost_equal(grad_same, 1e-6, 4)
def test_gradient():
    # Test gradient of Kullback-Leibler divergence.
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features).astype(np.float32)
    distances = distances.dot(distances.T)
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components)

    P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)
    fun = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                        n_components)[0]
    grad = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                         n_components)[1]
    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
                        decimal=5)
def _gamma_update_core(self):
    gamma = self.gamma
    err = check_grad(
        gamma_fullsum_func,
        gamma_fullsum_grad,
        gamma,
        self.node_vec,
        self.eventmemes,
        self.etimes,
        self.T,
        self.mu,
        self.alpha,
        self.omega,
        self.W,
        self.beta,
        self.kernel_evaluate,
        self.K_evaluate,
        )
    print('gradient error ', err)
    optout = minimize(
        gamma_fullsum_grad,
        gamma,
        (
            self.node_vec,
            self.eventmemes,
            self.etimes,
            self.T,
            self.mu,
            self.alpha,
            self.omega,
            self.W,
            self.beta,
            self.kernel_evaluate,
            self.K_evaluate,
            ),
        method='BFGS',
        jac=True,
        options={'disp': True},
        )
    return float(optout.x)
def test_grad():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    Z = rng.randn(8, 2)
    print(check_grad(_func, _grad, Z.ravel(), X))
def test_ridge_grad_id():
    """Test ovk.OVKRidgeRisk gradient with finite differences."""
    K = ovk.DecomposableKernel(A=eye(2))
    risk = ovk.OVKRidgeRisk(0.01)
    assert check_grad(lambda *args: risk.functional_grad_val(*args)[0],
                      lambda *args: risk.functional_grad_val(*args)[1],
                      randn(X.shape[0] * y.shape[1]),
                      y.ravel(), K(X, X)) < 1e-3
def test_ridge_grad_cov():
    """Test ovk.OVKRidgeRisk gradient with finite differences."""
    K = ovk.DecomposableKernel(A=eye(2))
    risk = ovk.OVKRidgeRisk(0.01)
    assert check_grad(lambda *args: risk.functional_grad_val(*args)[0],
                      lambda *args: risk.functional_grad_val(*args)[1],
                      randn(X.shape[0] * y.shape[1]),
                      y.ravel(), K(X, X)) < 1e-3
def test_rff_ridge_grad_id():
    """Test ovk.ORFFRidgeRisk gradient with finite differences."""
    K = ovk.DecomposableKernel(A=eye(2))
    risk = ovk.ORFFRidgeRisk(0.01)
    D = 100
    assert check_grad(lambda *args: risk.functional_grad_val(*args)[0],
                      lambda *args: risk.functional_grad_val(*args)[1],
                      randn(D * y.shape[1]),
                      y.ravel(), K.get_orff_map(X, D), K) < 1e-3
def test_rff_ridge_grad_cov():
    """Test ovk.ORFFRidgeRisk gradient with finite differences."""
    K = ovk.DecomposableKernel(A=eye(2))
    risk = ovk.ORFFRidgeRisk(0.01)
    D = 100
    assert check_grad(lambda *args: risk.functional_grad_val(*args)[0],
                      lambda *args: risk.functional_grad_val(*args)[1],
                      randn(D * y.shape[1]),
                      y.ravel(), K.get_orff_map(X, D), K) < 1e-3
def test_rff_ridge_hinge_grad():
    """Test ovk.ORFFRidgeRisk gradient with finite differences."""
    K = ovk.DecomposableKernel(A=eye(3))
    risk = ovk.ORFFRidgeRisk(0.01, 'Hinge')
    D = 100
    y = one_hot(randint(0, 3, X.shape[0]), 3)
    vl = check_grad(lambda *args: risk.functional_grad_val(*args)[0],
                    lambda *args: risk.functional_grad_val(*args)[1],
                    rand(D * y.shape[1]),
                    y.ravel(), K.get_orff_map(X, D), K)
    assert vl < 1e-3
def optimize(self, X, C, y, kernelNames, regions, reml=True, maxiter=100):
    methodName = ('REML' if reml else 'ML')
    if self.verbose:
        print('Finding MKLMM', methodName, 'parameters for', len(regions),
              'regions with lengths:', [np.sum(r) for r in regions])

    # prepare initial values for sig2e and for fixed effects
    hyp0_sig2e, hyp0_fixedEffects = self.getInitialHyps(X, C, y)

    # build kernel and train a model
    t0 = time.time()
    kernel, hyp0_kernels = self.buildKernel(X, kernelNames, regions, y.var())
    hyp0 = np.concatenate((hyp0_sig2e, hyp0_fixedEffects, hyp0_kernels))
    args = (kernel, C, y, reml)
    funcToSolve = self.infExact_scipy

    # check gradient correctness
    # if (len(hyp0) < 10):
    #     self.optimization_counter = 0
    #     likFunc = lambda hyp: funcToSolve(hyp, kernel, C, y, reml)[0]
    #     gradFunc = lambda hyp: funcToSolve(hyp, kernel, C, y, reml)[1]
    #     err = optimize.check_grad(likFunc, gradFunc, hyp0)
    #     print('gradient error:', err)

    if self.verbose:
        print('Beginning Optimization')
    self.optimization_counter = 0
    optObj = gpUtils.minimize(hyp0, funcToSolve, -maxiter, *args)
    if (not optObj.success):
        print('Optimization status:', optObj.status)
        print('optimization message:', optObj.message)
        raise Exception('Optimization failed with message: ' + optObj.message)

    sig2e = np.exp(2 * optObj.x[0])
    fixedEffects = optObj.x[1:C.shape[1] + 1]
    hyp_kernels = optObj.x[C.shape[1] + 1:]
    kernelObj = kernel

    if self.verbose:
        print('done in %0.2f' % (time.time() - t0), 'seconds')
        print('sig2e:', sig2e)
        print('Fixed effects:', fixedEffects)
        if (hyp_kernels.shape[0] < 18):
            print('kernel params:', hyp_kernels)

    return kernelObj, hyp_kernels, sig2e, fixedEffects

# convention: hyp[0] refers to sig2e, hyp[1:1+C.shape[1]+1] refer to fixed effects,
# hyp[self.trainCovars.shape[1]+1:] refers to kernels
def _mu_update_core(self):
    print('[HPSeqFullSumGradConstr] _mu_update_core')
    print('self.spontaneous_node_vec:', self.spontaneous_node_vec)
    logmu = np.log(self.mu)
    err = check_grad(
        updates.mu_fullsum_nonapprox.logmu_fullsum_func,
        updates.mu_fullsum_nonapprox.logmu_fullsum_grad,
        logmu,
        self.node_vec,
        self.eventmemes,
        self.etimes,
        self.T,
        self.gamma,
        self.alpha,
        self.omega,
        self.W,
        self.beta,
        self.kernel_evaluate,
        self.K_evaluate,
        )
    print('gradient error ', err)
    optout = minimize(
        updates.mu_fullsum_nonapprox.logmu_fullsum_funcgrad,
        logmu,
        (
            self.node_vec,
            self.eventmemes,
            self.etimes,
            self.T,
            self.gamma,
            self.alpha,
            self.omega,
            self.W,
            self.beta,
            self.kernel_evaluate,
            self.K_evaluate,
            ),
        method='L-BFGS-B',
        jac=True,
        options=self.optim_options,
        )
    new_mu = np.exp(np.array(optout.x))
    return np.array(new_mu)
def _gamma_update_core(self):
    print('[HPSeqFullSumGradConstr] _gamma_update_core')
    print('self.spontaneous_node_vec:', self.spontaneous_node_vec)
    loggamma = np.log(self.gamma)
    err = check_grad(
        updates.gamma_fullsum_nonapprox.loggamma_fullsum_func,
        updates.gamma_fullsum_nonapprox.loggamma_fullsum_grad,
        loggamma,
        self.node_vec,
        self.eventmemes,
        self.etimes,
        self.T,
        self.mu,
        self.alpha,
        self.omega,
        self.W,
        self.beta,
        self.kernel_evaluate,
        self.K_evaluate,
        )  # epsilon=0.000000000000001)
    print('gradient error ', err)
    optout = minimize(
        updates.gamma_fullsum_nonapprox.loggamma_fullsum_funcgrad,
        loggamma,
        (
            self.node_vec,
            self.eventmemes,
            self.etimes,
            self.T,
            self.mu,
            self.alpha,
            self.omega,
            self.W,
            self.beta,
            self.kernel_evaluate,
            self.K_evaluate,
            ),
        method='L-BFGS-B',
        jac=True,
        options=self.optim_options,
        )
    new_gamma = np.exp(np.array(optout.x))
    return np.array(new_gamma)
def _alpha_update_core(self):
    print('[HPSeqFullSumGradConstr] _alpha_update_core')
    logalpha = np.log(self.alpha).flatten()
    err = check_grad(
        updates.alpha_fullsum_nonapprox.logalpha_fullsum_func,
        updates.alpha_fullsum_nonapprox.logalpha_fullsum_grad,
        logalpha,
        self.node_vec,
        self.eventmemes,
        self.etimes,
        self.T,
        self.mu,
        self.gamma,
        self.omega,
        self.W,
        self.beta,
        self.kernel_evaluate,
        self.K_evaluate,
        )
    print('gradient error ', err)
    optout = minimize(
        updates.alpha_fullsum_nonapprox.logalpha_fullsum_funcgrad,
        logalpha,
        (
            self.node_vec,
            self.eventmemes,
            self.etimes,
            self.T,
            self.mu,
            self.gamma,
            self.omega,
            self.W,
            self.beta,
            self.kernel_evaluate,
            self.K_evaluate,
            ),
        method='L-BFGS-B',
        jac=True,
        options=self.optim_options,
        )
    new_alpha = np.exp(optout.x)
    return np.reshape(new_alpha, (self.alpha.shape[0], self.alpha.shape[1]))
def _mu_update_core(self):
    print('[HPSeqFullSumGrad] _mu_update_core')
    mu = self.mu
    for (index, mui) in enumerate(mu):
        print('mui:', mui)
        err = check_grad(
            mu_fullsum_func,
            mu_fullsum_grad,
            [mui],
            index,
            mu,
            self.node_vec,
            self.eventmemes,
            self.etimes,
            self.T,
            self.gamma,
            self.alpha,
            self.omega,
            self.W,
            self.beta,
            self.kernel_evaluate,
            self.K_evaluate,
            )
        print('gradient error ', err)
    new_mu = []
    for (index, mui) in enumerate(mu):
        optout = minimize(
            mu_fullsum_funcgrad,
            mui,
            (
                index,
                mu,
                self.node_vec,
                self.eventmemes,
                self.etimes,
                self.T,
                self.gamma,
                self.alpha,
                self.omega,
                self.W,
                self.beta,
                self.kernel_evaluate,
                self.K_evaluate,
                ),
            method='BFGS',
            jac=True,
            options={'disp': True},
            )
        new_mu.append(float(optout.x))
        mu[index] = float(optout.x)  # should we update the mu already?
    return np.array(new_mu)
def _alpha_update_core(self):
    print('[HPSeqFullSumGradConstr] _alpha_update_core')
    logalpha = np.log(self.alpha)
    for (index1, _) in enumerate(logalpha):
        for (index2, _) in enumerate(logalpha[index1]):
            logalphaij = logalpha[index1][index2]
            print('logalphaij:', logalphaij)
            err = check_grad(
                updates.alpha_fullsum_nonapprox_iter.logalpha_fullsum_func,
                updates.alpha_fullsum_nonapprox_iter.logalpha_fullsum_grad,
                [logalphaij],
                (index1, index2),
                logalpha,
                self.node_vec,
                self.eventmemes,
                self.etimes,
                self.T,
                self.mu,
                self.gamma,
                self.omega,
                self.W,
                self.beta,
                self.kernel_evaluate,
                self.K_evaluate,
                )
            print('gradient error ', err)
    new_logalpha = [[0 for (index2, _) in enumerate(logalpha[index1])]
                    for (index1, _) in enumerate(logalpha)]
    for (index1, _) in enumerate(logalpha):
        for (index2, _) in enumerate(logalpha[index1]):
            optout = minimize(
                updates.alpha_fullsum_nonapprox_iter.logalpha_fullsum_funcgrad,
                logalpha[index1][index2],
                (
                    (index1, index2),
                    logalpha,
                    self.node_vec,
                    self.eventmemes,
                    self.etimes,
                    self.T,
                    self.mu,
                    self.gamma,
                    self.omega,
                    self.W,
                    self.beta,
                    self.kernel_evaluate,
                    self.K_evaluate,
                    ),
                method='L-BFGS-B',
                jac=True,
                options={'disp': True},
                )
            new_logalpha[index1][index2] = float(optout.x)
    new_alpha = np.exp(np.matrix(new_logalpha))
    return np.array(new_alpha)
def solve_unit_norm_dual(lhs, rhs, lambd0, factr=1e7, debug=False,
                         lhs_is_toeplitz=False):
    if np.all(rhs == 0):
        return np.zeros(lhs.shape[0]), 0.

    n_atoms = lambd0.shape[0]
    n_times_atom = lhs.shape[0] // n_atoms

    # precompute SVD
    # U, s, V = linalg.svd(lhs)

    if lhs_is_toeplitz:
        # first column of the toeplitz matrix lhs
        lhs_c = lhs[0, :]

        # lhs will not stay toeplitz if we add different lambd on the diagonal
        assert n_atoms == 1

        def x_star(lambd):
            lambd += 1e-14  # avoid numerical issues
            # lhs_inv = np.dot(V.T / (s + np.repeat(lambd, n_times_atom)), U.T)
            # return np.dot(lhs_inv, rhs)
            lhs_c_copy = lhs_c.copy()
            lhs_c_copy[0] += lambd
            return linalg.solve_toeplitz(lhs_c_copy, rhs)

    else:
        def x_star(lambd):
            lambd += 1e-14  # avoid numerical issues
            # lhs_inv = np.dot(V.T / (s + np.repeat(lambd, n_times_atom)), U.T)
            # return np.dot(lhs_inv, rhs)
            return linalg.solve(lhs + np.diag(np.repeat(lambd, n_times_atom)),
                                rhs)

    def dual(lambd):
        x_hats = x_star(lambd)
        norms = linalg.norm(x_hats.reshape(-1, n_times_atom), axis=1)
        return (x_hats.T.dot(lhs).dot(x_hats) - 2 * rhs.T.dot(x_hats) +
                np.dot(lambd, norms ** 2 - 1.))

    def grad_dual(lambd):
        x_hats = x_star(lambd).reshape(-1, n_times_atom)
        return linalg.norm(x_hats, axis=1) ** 2 - 1.

    def func(lambd):
        return -dual(lambd)

    def grad(lambd):
        return -grad_dual(lambd)

    bounds = [(0., None) for idx in range(0, n_atoms)]
    if debug:
        assert optimize.check_grad(func, grad, lambd0) < 1e-5
    lambd_hats, _, _ = optimize.fmin_l_bfgs_b(func, x0=lambd0, fprime=grad,
                                              bounds=bounds, factr=factr)
    x_hat = x_star(lambd_hats)
    return x_hat, lambd_hats