The following 36 code examples, extracted from open source Python projects, illustrate how to use scipy.optimize.fmin_l_bfgs_b().
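Before the project examples, here is a minimal, self-contained sketch of the basic call pattern: a smooth objective, an analytic gradient passed via fprime, per-coordinate box bounds, and the (x, f, info) return triple. The quadratic objective and all variable names below are illustrative assumptions, not taken from any of the projects that follow.

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

# Illustrative objective only: f(x) = ||x - c||^2, minimized at x = c.
c = np.array([1.0, -2.0])

def objective(x):
    return np.sum((x - c) ** 2)

def gradient(x):
    return 2.0 * (x - c)

x0 = np.zeros(2)                    # starting point
bounds = [(-5.0, 5.0), (0.0, 5.0)]  # one (low, high) pair per coordinate

# Returns the minimizer, the objective value there, and an info dict.
x_opt, f_opt, info = fmin_l_bfgs_b(objective, x0, fprime=gradient, bounds=bounds)
print(x_opt, f_opt, info['warnflag'])  # warnflag == 0 indicates convergence

When no gradient is available, pass approx_grad=True and omit fprime, as several of the examples below do.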
def solve_image(eval_obj, niter, x, path=None):
    ''' returns an optimized image based on the l-bfgs optimizer, also returns the loss history '''
    last_min_val = 1000  # start from an arbitrary number
    loss_history = {}
    shp = x.shape
    imagenet_mean = [123.68, 116.779, 103.939]
    rn_mean = np.array((imagenet_mean), dtype=np.float32)
    deproc = lambda x, s: np.clip(x.reshape(s)[:, :, :, ::-1] + rn_mean, 0, 255)
    for i in range(niter):
        x, min_val, info = fmin_l_bfgs_b(eval_obj.loss, x.flatten(), fprime=eval_obj.grads, maxfun=20)
        x = np.clip(x, -127, 127)
        if abs(last_min_val - min_val) < 0.1:
            x = blurify(x)
        last_min_val = min_val
        loss_history[i] = min_val
        img = (deproc(x.copy(), shp)[0])
        imsave(path + '/res_at_iteration_%d.png' % (i), img)
    return x, loss_history
def bfgs(self):
    def ll(x):
        x = x.reshape((self.K, self.L))
        return self._ll(x)

    def dll(x):
        x = x.reshape((self.K, self.L))
        result = self._dll(x)
        result = result.reshape(self.K * self.L)
        return result

    Lambda = np.random.multivariate_normal(np.zeros(self.L), (self.sigma ** 2) * np.identity(self.L), size=self.K)
    Lambda = Lambda.reshape(self.K * self.L)
    newLambda, fmin, res = optimize.fmin_l_bfgs_b(ll, Lambda, dll)
    self.Lambda = newLambda.reshape((self.K, self.L))
def optimize(X, y, l1, l2, tol, max_iter):
    nsamples, nfactors = X.shape

    we = np.zeros(1 + 2 * nfactors)
    wb = [(None, None)] + [(0, None)] * nfactors * 2

    we, f, info = fmin_l_bfgs_b(
        func=LogisticRegressionLBFGSB.fgrad, x0=we, bounds=wb,
        fprime=None, pgtol=tol, maxiter=max_iter,
        args=(X, y, l1, l2), iprint=99
    )

    return we, f, info
def MLE_iteration_constrain(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        var1=0;var2=0;
        current_sum=0;likelihood_sum=0;
        new_psi1=[];new_psi2=[];
        if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_1,[sum(psi2)/len(psi2)],myfunc_der_1,args=[[i1[0],i2[0]],[s1[0],s2[0]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta2 = max(min(float(xopt[0]),1-cutoff),0);theta1=theta2+cutoff;
        else:#minize psi1 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_2,[sum(psi1)/len(psi1)],myfunc_der_2,args=[[i1[0],i2[0]],[s1[0],s2[0]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta1 = max(min(float(xopt[0]),1-cutoff),0);theta2=theta1+cutoff;
        #Debug;print('constrain_1xopt');print('theta');print(theta1);print(theta2);print(xopt);
        current_sum+=float(xopt[1]);
        new_psi1.append(theta1);new_psi2.append(theta2);
        psi1=new_psi1;psi2=new_psi2;
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum)/abs(previous_sum);
        previous_sum=current_sum;
    #Debug;print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    print('constrain');print(xopt);print(theta1);print(theta2);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def MLE_iteration(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        var1=0;var2=0;
        current_sum=0;likelihood_sum=0;
        new_psi1=[];new_psi2=[];
        #Debug;print('unconstrain_1xopt');
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i],psi2[i]],myfunc_individual_der,args=[[i1[i],i2[i]],[s1[i],s2[i]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1);
            new_psi1.append(float(xopt[0][0]));current_sum+=float(xopt[1]);
            new_psi2.append(float(xopt[0][1]));
            #Debug;print(xopt);
            likelihood_sum+=myfunc_likelihood([new_psi1[i],new_psi2[i]],[[i1[i],i2[i]],[s1[i],s2[i]],[beta_0,beta_1],var1]);
        psi1=new_psi1;psi2=new_psi2;
        #Debug;print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum)/abs(previous_sum);
        previous_sum=current_sum;
    if count>iter_maxrun:
        return([current_sum,[psi1,psi2,0,0,var1,var2]]);
    print('unconstrain');print(xopt);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);

#Random Sampling Function
def MLE_marginal_iteration_constrain_paired(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length,psi1_last,psi2_last,var1_last,var2_last,rho_last,beta_0_last,beta_1_last,current_sum_last):
    #initial value
    rho=rho_last;
    var1=var1_last;var2=var2_last;
    psi1=psi1_last;psi2=psi2_last;
    beta_0=beta_0_last;beta_1=beta_1_last;
    current_sum=current_sum_last;
    #MLE of the marginal likelihood
    iter_cutoff=1;iter_maxrun=40;count=0;previous_sum=1;likelihood_sum=0;
    #print('psi');print(psi1);print(psi2);
    if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case
        xopt = fmin_l_bfgs_b(myfunc_marginal_1_paired,[beta_1],myfunc_marginal_1_der_paired,args=[i1,s1,psi1,var1,i2,s2,psi2,var2,effective_inclusion_length,effective_skipping_length,rho],bounds=[[pow(10,-2),1-pow(10,-2)-cutoff]],iprint=-1)
        beta_1 = max(min(float(xopt[0][0]),1-cutoff),0);beta_0=beta_1+cutoff;
        current_sum=float(xopt[1]);
        #rho=xopt[0][1];
    else:#minize psi1 if this is the case
        xopt = fmin_l_bfgs_b(myfunc_marginal_2_paired,[beta_0],myfunc_marginal_2_der_paired,args=[i1,s1,psi1,var1,i2,s2,psi2,var2,effective_inclusion_length,effective_skipping_length,rho],bounds=[[pow(10,-2),1-pow(10,-2)-cutoff]],iprint=-1)
        beta_0 = max(min(float(xopt[0][0]),1-cutoff),0);beta_1=beta_0+cutoff;
        current_sum=float(xopt[1]);
        #rho=xopt[0][1];
    print('constrain_MLE_marginal_xopt_mean');print(xopt);print(beta_0);print(beta_1);
    # if xopt[2]['warnflag']!=0:
    #     return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2,rho,0]]);
    # else:
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2,rho,1]]);
    #new_psi1=[];new_psi2=[];
    #for i in range(len(psi1)):
    #    xopt = fmin_l_bfgs_b(myfunc_individual_paired,[psi1[i],psi2[i]],myfunc_individual_der_paired,args=[i1[i],s1[i],beta_0,var1,i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length,rho],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1);
    #    new_psi1.append(float(xopt[0][0]));new_psi2.append(float(xopt[0][1]));
    #    print('unconstrain_MLE_marginal_xopt_replicate');print(xopt);
    #psi1=new_psi1;psi2=new_psi2;

#Random Sampling Function
def optimized(self):
    """
    Function performs whole model training. Not step by step.
    """
    res = fmin_l_bfgs_b(self.j, np.zeros(self.x.shape[1]), self.dj)
    return res[0]
def get_maximum_feedback(self, context):
    """Returns the maximal feedback obtainable in given context."""
    c = tuple(list(context))
    if c not in self.max_feedback_cache:
        self.max_feedback_cache[c] = -np.inf
        for _ in range(10):
            x0 = [uniform.rvs(5.0, 5.0), uniform.rvs(0.0, np.pi / 2)]
            result = fmin_l_bfgs_b(
                lambda x: -self._compute_reward(x, context), x0,
                approx_grad=True, bounds=[(5.0, 10.0), (0.0, np.pi / 2)])
            if -result[1] > self.max_feedback_cache[c]:
                self.max_feedback_cache[c] = -result[1]
    return self.max_feedback_cache[c]
def imitate(self, traj, maxfun=2500):
    """ Imitate a given trajectory with parameter optimization (less than 1 second). """
    self.dmp.imitate_path(np.transpose(traj))
    x0 = np.array(list(self.dmp.w.flatten()) + list(self.dmp.goal))
    f = lambda w: np.linalg.norm(traj - self.trajectory(w))
    return fmin_l_bfgs_b(f, x0, maxfun=maxfun, bounds=self.bounds, approx_grad=True)[0]
def style(self):
    """
    Run L-BFGS over the pixels of the generated image
    so as to minimize the neural style loss.
    """
    print('\nDone initializing... Ready to style!')

    # initialize white noise image
    if K.image_dim_ordering() == 'th':
        x = np.random.uniform(0, 255, (1, 3, self.img_nrows, self.img_ncols)) - 128.
    else:
        x = np.random.uniform(0, 255, (1, self.img_nrows, self.img_ncols, 3)) - 128.

    for i in range(self.iterations):
        print('\n\tIteration: {}'.format(i+1))

        toc = time.time()
        x, min_val, info = fmin_l_bfgs_b(self.loss, x.flatten(), fprime=self.grads, maxfun=20)

        # save current generated image
        img = deprocess_image(x.copy(), self.img_nrows, self.img_ncols)
        fname = self.output_img_path + '_at_iteration_%d.png' % (i+1)
        imsave(fname, img)

        tic = time.time()

        print('\t\tImage saved as', fname)
        print('\t\tLoss: {:.2e}, Time: {} seconds'.format(float(min_val), float(tic-toc)))
def binregress(x, y, func, theta_init, bounds=None):
    xmin, fmin, info = fminimize(binregress_objective, theta_init, bounds=bounds,
                                 args=(x, y, func), approx_grad=True, iprint=0)
    return xmin


#=========================================================================================
# Fit functions
#=========================================================================================
def MLE_iteration_constrain(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        var1=0;var2=0;
        current_sum=0;likelihood_sum=0;
        new_psi1=[];new_psi2=[];
        if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_1,[sum(psi2)/len(psi2)],myfunc_der_1,args=[[i1[0],i2[0]],[s1[0],s2[0]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta2 = max(min(float(xopt[0]),1-cutoff),0);theta1=theta2+cutoff;
        else:#minize psi1 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_2,[sum(psi1)/len(psi1)],myfunc_der_2,args=[[i1[0],i2[0]],[s1[0],s2[0]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta1 = max(min(float(xopt[0]),1-cutoff),0);theta2=theta1+cutoff;
        #Debug;print('constrain_1xopt');print('theta');print(theta1);print(theta2);print(xopt);
        current_sum+=float(xopt[1]);
        new_psi1.append(theta1);new_psi2.append(theta2);
        psi1=new_psi1;psi2=new_psi2;
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum)/abs(previous_sum);
        previous_sum=current_sum;
    #Debug;print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print('constrain');print(xopt);print(theta1);print(theta2);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def MLE_iteration(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        var1=0;var2=0;
        current_sum=0;likelihood_sum=0;
        new_psi1=[];new_psi2=[];
        #Debug;print('unconstrain_1xopt');
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i],psi2[i]],myfunc_individual_der,args=[[i1[i],i2[i]],[s1[i],s2[i]],[beta_0,beta_1],var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1);
            new_psi1.append(float(xopt[0][0]));current_sum+=float(xopt[1]);
            new_psi2.append(float(xopt[0][1]));
            #Debug;print(xopt);
            likelihood_sum+=myfunc_likelihood([new_psi1[i],new_psi2[i]],[[i1[i],i2[i]],[s1[i],s2[i]],[beta_0,beta_1],var1]);
        psi1=new_psi1;psi2=new_psi2;
        #Debug;print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum)/abs(previous_sum);
        previous_sum=current_sum;
    if count>iter_maxrun:
        return([current_sum,[psi1,psi2,0,0,var1,var2]]);
    #print('unconstrain');print(xopt);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);

#Random Sampling Function
def minimize_impl(self, objective, gradient, inits, bounds):
    if gradient is None:
        approx_grad = True
    else:
        approx_grad = False

    self.niter = 0

    def callback(xs):
        self.niter += 1
        if self.verbose:
            if self.niter % 50 == 0:
                print('iter ', '\t'.join([x.name.split(':')[0] for x in variables]))
            print('{: 4d} {}'.format(self.niter, '\t'.join(map(str, xs))))
        if self.callback is not None:
            self.callback(xs)

    results = fmin_l_bfgs_b(
        objective,
        inits,
        m=self.m,
        fprime=gradient,
        factr=self.factr,
        pgtol=self.pgtol,
        callback=callback,
        approx_grad=approx_grad,
        bounds=bounds,
    )

    ret = OptimizationResult()
    ret.x = results[0]
    ret.func = results[1]
    ret.niter = results[2]['nit']
    ret.calls = results[2]['funcalls']
    ret.message = results[2]['task'].decode().lower()
    ret.success = results[2]['warnflag'] == 0

    return ret
def estimate_params(data, state, alpha0=0.3, beta0=0.1, gamma0=0.1):
    """Estimate Holt Winters parameters from data

    Parameters
    ----------
    data : ndarray
        observations
    state : HWState
        initial state for HW (one step prior to first data value)
    alpha0, beta0, gamma0 : float, float, float
        initial guess for HW parameters

    Returns
    -------
    params : HWParams

    Notes
    -----
    This is not a demo about estimating Holt Winters parameters, and this
    is not a great way to go about it, because it does not produce good
    out-of-sample error. In this demo, we unrealistically train the HW
    parameters over all the data, not just the training prefix used for
    the initial seasonal state estimate.
    """
    def _forecast_error(x0, state, data):
        """bfgs HW parameter error callback."""
        E = 0
        state = deepcopy(state)
        params = HWParams(*x0)
        for y in data:
            state, e = advance(y, params, state)
            E += e * e
        return E / len(data)

    alpha, beta, gamma = fmin_l_bfgs_b(
        _forecast_error,
        x0=[alpha0, beta0, gamma0],
        bounds=[[0, 1]] * 3,
        args=(state, data),
        approx_grad=True)[0]

    return HWParams(alpha, beta, gamma)
def optimize(self, x, model):
    evaluator = ModelEvaluator(model)

    data_bounds = np.repeat(
        # from VGG - there's probably a nicer way to express this...
        [(-103.939, 255. - 103.939, -116.779, 255.0 - 116.779, -123.68, 255 - 123.68)],
        np.product(x.shape) // 3, axis=0
    ).reshape((np.product(x.shape), 2))

    x, min_val, info = fmin_l_bfgs_b(
        evaluator.loss, x.flatten(),
        fprime=evaluator.grads, maxfun=20, maxiter=20,
        factr=1e7, m=4,
        bounds=data_bounds, iprint=0)

    return x, min_val, info
def optimize_inducing_points(self, X, Y, M, kern):
    dim = np.shape(X)[1]
    hyp_init = np.ones((dim+2, 1))
    for i in xrange(dim):
        hyp_init[i] = kern.lengthscale
    hyp_len = len(hyp_init)
    hyp_init[hyp_len - 2] = kern.variance
    hyp_init[hyp_len - 1] = kern.noise

    rand = ran.sample(range(0, X.shape[0]), M)
    I = X[rand]
    W = np.vstack((np.reshape(I, (M*dim, 1), order='F'), hyp_init))

    res = fmin_l_bfgs_b(self.like_spgp, W, iprint=False, args=(X, Y, M))[0]
    return np.reshape(res[0:M*dim], (M, dim), order='F')
def test_func_smac(self):
    func = rosenbrock_2d
    x0 = [-3, -4]
    bounds = [(-5, 5), (-5, 5)]
    x, f, _ = fmin_smac(func, x0, bounds, maxfun=10)
    x_s, f_s, _ = fmin_l_bfgs_b(func, x0, bounds, maxfun=10, approx_grad=True)
    self.assertEqual(type(x), type(x_s))
    self.assertEqual(type(f), type(f_s))
def train(self, history_tuples, reg_lambda = 0.01):  # history_tuples, function_obj, reg_lambda = 0.01,
    self.iteration = 0
    self.h_tuples = history_tuples
    self.reg = reg_lambda
    self.dataset = None  # this will be set by create_dataset
    self.tag_set = self.func.supported_tags  # None # this will be also be set by create_dataset - this is the set of all tags
    self.create_dataset()
    self.dim = self.dataset.shape[1]  # len(self.dataset[0])
    self.num_examples = self.dataset.shape[0]
    if (self.model == None) or (self.model.shape[0] != self.dim):
        self.model = numpy.array([0 for d in range(self.dim)])  # initialize the model to all 0
    #self.model = numpy.array([0 for d in range(self.dim)]) # initialize the model to all 0
    dt1 = datetime.datetime.now()
    #print 'before training: ', dt1
    try:
        from scipy.optimize import minimize as mymin
        params = mymin(self.cost, self.model, method = 'L-BFGS-B', callback = self.cb, options = {'maxiter':25})  #, jac = self.gradient) # , options = {'maxiter':100}
    except:
        #print "Importing alternate minimizer fmin_l_bfgs_b"
        from scipy.optimize import fmin_l_bfgs_b as mymin
        params = mymin(self.cost, self.model, fprime = self.gradient)  # , options = {'maxiter':100}
    self.model = params.x
    dt2 = datetime.datetime.now()
    #print 'after training: ', dt2, ' total time = ', (dt2 - dt1).total_seconds()
    if self.pic_file != None:
        pickle.dump({'model': self.model, 'tag_set': self.tag_set}, open(self.pic_file, "wb"))
    return self.cost_value
def optimize_pt(self, initializer, bounds, current_best, compute_grad=True):
    opt_x, opt_y, opt_info = spo.fmin_l_bfgs_b(
        self.acq_optimize_wrapper,
        initializer.flatten(), args=(current_best, compute_grad),
        bounds=bounds, disp=0, approx_grad=(not compute_grad))
    return opt_x
def __maximize(self, index):
    """Helper function that leverages the BFGS algorithm with a bounded
    input space in order to converge the maximum of the acquisition
    function using gradient ascent. Notice that this function returns the
    point in the original input space, not the point in the unit
    hypercube.

    Parameters:
        index (int): An integer that keeps track of the index in the Sobol
            sequence at which to generate a pseudo-random configuration of
            inputs in the unit hypercube. This is done in order to exploit
            the optimally uniform properties of the Sobol sequence.

    Returns:
        A tuple containing first the numpy array representing the input in
        the unit hypercube that minimizes the negative of the acquisition
        function (or, equivalently, maximizes the acquisition function) as
        well as the value of the acquisition function at the discovered
        maximum.
    """
    # Number of dimensions.
    k = self.model.X.shape[1]
    # x = np.random.uniform(low=0., high=1., size=(1, k))
    x = sobol_seq.i4_sobol(k, index+1)[0]
    try:
        # Bounds on the search space used by the BFGS algorithm.
        bounds = [(0., 1.)] * k
        # Call the BFGS algorithm to perform the maximization.
        res = fmin_l_bfgs_b(
            self.__negative_acquisition_function,
            x,
            fprime=self.__negative_acquisition_function_grad,
            bounds=bounds,
            disp=0
        )
        x_max = np.atleast_2d(res[0])
    except AttributeError:
        # In the case of a non-differentiable model, instead use the initial
        # random sample.
        x_max = x

    return x_max.ravel(), self.evaluate(x_max)
def __init__(self, kernel=None, alpha=1e-10,
             optimizer="fmin_l_bfgs_b", n_restarts_optimizer=0,
             normalize_y=False, copy_X_train=True, random_state=None):
    self.kernel = kernel
    self.alpha = alpha
    self.optimizer = optimizer
    self.n_restarts_optimizer = n_restarts_optimizer
    self.normalize_y = normalize_y
    self.copy_X_train = copy_X_train
    self.random_state = random_state
def _constrained_optimization(self, obj_func, initial_theta, bounds):
    if self.optimizer == "fmin_l_bfgs_b":
        theta_opt, func_min, convergence_dict = \
            fmin_l_bfgs_b(obj_func, initial_theta, bounds=bounds)
        if convergence_dict["warnflag"] != 0:
            warnings.warn("fmin_l_bfgs_b terminated abnormally with the "
                          " state: %s" % convergence_dict)
    elif callable(self.optimizer):
        theta_opt, func_min = \
            self.optimizer(obj_func, initial_theta, bounds=bounds)
    else:
        raise ValueError("Unknown optimizer %s." % self.optimizer)

    return theta_opt, func_min
def __init__(self, kernel=None, optimizer="fmin_l_bfgs_b",
             n_restarts_optimizer=0, max_iter_predict=100,
             warm_start=False, copy_X_train=True, random_state=None):
    self.kernel = kernel
    self.optimizer = optimizer
    self.n_restarts_optimizer = n_restarts_optimizer
    self.max_iter_predict = max_iter_predict
    self.warm_start = warm_start
    self.copy_X_train = copy_X_train
    self.random_state = random_state
def __init__(self, kernel=None, optimizer="fmin_l_bfgs_b",
             n_restarts_optimizer=0, max_iter_predict=100,
             warm_start=False, copy_X_train=True, random_state=None,
             multi_class="one_vs_rest", n_jobs=1):
    self.kernel = kernel
    self.optimizer = optimizer
    self.n_restarts_optimizer = n_restarts_optimizer
    self.max_iter_predict = max_iter_predict
    self.warm_start = warm_start
    self.copy_X_train = copy_X_train
    self.random_state = random_state
    self.multi_class = multi_class
    self.n_jobs = n_jobs
def MLE_iteration_constrain(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    beta_0=sum(psi1)/len(psi1);
    beta_1=sum(psi2)/len(psi2);
    var1=10*scipy.var(numpy.array(psi1)-beta_0);
    var2=10*scipy.var(numpy.array(psi2)-beta_1);
    if var1<=0.01:
        var1=0.01;
    if var2<=0.01:
        var2=0.01;
    print('var1');print(var1);print('var2');print(var2);
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        print('var1');print(var1);print('var2');print(var2);
        #if abs(sum(psi1)/len(psi1)-sum(psi2)/len(psi2))>cutoff:
        if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_1,[sum(psi2)/len(psi2)],myfunc_der_1,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta2 = max(min(float(xopt[0]),1-cutoff),0);theta1=theta2+cutoff;
        else:#minize psi1 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_2,[sum(psi1)/len(psi1)],myfunc_der_2,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta1 = max(min(float(xopt[0]),1-cutoff),0);theta2=theta1+cutoff;
        print('constrain_1xopt');print('theta');print(theta1);print(theta2);print(xopt);
        #else:
        #    theta1=sum(psi1)/len(psi1);theta2=sum(psi2)/len(psi2);
        beta_0=theta1;beta_1=theta2;
        #iteration of psi
        new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0;
        print('constrain_2xopt');
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
        for i in range(len(psi2)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
        print('new_psi[0]');print(new_psi1[0]);print(new_psi2[0]);
        psi1=new_psi1;psi2=new_psi2;
        print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum);
        previous_sum=current_sum;
    #print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print(xopt);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
    #return([likelihood_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def MLE_iteration(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    beta_0=sum(psi1)/len(psi1);
    beta_1=sum(psi2)/len(psi2);
    var1=10*scipy.var(numpy.array(psi1)-beta_0);
    var2=10*scipy.var(numpy.array(psi2)-beta_1);
    if var1<=0.01:
        var1=0.01;
    if var2<=0.01:
        var2=0.01;
    print('var1');print(var1);print('var2');print(var2);
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        xopt=fmin_l_bfgs_b(myfunc_multivar,[beta_0,beta_1],myfunc_multivar_der,args=[psi1,psi2,var1,var2],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1);
        beta_0=float(xopt[0][0]);
        beta_1=float(xopt[0][1]);
        print('unconstrain_1xopt');print(xopt);
        print('theta');print(beta_0);print(beta_1);print('theta_end');
        #iteration of psi
        new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0;
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
        for i in range(len(psi2)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
        print('new_psi[0]');print(new_psi1[0]);print(new_psi2[0]);
        psi1=new_psi1;psi2=new_psi2;print
        print('count');print(count);('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum);
        previous_sum=current_sum;
    #print('unconstrain');print(beta_0);print(beta_0+beta_1);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print(xopt);
    if count>iter_maxrun:
        return([current_sum,[psi1,psi2,0,0,var1,var2]]);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
    #return([likelihood_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def solve_dual_reps(R, epsilon, min_eta):
    """Solve dual function for REPS.

    Parameters
    ----------
    R : array, shape (n_samples_per_update,)
        Corresponding obtained rewards

    epsilon : float
        Maximum Kullback-Leibler divergence of two successive policy
        distributions.

    min_eta : float
        Minimum eta, 0 would result in numerical problems

    Returns
    -------
    d : array, shape (n_samples_per_update,)
        Weights for training samples

    eta : float
        Temperature
    """
    if R.ndim != 1:
        raise ValueError("Returns must be passed in a flat array!")

    R_max = R.max()
    R_min = R.min()
    if R_max == R_min:
        return np.ones(R.shape[0]) / float(R.shape[0]), np.nan  # eta not known

    # Normalize returns into range [0, 1] such that eta (and min_eta)
    # always lives on the same scale
    R = (R - R_min) / (R_max - R_min)

    # Definition of the dual function
    def g(eta):  # Objective function
        return eta * epsilon + eta * logsumexp(R / eta, b=1.0 / len(R))

    # Lower bound for Lagrangian eta
    bounds = np.array([[min_eta, None]])
    # Start point of optimization
    x0 = [1]
    # Perform the actual optimization of the dual function
    r = fmin_l_bfgs_b(g, x0, approx_grad=True, bounds=bounds)

    # Fetch optimal Lagrangian parameter eta. Corresponds to a temperature
    # of a softmax distribution
    eta = r[0][0]

    # Determine weights of individual samples based on their return and
    # the "temperature" eta
    log_d = R / eta
    # Numerically stable softmax version of the weights. Note that
    # this neither changes the solution of the weighted least
    # squares nor the estimation of the covariance.
    d = np.exp(log_d - log_d.max())
    d /= d.sum()

    return d, r[0]
def solve_unit_norm_dual(lhs, rhs, lambd0, factr=1e7, debug=False,
                         lhs_is_toeplitz=False):
    if np.all(rhs == 0):
        return np.zeros(lhs.shape[0]), 0.

    n_atoms = lambd0.shape[0]
    n_times_atom = lhs.shape[0] // n_atoms

    # precompute SVD
    # U, s, V = linalg.svd(lhs)

    if lhs_is_toeplitz:
        # first column of the toeplitz matrix lhs
        lhs_c = lhs[0, :]

        # lhs will not stay toeplitz if we add different lambd on the diagonal
        assert n_atoms == 1

        def x_star(lambd):
            lambd += 1e-14  # avoid numerical issues
            # lhs_inv = np.dot(V.T / (s + np.repeat(lambd, n_times_atom)), U.T)
            # return np.dot(lhs_inv, rhs)
            lhs_c_copy = lhs_c.copy()
            lhs_c_copy[0] += lambd
            return linalg.solve_toeplitz(lhs_c_copy, rhs)

    else:
        def x_star(lambd):
            lambd += 1e-14  # avoid numerical issues
            # lhs_inv = np.dot(V.T / (s + np.repeat(lambd, n_times_atom)), U.T)
            # return np.dot(lhs_inv, rhs)
            return linalg.solve(lhs + np.diag(np.repeat(lambd, n_times_atom)),
                                rhs)

    def dual(lambd):
        x_hats = x_star(lambd)
        norms = linalg.norm(x_hats.reshape(-1, n_times_atom), axis=1)
        return (x_hats.T.dot(lhs).dot(x_hats) - 2 * rhs.T.dot(x_hats) +
                np.dot(lambd, norms ** 2 - 1.))

    def grad_dual(lambd):
        x_hats = x_star(lambd).reshape(-1, n_times_atom)
        return linalg.norm(x_hats, axis=1) ** 2 - 1.

    def func(lambd):
        return -dual(lambd)

    def grad(lambd):
        return -grad_dual(lambd)

    bounds = [(0., None) for idx in range(0, n_atoms)]
    if debug:
        assert optimize.check_grad(func, grad, lambd0) < 1e-5

    lambd_hats, _, _ = optimize.fmin_l_bfgs_b(func, x0=lambd0, fprime=grad,
                                              bounds=bounds, factr=factr)
    x_hat = x_star(lambd_hats)
    return x_hat, lambd_hats
def MLE_iteration_constrain(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    beta_0=sum(psi1)/len(psi1);
    beta_1=sum(psi2)/len(psi2);
    var1=10*scipy.var(numpy.array(psi1)-beta_0);
    var2=10*scipy.var(numpy.array(psi2)-beta_1);
    if var1<=0.01:
        var1=0.01;
    if var2<=0.01:
        var2=0.01;
    #print('var1');print(var1);print('var2');print(var2);
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        #print('var1');print(var1);print('var2');print(var2);
        #if abs(sum(psi1)/len(psi1)-sum(psi2)/len(psi2))>cutoff:
        if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_1,[sum(psi2)/len(psi2)],myfunc_der_1,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta2 = max(min(float(xopt[0]),1-cutoff),0);theta1=theta2+cutoff;
        else:#minize psi1 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_2,[sum(psi1)/len(psi1)],myfunc_der_2,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1)
            theta1 = max(min(float(xopt[0]),1-cutoff),0);theta2=theta1+cutoff;
        #print('constrain_1xopt');print('theta');print(theta1);print(theta2);print(xopt);
        #else:
        #    theta1=sum(psi1)/len(psi1);theta2=sum(psi2)/len(psi2);
        beta_0=theta1;beta_1=theta2;
        #iteration of psi
        new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0;
        #print('constrain_2xopt');
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);#print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
        for i in range(len(psi2)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);#print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
        #print('new_psi[0]');print(new_psi1[0]);print(new_psi2[0]);
        psi1=new_psi1;psi2=new_psi2;
        #print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum);
        previous_sum=current_sum;
    #print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print(xopt);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
    #return([likelihood_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def MLE_iteration(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
    psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
    iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
    beta_0=sum(psi1)/len(psi1);
    beta_1=sum(psi2)/len(psi2);
    var1=10*scipy.var(numpy.array(psi1)-beta_0);
    var2=10*scipy.var(numpy.array(psi2)-beta_1);
    if var1<=0.01:
        var1=0.01;
    if var2<=0.01:
        var2=0.01;
    #print('var1');print(var1);print('var2');print(var2);
    while((iter_cutoff>0.01)&(count<=iter_maxrun)):
        count+=1;
        #iteration of beta
        beta_0=sum(psi1)/len(psi1);
        beta_1=sum(psi2)/len(psi2);
        xopt=fmin_l_bfgs_b(myfunc_multivar,[beta_0,beta_1],myfunc_multivar_der,args=[psi1,psi2,var1,var2],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1);
        beta_0=float(xopt[0][0]);
        beta_1=float(xopt[0][1]);
        #print('unconstrain_1xopt');print(xopt);
        #print('theta');print(beta_0);print(beta_1);print('theta_end');
        #iteration of psi
        new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0;
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);#print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
        for i in range(len(psi2)):
            xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
            new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);#print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
        #print('new_psi[0]');print(new_psi1[0]);#print(new_psi2[0]);
        psi1=new_psi1;psi2=new_psi2;#print
        #print('count');print(count);('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
        if count>1:
            iter_cutoff=abs(previous_sum-current_sum);
        previous_sum=current_sum;
    #print('unconstrain');print(beta_0);print(beta_0+beta_1);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print(xopt);
    if count>iter_maxrun:
        return([current_sum,[psi1,psi2,0,0,var1,var2]]);
    return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
    #return([likelihood_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
def fit_marginal_likelihood(X, y, n_restarts, kernel, model_class):
    """Fit the parameters of the Gaussian process by maximizing the
    marginal log-likelihood of the data.
    """
    # Initialize best log-likelihood observed so far and initialize the
    # search bounds of the BFGS algorithm.
    best_ll = -np.inf
    bounds = kernel.bounds

    for i in range(n_restarts):
        # Keep track of progress.
        print("Progress:\t{} / {}".format(i, n_restarts))
        # Randomly generate kernel parameters.
        initial_params = np.concatenate(
            [p.sample() for p in kernel.parameters]
        )
        # Train the model.
        try:
            # Minimize the negative marginal likelihood.
            res = fmin_l_bfgs_b(
                __negative_marginal_likelihood,
                initial_params,
                fprime=__negative_marginal_likelihood_grad,
                args=(X, y, kernel, model_class),
                bounds=bounds,
                disp=0
            )
            bfgs_params = res[0]
        except np.linalg.linalg.LinAlgError:
            print("Linear algebra failure.")
            continue
        except UnboundLocalError:
            print("Unbound local variable.")
            continue
        else:
            # Update kernel parameters.
            kernel.update(bfgs_params)
            model = model_class(kernel)
            model.fit(X, y)
            # Keep track of the kernel parameters corresponding to the best
            # model learned so far.
            cur_ll = model.log_likelihood()
            if cur_ll > best_ll:
                best_ll = cur_ll
                best_params = bfgs_params
                best_model = model
                print("Best log-likelihood:\t{}".format(best_ll))

    # Set the parameters of the Gaussian process according to the kernel
    # parameters.
    kernel.update(best_params)
    model = model_class(kernel)
    model.fit(X, y)

    # Return the Gaussian process whose kernel parameters were estimated via
    # maximum marginal likelihood.
    return model