The following 16 code examples, extracted from open-source Python projects, illustrate how to use sklearn.grid_search.ParameterGrid().
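Before the examples, a minimal, self-contained sketch of what ParameterGrid does (the parameter names below are made up for illustration): it expands a dict mapping parameter names to lists of candidate values into the Cartesian product of concrete settings, each yielded as an ordinary dict. Note that the sklearn.grid_search module was deprecated in scikit-learn 0.18; in current versions the same class is imported from sklearn.model_selection.

from sklearn.grid_search import ParameterGrid  # sklearn.model_selection in 0.18+

# Two candidate values for each of two (made-up) parameters -> 2 * 2 = 4 settings.
param_grid = {'C': [0.1, 1.0], 'kernel': ['linear', 'rbf']}
grid = ParameterGrid(param_grid)

print(len(grid))   # 4 -- a ParameterGrid supports len() and indexing
print(grid[0])     # one concrete setting, e.g. {'C': 0.1, 'kernel': 'linear'}
for params in grid:
    print(params)  # iterates over all 4 combinations
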
Example 1

def cross_validate(self, X, y):
    print "fitting {} to the training set".format(self.name)
    if self.param_grid is not None:
        # evaluate every parameter combination in the grid
        param_sets = list(ParameterGrid(self.param_grid))
        n_param_sets = len(param_sets)
        param_scores = []
        for j, param_set in enumerate(param_sets):
            print "--------------"
            print "training the classifier..."
            print "parameter set:"
            for k, v in param_set.iteritems():
                print "{}:{}".format(k, v)
            param_score = self.evaluate(X, y, param_set=param_set)
            param_scores.append(param_score)
        # keep the parameter set with the highest score
        p = np.argmax(np.array(param_scores))
        self.best_param_set = param_sets[p]
        print "best parameter set", self.best_param_set
        print "best score:", param_scores[p]
    else:
        score = self.evaluate(X, y)

Example 2

def __get_param_iterable(self, param_grid):
    if self.ramdonized_search_enable:
        parameter_iterable = ParameterSampler(param_grid,
                                              self.randomized_search_n_iter,
                                              random_state=self.ramdonized_search_random_state)
    else:
        parameter_iterable = ParameterGrid(param_grid)
    return parameter_iterable

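Example 2 switches between exhaustive and randomized search over the same parameter grid. A minimal sketch of the difference between the two iterators (parameter names made up for illustration):

from sklearn.grid_search import ParameterGrid, ParameterSampler

param_grid = {'alpha': [0.1, 1.0, 10.0], 'fit_intercept': [True, False]}

# ParameterGrid enumerates all 3 * 2 = 6 combinations;
# ParameterSampler draws a fixed number of settings at random.
print(len(list(ParameterGrid(param_grid))))                        # 6
print(len(list(ParameterSampler(param_grid, 3, random_state=0))))  # 3
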
Example 3

def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    return self._fit(frame, ParameterGrid(self.param_grid))

Example 4

def fit(self, X, y=None):
    """Run fit with all sets of parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    """
    return self._fit(X, y, ParameterGrid(self.param_grid))

Example 5

def __init__(self, experiment, args, job_module_config):
    super(self.__class__, self).__init__(experiment, args, job_module_config)

    # pre-format the experiment dict
    # Sklearn needs all the params to be in a list for the grid to work properly
    for param in experiment['params']:
        if type(experiment['params'][param]) is not list:
            experiment['params'][param] = [experiment['params'][param]]

    self.searcher = ParameterGrid(experiment['params'])

Example 6

def __init__(self, params, progressbar=None):
    if progressbar is not None:
        self.progress = progressbar
    print(params)
    self.paramgrid = list(ParameterGrid(params))  # create a grid of parameter permutations
    #self.paramgrid = ParameterGrid(params).param_grid

Example 7

def search_test_params(base_clf, cv_params, X, y, train, test, scoring):
    parameter_iterable = ParameterGrid(cv_params)
    grid_scores = Parallel(n_jobs=-1)(
        delayed(_fit_and_score)(clone(base_clf), X, y, scoring, train, test,
                                0, parameters, None, return_parameters=True)
        for parameters in parameter_iterable)
    # grid_scores = [_fit_and_score(clone(base_clf), X, y, scoring, train, test,
    #                               0, parameters, None, return_parameters=True)
    #                for parameters in parameter_iterable]
    grid_scores = sorted(grid_scores, key=lambda x: x[0], reverse=True)
    scores, _, _, parameters = grid_scores[0]
    return scores, parameters

Example 8

def generate_models(self, input_shape, output_dim):
    loss_type = self.grid.params_grid["loss"][0]
    for layers in self.create_network_structures(self.grid.params_grid["layers"],
                                                 self.grid.params_grid["layer_nums"],
                                                 input_shape):
        print "Current network: %s" % "->".join(layers)
        flat_params_grid = self.grid.create_flat_layers_grid(layers, input_shape, output_dim)
        for optimizer_name in self.grid.params_grid["optimizers"]:
            flat_grid = flat_params_grid.copy()
            flat_grid.update(self.grid.create_flat_optimizer_grid(optimizer_name))
            for params in ParameterGrid(flat_grid):
                nn_params = self.grid.fold_params(params)
                yield self.model_factory.create_model(layers, nn_params, loss_type)

Example 9

def generate_models(self, input_shape, output_dim):
    loss_type = self.grid.params_grid["loss"][0]
    for layers in self.create_network_structures(self.grid.params_grid["layers"],
                                                 self.grid.params_grid["layer_nums"],
                                                 input_shape):
        print "Current network: %s" % "->".join(layers)
        flat_params_grid = self.grid.create_flat_layers_grid(layers, input_shape, output_dim)
        for optimizer_name in self.grid.params_grid["optimizers"]:
            flat_grid = flat_params_grid.copy()
            flat_grid.update(self.grid.create_flat_optimizer_grid(optimizer_name))
            # sample at most params_sample_size settings from the full grid
            n_samples = min(self.params_sample_size, len(ParameterGrid(flat_grid)))
            for params in ParameterSampler(flat_grid, n_samples):
                nn_params = self.grid.fold_params(params)
                yield self.model_factory.create_model(layers, nn_params, loss_type)

Example 10

def test_iforest():
    """Check Isolation Forest for various parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])
    grid = ParameterGrid({"n_estimators": [3],
                          "max_samples": [0.5, 1.0, 3],
                          "bootstrap": [True, False]})

    with ignore_warnings():
        for params in grid:
            IsolationForest(random_state=rng,
                            **params).fit(X_train).predict(X_test)

Example 11

def test_iforest_sparse():
    """Check IForest for various parameter settings on sparse input."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "bootstrap": [True, False]})

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        for params in grid:
            # Trained on sparse format
            sparse_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train_sparse)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train)
            dense_results = dense_classifier.predict(X_test)

            assert_array_equal(sparse_results, dense_results)

Example 12

def main(task_num, sample_size=''):
    embedding_size = 100
    epoch = 300
    best_accuracy = 0.0
    grind_ris = {}
    if not os.path.exists('data/ris/task_{}'.format(task_num)):
        os.makedirs('data/ris/task_{}'.format(task_num))

    param_grid = {'nb': [20, 30],
                  'lr': [0.001],
                  'tr': [[0, 0, 0, 0]],
                  'L2': [0.0, 0.0001],  # [0.0,0.1,0.01,0.001,0.0001]
                  'bz': [32],
                  'dr': [0.5],
                  }
    grid = list(ParameterGrid(param_grid))
    np.random.shuffle(grid)

    for params in list(grid):
        data = Dataset('data/tasks_1-20_v1-2/en-valid{}/'.format(sample_size), int(task_num))
        ## for sentence
        par = get_parameters(data, epoch, data._data['sent_len'], data._data['sent_numb'],
                             embedding_size, params)
        t = train(epoch, params['bz'], data, par, dr=params['dr'], _test=False)
        acc = sorted([v for k, v in t[5].items()])[-1]
        if (acc > best_accuracy):
            best_accuracy = acc
        grind_ris[str(params)] = acc
        f_save = 'data/ris/task_{}/{}.PIK'.format(task_num, str(params) + str(acc))
        with open(f_save, 'w') as f:
            pickle.dump((t), f)

    # batch_size = 32
    # epoch = 200
    # if not os.path.exists('data/ris/task_{}'.format(task_num)):
    #     os.makedirs('data/ris/task_{}'.format(task_num))
    # data = Dataset('data/tasks_1-20_v1-2/en-valid{}/'.format(sample_size),int(task_num))

Example 13

def main(task_num, sample_size=''):
    embedding_size = 100
    epoch = 300
    best_accuracy = 0.0
    grind_ris = {}
    if not os.path.exists('data/ris/task_{}'.format(task_num)):
        os.makedirs('data/ris/task_{}'.format(task_num))

    param_grid = {'nb': [20],
                  'lr': [0.001],
                  'tr': [[0, 0, 0, 0]],
                  'L2': [0.001],  # [0.0,0.1,0.01,0.001,0.0001]
                  'bz': [32],
                  'dr': [0.5],
                  }
    grid = list(ParameterGrid(param_grid))
    np.random.shuffle(grid)

    for params in list(grid):
        data = Dataset('data/tasks_1-20_v1-2/en-valid{}/'.format(sample_size), int(task_num))
        ## for sentence
        par = get_parameters(data, epoch, data._data['sent_len'], data._data['sent_numb'],
                             embedding_size, params)
        t = train(epoch, params['bz'], data, par, dr=params['dr'], _test=True)
        acc = sorted([v for k, v in t[5].items()])[-1]
        if (acc > best_accuracy):
            best_accuracy = acc
        grind_ris[str(params)] = acc
        f_save = 'data/ris/task_{}/{}.PIK'.format(task_num, str(params) + str(acc))
        with open(f_save, 'w') as f:
            pickle.dump((t), f)

    # batch_size = 32
    # epoch = 200
    # if not os.path.exists('data/ris/task_{}'.format(task_num)):
    #     os.makedirs('data/ris/task_{}'.format(task_num))
    # data = Dataset('data/tasks_1-20_v1-2/en-valid{}/'.format(sample_size),int(task_num))

Example 14

def main():
    embedding_size = 100
    epoch = 300
    best_accuracy = 0.0
    sent_numb, sent_len = None, None
    grind_ris = {}

    param_grid = {'nb': [5],
                  'lr': [0.01, 0.001, 0.0001],
                  'tr': [[1, 1, 0, 0]],
                  'L2': [0.001, 0.0001],
                  'bz': [64],
                  'dr': [0.5],
                  'mw': [150],
                  'w':  [3, 4, 5],
                  'op': ['Adam']
                  }
    grid = list(ParameterGrid(param_grid))
    np.random.shuffle(grid)

    for params in list(grid):
        data = Dataset(train_size=10000, dev_size=None, test_size=None,
                       sent_len=sent_len, sent_numb=sent_numb,
                       embedding_size=embedding_size,
                       max_windows=params['mw'], win=params['w'])
        ## for sentence
        # par = get_parameters(data,epoch,sent_len,sent_numb,embedding_size)
        ## for windows
        par = get_parameters(data, epoch, (params['w'] * 2) + 1, params['mw'],
                             embedding_size, params)
        t = train(epoch, params['bz'], data, par, dr=params['dr'], _test=False)
        acc = sorted([v for k, v in t[3].items()])[-1]
        if (acc > best_accuracy):
            best_accuracy = acc
        grind_ris[str(params)] = acc
        f_save = 'checkpoints/CNN_WIND/{}.PIK'.format(str(params) + str(acc))
        with open(f_save, 'w') as f:
            pickle.dump((t), f)

Example 15

def cv_trials(X, y, folds, model, hyper):
    N = len(y)
    cv_scores = []
    predictions = {
        'pred': np.zeros(N, dtype=np.bool),
        'proba': np.zeros(N),
        'foldno': np.zeros(N, dtype=np.int32) - 1,
    }
    pg = list(ParameterGrid(hyper))
    for foldno, (train, val, test) in enumerate(folds):
        train_X, train_y = X[train], y[train]
        val_X, val_y = X[val], y[val]
        test_X, test_y = X[test], y[test]

        # select the hyperparameter setting with the best validation F1
        best_params = None
        best_val_f1 = None
        for these_params in pg:
            model.set_params(**these_params)
            model.fit(train_X, train_y)
            this_val_f1 = metrics.f1_score(val_y, model.predict(val_X), average="weighted")
            if not best_params or this_val_f1 > best_val_f1:
                best_params = these_params
                best_val_f1 = this_val_f1

        # refit with the winning setting (skip if there was only one candidate)
        if len(pg) > 1:
            model.set_params(**best_params)
            model.fit(train_X, train_y)

        train_f1 = metrics.f1_score(train_y, model.predict(train_X), average="weighted")
        preds_y = model.predict(test_X)
        predictions['pred'][test] = preds_y
        predictions['foldno'][test] = foldno
        fold_eval = {'f1': metrics.f1_score(test_y, preds_y, average="weighted"),
                     'p': metrics.precision_score(test_y, preds_y, average="weighted"),
                     'r': metrics.recall_score(test_y, preds_y, average="weighted"),
                     'a': metrics.accuracy_score(test_y, preds_y)}
        print "[%02d] Best hyper [train %.3f -> val %.3f -> test %.3f] %s" % \
            (foldno, train_f1, best_val_f1, fold_eval['f1'], best_params)
        cv_scores.append(fold_eval)

    np.set_printoptions(suppress=True)

    # now we want to compute global evaluations, and consolidate metrics
    cv_scores = consolidate(cv_scores)
    preds_y = predictions['pred']
    pooled_eval = {'f1': metrics.f1_score(y, preds_y, average="weighted"),
                   'p': metrics.precision_score(y, preds_y, average="weighted"),
                   'r': metrics.recall_score(y, preds_y, average="weighted"),
                   'a': metrics.accuracy_score(y, preds_y)}
    return pooled_eval, predictions, cv_scores

Example 16

def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterGrid(self.param_grid)
    estimator = self.estimator
    cv = self.cv
    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                parameters, cv=cv)
      for parameters in parameter_iterable)

    best = sorted(out, key=lambda x: x[0])[-1]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator

    return self