我们从Python开源项目中,提取了以下42个代码示例,用于说明如何使用sklearn.externals.joblib.Parallel()。
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
            X, y, scorer, parameter_iterable, fit_params,
            error_score, cv, **kwargs):
    """Fit and score every (parameters, CV split) combination in parallel.

    ``cv`` must expose ``split(X, y, groups)``.  The optional ``groups``
    keyword argument is forwarded to that call; it defaults to ``None``
    when the caller does not supply it (previously a missing ``groups``
    raised ``KeyError``).

    Returns
    -------
    list of tuple
        One ``(out[0], out[1], None, out[2])`` tuple per fit, where ``out``
        is the value returned by ``_fit_and_score``.  According to the
        original inline comments these are
        ``(test_score, n_samples, _, parameters)``.
    """
    groups = kwargs.pop('groups', None)

    # test_score, n_samples, parameters
    out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
        delayed(_fit_and_score)(
            clone(base_estimator), X, y, scorer,
            train, test, verbose, parameters,
            fit_params=fit_params,
            return_train_score=False,
            return_n_test_samples=True,
            return_times=False,
            return_parameters=True,
            error_score=error_score)
        for parameters in parameter_iterable
        for train, test in cv.split(X, y, groups))

    # test_score, n_samples, _, parameters
    return [(mod[0], mod[1], None, mod[2]) for mod in out]
def post(self):
    """Answer a search request with a list of scored, trimmed posts.

    The JSON request body must contain ``"debug"`` and ``"query"``.  In
    debug mode dummy posts are scored in-process; otherwise the fetched
    posts are split into four slices that are scored in parallel and the
    merged result is sorted by descending ``"score"``.
    """
    payload = tornado.escape.json_decode(self.request.body)
    is_debug = payload["debug"]
    query = payload["query"]
    message = {"posts": []}

    if is_debug:
        from elephant_sense.debug import search_posts_dummy
        dummy_posts = self.scoring(search_posts_dummy(query, count=30))
        message["posts"] = [self.trim(p) for p in dummy_posts]
        self.write(message)
        return

    # limit for performance. need improvements for feature extraction.
    posts = search_posts(query, n=50)
    process = 4
    batch_size = len(posts) / process

    # (start, stop) slice bounds, one pair per worker
    tasks = []
    for i in range(process):
        start = i * batch_size
        tasks.append((int(start), int(start + batch_size)))

    dones = Parallel(n_jobs=process)(
        delayed(parallel_scoring)(self.evaluator, posts[lo:hi])
        for lo, hi in tasks)

    trimmed = [self.trim(s) for scoreds in dones for s in scoreds]
    message["posts"] = sorted(trimmed, key=lambda p: p["score"], reverse=True)
    self.write(message)
def _compute_features(self, raw_documents):
    """Build the feature matrix for ``raw_documents``.

    ``raw_documents`` is iterated as ``(event_id, fb_event)`` pairs and must
    support ``len()``.  Each event is passed to ``process_doc`` (in parallel
    when the module-level ``process_all`` flag is truthy) and the returned
    rows are concatenated into one array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(raw_documents), len(self.features))``.
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print("Preloading regexes")
    dummy_processor = event_classifier.StringProcessor('')
    for _name, rule in named_rules_list:
        dummy_processor.count_tokens(rule)

    print("Computing Features")
    result = Parallel(
        n_jobs=7 if process_all else 1,
        verbose=10
    )(delayed(process_doc)(fb_event) for _event_id, fb_event in raw_documents)

    # Accumulate all rows in one flat typecode-"f" buffer before reshaping.
    values = array.array(str("f"))
    for row_values in result:
        values.extend(row_values)

    X = np.array(values)
    X.shape = (len(raw_documents), len(self.features))
    return X
def _extract_and_write(self, X, neighbor_id_lists, distances_to_neighbors, fileName = "l2r_train", y = None):
    """Create per-document samples in parallel and merge them into one file.

    ``_create_training_samples`` is called once per entry of
    ``neighbor_id_lists`` with a 1-based query id.  Afterwards the files
    ``samples_<qid>.tmp`` (one per key of the merged dictionary) are
    concatenated into ``fileName``.

    Returns the dictionary produced by ``self._merge_dicts``.
    """
    # The translation model is only handed over for single-job runs.
    use_ibm1 = self.n_jobs == 1 and self.translation_probability
    ibm1 = self.ibm1 if use_ibm1 else None

    labels_in_neighborhood = Parallel(n_jobs=self.n_jobs)(
        delayed(_create_training_samples)(
            cur_doc, neighbor_list, X, y, cur_doc + 1,
            distances_to_neighbors, self.count_concepts, self.count_terms,
            self.number_of_concepts, ibm1)
        for cur_doc, neighbor_list in enumerate(neighbor_id_lists))

    doc_to_neighborhood_dict = self._merge_dicts(labels_in_neighborhood)

    n_queries = len(doc_to_neighborhood_dict)
    with open(fileName, 'w') as outfile:
        for qid in range(n_queries):
            with open("samples_" + str(qid + 1) + ".tmp") as infile:
                for line in infile:
                    outfile.write(line)
                # NOTE(review): the source formatting was lost; the newline
                # is assumed to be written once per input file - confirm.
                outfile.write('\n')

    return doc_to_neighborhood_dict
def _distarray_missing(self, xc, xd, cdiffs):
    """Distance array for data with missing values.

    For every row index below ``self._datalen`` the positions of NaN entries
    in ``xc`` and ``xd`` are collected, then ``get_row_missing`` is evaluated
    per row (through joblib unless ``self.n_jobs == 1``).
    """
    row_ids = range(self._datalen)
    cindices = [np.where(np.isnan(xc[i]))[0] for i in row_ids]
    dindices = [np.where(np.isnan(xd[i]))[0] for i in row_ids]

    if self.n_jobs == 1:
        dist_array = [
            get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
            for index in row_ids]
    else:
        dist_array = Parallel(n_jobs=self.n_jobs)(
            delayed(get_row_missing)(xc, xd, cdiffs, index, cindices, dindices)
            for index in row_ids)

    return np.array(dist_array)

#==================================================================#
############################# ReliefF ############################################
def _run_algorithm(self):
    """Sum the per-instance ``SURFstar_compute_scores`` results.

    The mean of all entries of ``self._distance_array`` is passed to
    ``self._find_neighbors`` for every instance; each call yields a
    (near, far) pair that is forwarded to the scoring function.
    """
    rows = [self._distance_array[i] for i in range(self._datalen)]
    avg_dist = sum(sum(row) for row in rows) / float(sum(len(row) for row in rows))

    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]

    # One positional-argument tuple per instance, shared by both code paths.
    job_args = [
        (instance_num, attr, nan_entries, self._num_attributes,
         pair[0], pair[1], self._headers, self._class_type,
         self._X, self._y, self._labels_std)
        for instance_num, pair in enumerate(NNlist)]

    if self.n_jobs == 1:
        per_instance = [SURFstar_compute_scores(*args) for args in job_args]
    else:
        per_instance = Parallel(n_jobs=self.n_jobs)(
            delayed(SURFstar_compute_scores)(*args) for args in job_args)

    return np.array(np.sum(per_instance, axis=0))
def _run_algorithm(self):
    """Sum the per-instance ``MultiSURFstar_compute_scores`` results.

    ``self._find_neighbors`` is called once per instance; element 0 and
    element 1 of its result are forwarded as the near and far neighbors.
    """
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]

    # One positional-argument tuple per instance, shared by both code paths.
    job_args = [
        (instance_num, attr, nan_entries, self._num_attributes,
         pair[0], pair[1], self._headers, self._class_type,
         self._X, self._y, self._labels_std)
        for instance_num, pair in enumerate(NNlist)]

    if self.n_jobs == 1:
        per_instance = [MultiSURFstar_compute_scores(*args) for args in job_args]
    else:
        per_instance = Parallel(n_jobs=self.n_jobs)(
            delayed(MultiSURFstar_compute_scores)(*args) for args in job_args)

    return np.array(np.sum(per_instance, axis=0))
def _generateFragments(self):
    """Fill ``self.moldata['fps']`` with the fragments of every molecule.

    Rows are processed in rounds of up to ``n_jobs * chunksize`` rows; each
    round is split into chunks by ``self._produceDataChunks`` and handed to
    ``_generateMolFrags`` through one reused ``Parallel`` pool.
    """
    voc = set(self.vocabulary)
    fpsdict = {idx: {} for idx in self.moldata.index}
    nrows = self.moldata.shape[0]
    rows_per_round = self.n_jobs * self.chunksize

    counter = 0
    with Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
        while counter < nrows:
            nextChunk = min(counter + rows_per_round, nrows)
            chunks = self._produceDataChunks(counter, nextChunk, self.chunksize)
            result = parallel(
                delayed(_generateMolFrags)(mollist, voc, self.fragmentMethod, self.fragIdx)
                for mollist in chunks)
            # Each result is a mapping; its size advances the row counter.
            for partial in result:
                counter += len(partial)
                fpsdict.update(partial)

    ordered = sorted(fpsdict.items())
    self.moldata['fps'] = np.array(ordered)[:, 1]

# construct the molecule-fragment matrix as input for the LDA algorithm
def fit_transform(self, X, y=None, **fit_params):
    """
    Fit every transformer on ``X`` (and possibly ``y``) and transform ``X``.

    Each ``(transformer, weight)`` pair yielded by ``self._iter()`` is passed
    to ``_fit_transform`` through joblib, together with ``fit_params``.

    Returns:
        The combined result built by ``self.__concat`` from the
        per-transformer outputs.
    """
    verify_x_type(X)
    verify_y_type(y)

    runner = joblib.Parallel(n_jobs=self.n_jobs)
    transformed = runner(
        joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params)
        for _, trans, weight in self._iter())
    return self.__concat(transformed)
def calc_fitness(self,X,labels,fit_choice,sel):
    """Compute the fitness of every program output in ``X``.

    X: iterable of program outputs (each one a ``yhat``).
    labels: correct outputs.
    fit_choice: key selecting the fitness function.
    sel: selection method name; when it contains 'lexicase' the
        vector-valued function ``self.f_vec[fit_choice]`` is used and each
        result is passed through ``self.proper``, otherwise
        ``self.f[fit_choice]`` is used and the result is flattened.
    """
    if 'lexicase' in sel:
        vector_fitness = self.f_vec[fit_choice]
        per_output = [self.proper(vector_fitness(labels, yhat)) for yhat in X]
        return np.asarray(per_output, order='F')

    scalar_fitness = self.f[fit_choice]
    per_output = [scalar_fitness(labels, yhat) for yhat in X]
    return np.asarray(per_output, order='F').reshape(-1)
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
            X, y, scorer, parameter_iterable, fit_params,
            error_score, cv, **kwargs):
    """Run ``_fit_and_score`` for every (parameters, train/test) pair.

    ``cv`` is iterated directly as ``(train, test)`` pairs.  Extra keyword
    arguments are accepted and ignored.  The list returned by ``Parallel``
    is passed back unchanged.
    """
    runner = Parallel(n_jobs=n_jobs, verbose=verbose,
                      pre_dispatch=pre_dispatch)
    # test_score, n_samples, score_time, parameters
    return runner(
        delayed(_fit_and_score)(
            clone(base_estimator), X, y, scorer,
            train, test, verbose, parameters,
            fit_params, return_parameters=True,
            error_score=error_score)
        for parameters in parameter_iterable
        for train, test in cv)
def Parallel2(*args, **kwargs):
    """Build a ``Parallel`` object with ``backend`` forced to ``None``.

    Any ``backend`` supplied by the caller is overwritten; all other
    arguments are forwarded unchanged.
    """
    kwargs.update(backend=None)
    return Parallel(*args, **kwargs)
def fit(self, X, y, sample_weight=None):
    """Fit (estimates) the centroids.

    One mean covariance matrix is computed per distinct value of ``y``
    and stored in ``self.covmeans_``, ordered like ``self.classes_``.

    Parameters
    ----------
    X : ndarray, shape (n_trials, n_channels, n_channels)
        ndarray of SPD matrices.
    y : ndarray shape (n_trials, 1)
        labels corresponding to each trial.
    sample_weight : None | ndarray shape (n_trials, 1)
        the weights of each sample. if None, each sample is treated with
        equal weights.

    Returns
    -------
    self : MDM instance
        The MDM instance.
    """
    self.classes_ = numpy.unique(y)
    self.covmeans_ = []

    if sample_weight is None:
        sample_weight = numpy.ones(X.shape[0])

    if self.n_jobs != 1:
        self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
            delayed(mean_covariance)(X[y == label], metric=self.metric_mean,
                                     sample_weight=sample_weight[y == label])
            for label in self.classes_)
        return self

    # Serial path: append to the existing list one class at a time.
    for label in self.classes_:
        members = y == label
        self.covmeans_.append(
            mean_covariance(X[members], metric=self.metric_mean,
                            sample_weight=sample_weight[members]))
    return self
def _predict_distances(self, covtest):
    """Helper to predict the distance. equivalent to transform.

    ``distance`` is evaluated between ``covtest`` and every entry of
    ``self.covmeans_``; the results are concatenated along axis 1.
    """
    centroid_ids = range(len(self.covmeans_))

    if self.n_jobs != 1:
        per_class = Parallel(n_jobs=self.n_jobs)(
            delayed(distance)(covtest, self.covmeans_[m], self.metric_dist)
            for m in centroid_ids)
    else:
        per_class = [distance(covtest, self.covmeans_[m], self.metric_dist)
                     for m in centroid_ids]

    return numpy.concatenate(per_class, axis=1)
def fit(self, epochs):
    """Compute per-epoch channel correlations and flag bad channels.

    Sets ``picks``, ``ch_type``, ``ch_subsets_``, ``mappings_``, ``corr_``,
    ``bad_log`` and ``bad_chs_`` on the instance and returns ``self``.
    """
    self.picks = _handle_picks(info=epochs.info, picks=self.picks)
    _check_data(epochs, picks=self.picks,
                ch_constraint='single_channel_type', verbose=self.verbose)
    self.ch_type = _get_channel_type(epochs, self.picks)
    n_epochs = len(epochs)
    self.ch_subsets_ = self._get_random_subsets(epochs.info)
    self.mappings_ = self._get_mappings(epochs)

    n_jobs = check_n_jobs(self.n_jobs)
    parallel = Parallel(n_jobs, verbose=10)
    my_iterator = delayed(_iterate_epochs)

    announce = self.verbose is not False and self.n_jobs > 1
    if announce:
        print('Iterating epochs ...')
    # Per-worker verbosity is switched off when several jobs are requested.
    verbose = False if self.n_jobs > 1 else self.verbose

    epoch_chunks = np.array_split(np.arange(n_epochs), n_jobs)
    corrs = parallel(my_iterator(self, epochs, idxs, verbose)
                     for idxs in epoch_chunks)
    self.corr_ = np.concatenate(corrs)
    if announce:
        print('[Done]')

    # compute how many windows is a sensor RANSAC-bad
    self.bad_log = np.zeros_like(self.corr_)
    self.bad_log[self.corr_ < self.min_corr] = 1
    bad_counts = self.bad_log.sum(axis=0)

    bad_idx = np.where(bad_counts > self.unbroken_time * n_epochs)[0]
    # An empty bad_idx yields an empty list.
    self.bad_chs_ = [epochs.info['ch_names'][self.picks[p]] for p in bad_idx]
    return self
def _cpu_map(fun, param_grid, n_jobs, verbose=True):
    """Call ``fun`` on every entry of ``param_grid`` with a threading pool.

    Returns the list of results produced by ``Parallel``.
    """
    runner = Parallel(
        n_jobs=n_jobs,
        verbose=verbose,
        backend='threading',  # any sklearn backend should work here
    )
    return runner(delayed(fun)(params) for params in param_grid)
def _cpu_map(fun, param_grid, n_jobs, verbose):
    """Call ``fun`` on every entry of ``param_grid`` with a threading pool.

    Returns the list of results produced by ``Parallel``.
    """
    runner = Parallel(
        n_jobs=n_jobs,
        verbose=verbose,
        backend='threading',  # any sklearn backend should work here
    )
    return runner(delayed(fun)(params) for params in param_grid)
def search_test_params(base_clf, cv_params, X, y, train, test, scoring):
    """Score every parameter combination on one train/test split.

    Each candidate from ``ParameterGrid(cv_params)`` is evaluated with
    ``_fit_and_score`` on all available cores.

    Returns
    -------
    tuple
        ``(score, parameters)`` of the result whose first element is
        largest.
    """
    candidates = ParameterGrid(cv_params)
    results = Parallel(n_jobs=-1)(
        delayed(_fit_and_score)(clone(base_clf), X, y, scoring,
                                train, test, 0, params,
                                None, return_parameters=True)
        for params in candidates)

    ranked = sorted(results, key=lambda res: res[0], reverse=True)
    best_score, _, _, best_params = ranked[0]
    return best_score, best_params
def score(self, imgs, confounds=None):
    """
    Score the images on the learning spatial pipelining, based on the
    objective function value that is minimized by the algorithm. Lower
    means better fit.

    Parameters
    ----------
    imgs: list of Niimg-like objects
        See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
        Data on which PCA must be calculated. If this is a list,
        the affine is considered the same for all.

    confounds: CSV file path or 2D matrix
        This parameter is passed to nilearn.signal.clean. Please see the
        related documentation for details

    Returns
    -------
    score: float
        Per-image scores averaged with each image's length as weight.
    """
    # A single image (a path string or a non-iterable) becomes a 1-item list.
    is_single = isinstance(imgs, str) or not hasattr(imgs, '__iter__')
    if is_single:
        imgs = [imgs]
    if confounds is None:
        confounds = itertools.repeat(None)

    cached_score = self._cache(_score_img, func_memory_level=1)
    scores = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
        delayed(cached_score)(self.coder_, self.masker_, img, these_confounds)
        for img, these_confounds in zip(imgs, confounds))
    scores = np.array(scores)

    try:
        len_imgs = np.array([check_niimg(img).get_shape()[3]
                             for img in imgs])
    except ImageFileError:
        # Not readable as images: fall back to loading them as arrays.
        len_imgs = np.array([np.load(img, mmap_mode='r').shape[0]
                             for img in imgs])

    return np.sum(scores * len_imgs) / np.sum(len_imgs)
def transform(self, imgs, confounds=None):
    """Compute the loadings of every image with the fitted coder.

    Parameters
    ----------
    imgs: list of Niimg-like objects
        See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
        Data on which PCA must be calculated. If this is a list,
        the affine is considered the same for all.

    confounds: CSV file path or 2D matrix
        This parameter is passed to nilearn.signal.clean. Please see the
        related documentation for details

    Returns
    -------
    codes, list of ndarray, shape = n_images * (n_samples, n_components)
        Loadings for each of the images, and each of the time steps
    """
    # A single image (a path string or a non-iterable) becomes a 1-item list.
    is_single = isinstance(imgs, str) or not hasattr(imgs, '__iter__')
    if is_single:
        imgs = [imgs]
    if confounds is None:
        confounds = itertools.repeat(None)

    cached_transform = self._cache(_transform_img, func_memory_level=1)
    return Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
        delayed(cached_transform)(self.coder_, self.masker_, img, these_confounds)
        for img, these_confounds in zip(imgs, confounds))
def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """
    Evaluate a score by cross-validation

    ``scoring`` may be a single entry or a list/tuple; one scorer is built
    per entry.

    Returns
    -------
    tuple
        ``(scores, group_order)``: the squeezed array of fold results and,
        when ``cv`` has a ``groups`` attribute, the group of the first test
        sample of every fold (otherwise an empty list).
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                train, test, verbose, None, fit_params)
        for train, test in splits)

    group_order = []
    if hasattr(cv, 'groups'):
        cv_groups = np.array(cv.groups)
        group_order = [cv_groups[test].tolist()[0] for _, test in splits]

    return np.squeeze(np.array(scores)), group_order
def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of original sklearn's permutation test score function
    to evaluate p-value outside this function, so that the score can be
    reused from outside.

    Returns the array of scores, one per permutation of ``y``.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying
                   Classifier Performance. The Journal of Machine Learning
                   Research (2010) vol. 11
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    runner = Parallel(n_jobs=n_jobs, verbose=verbose)
    raw_scores = runner(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))

    return np.array(raw_scores)
def anomaly_score(self, X=None):
    """Compute anomaly scores for test samples.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features), default None
        Test samples. When omitted, the data stored in the fitted
        neighbors model is scored.

    Returns
    -------
    y_score : array-like of shape (n_samples,)
        Anomaly scores for test samples.

    Raises
    ------
    ValueError
        If a ``FloatingPointError`` occurs while scoring.
    """
    check_is_fitted(self, '_knn')

    if X is not None:
        X = check_array(X)
        ind = self._knn.kneighbors(X, return_distance=False)
    else:
        X = self._knn._fit_X
        ind = self._knn.kneighbors(None, return_distance=False)

    n_samples, _ = X.shape

    try:
        # Samples are split into n_jobs even slices, one task per slice.
        result = Parallel(self.n_jobs)(
            delayed(_abof)(X[s], ind[s], self._knn._fit_X)
            for s in gen_even_slices(n_samples, self.n_jobs))
    except FloatingPointError as e:
        raise ValueError('X must not contain training samples') from e

    return -np.concatenate(result)
def _pairwise_wmd(self, X_test, X_train=None):
    """Computes the word mover's distance between all train and test points.

    Parallelized over rows of X_test.

    Assumes that train and test samples are sparse BOW vectors summing to 1.

    Parameters
    ----------
    X_test: scipy.sparse matrix, shape: (n_test_samples, vocab_size)
        Test samples.

    X_train: scipy.sparse matrix, shape: (n_train_samples, vocab_size)
        Training samples. If `None`, uses the samples the estimator was fit with.

    Returns
    -------
    dist : array, shape: (n_test_samples, n_train_samples)
        Distances between all test samples and all train samples.
    """
    # The previously computed but never used ``n_samples_test`` was removed.
    if X_train is None:
        X_train = self._fit_X

    if self.n_jobs == 1:
        dist = [self._wmd_row(test_sample, X_train) for test_sample in X_test]
    else:
        dist = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(self._wmd_row)(test_sample, X_train)
            for test_sample in X_test)

    return np.array(dist)
def _extract_features(self, topNIndices, topNDistances, y, distances):
    """Analyze the sample splits in parallel and merge results per label.

    ``self._split_samples`` yields ``(indices, distances, labels)`` triples;
    each is passed to ``_analyze``, which returns a mapping from label to a
    list of training samples.

    Returns a ``defaultdict(list)`` holding the concatenated lists.
    """
    samples = self._split_samples(topNIndices, topNDistances, y)

    per_split = Parallel(n_jobs=self.n_jobs)(
        delayed(_analyze)(split_idx, split_dist, split_y, distances, self.dependencies)
        for split_idx, split_dist, split_y in samples)

    # merge training data
    training_data = defaultdict(list)
    for split_result in per_split:
        for label, label_samples in split_result.items():
            training_data[label].extend(label_samples)
    return training_data
def fit(self, X, y):
    """Fit one regressor for each quantile.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Training vectors, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    * `y` [array-like, shape=(n_samples,)]:
        Target values (real numbers in regression)

    Raises
    ------
    ValueError
        If ``self.base_estimator`` is not a ``GradientBoostingRegressor``
        or does not use the quantile loss.
    """
    rng = check_random_state(self.random_state)

    base_estimator = self.base_estimator
    if base_estimator is None:
        base_estimator = GradientBoostingRegressor(loss='quantile')
    else:
        if not isinstance(base_estimator, GradientBoostingRegressor):
            raise ValueError('base_estimator has to be of type'
                             ' GradientBoostingRegressor.')
        if not base_estimator.loss == 'quantile':
            raise ValueError('base_estimator has to use quantile'
                             ' loss not %s' % base_estimator.loss)

    # The predictions for different quantiles should be sorted.
    # Therefore each of the regressors need the same seed.
    # NOTE: this sets random_state on the estimator object itself, i.e. on
    # self.base_estimator when one was supplied.
    base_estimator.set_params(random_state=rng)

    regressors = []
    for quantile in self.quantiles:
        candidate = clone(base_estimator)
        candidate.set_params(alpha=quantile)
        regressors.append(candidate)

    fit_all = Parallel(n_jobs=self.n_jobs, backend='threading')
    self.regressors_ = fit_all(
        delayed(_parallel_fit)(regressor, X, y) for regressor in regressors)

    return self
def fit_transform(self, X, y=None, **fit_params):
    """Fit all transformers, transform ``X`` and combine the outputs.

    When ``self.drop`` is truthy the first transformed output is the base
    of the concatenation; otherwise ``X`` itself is.
    """
    self._validate_transformers()

    fitted = Parallel(n_jobs=self.n_jobs)(
        delayed(_fit_transform_one)(trans, name, weight, X, y, **fit_params)
        for name, trans, weight in self._iter())

    # Element 0 of every result is the transformed data.
    outputs = [res[0] for res in fitted]

    if not self.drop:
        return self._concat_just_right(X, outputs)
    return self._concat_just_right(outputs[0], outputs[1:])
def transform(self, X):
    """Transform ``X`` with every transformer and combine the outputs.

    When ``self.drop`` is truthy the first transformed output is the base
    of the concatenation; otherwise ``X`` itself is.
    """
    outputs = Parallel(n_jobs=self.n_jobs)(
        delayed(_transform_one)(trans, name, weight, X)
        for name, trans, weight in self._iter())

    if not self.drop:
        return self._concat_just_right(X, outputs)
    return self._concat_just_right(outputs[0], outputs[1:])
def fit_transform(self, X, y=None, **fit_params):
    """Fit all transformers and stack ``X`` with their outputs along axis 0.

    ``X`` comes first, followed by element 0 of every
    ``_fit_transform_one`` result in transformer order.
    """
    self._validate_transformers()

    fitted = Parallel(n_jobs=self.n_jobs)(
        delayed(_fit_transform_one)(trans, name, weight, X, y, **fit_params)
        for name, trans, weight in self._iter())

    frames = [X] + [res[0] for res in fitted]
    return pd.concat(frames, axis=0)
def transform(self, X):
    """Stack ``X`` with every transformer's output along axis 0.

    ``X`` comes first, followed by the ``_transform_one`` results in
    transformer order.
    """
    outputs = Parallel(n_jobs=self.n_jobs)(
        delayed(_transform_one)(trans, name, weight, X)
        for name, trans, weight in self._iter())

    frames = [X]
    frames.extend(outputs)
    return pd.concat(frames, axis=0)
def _run_algorithm(self):
    """Sum the per-instance ``ReliefF_compute_scores`` results.

    ``self._find_neighbors`` is mapped over all instance indices and each
    result is forwarded, with its index, to the scoring function.
    """
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = map(self._find_neighbors, range(self._datalen))

    per_instance = Parallel(n_jobs=self.n_jobs)(
        delayed(ReliefF_compute_scores)(
            instance_num, attr, nan_entries, self._num_attributes, NN,
            self._headers, self._class_type, self._X, self._y,
            self._labels_std)
        for instance_num, NN in enumerate(NNlist))

    return np.array(np.sum(per_instance, axis=0))
def _run_algorithm(self):
    """Sum the per-instance ``MultiSURF_compute_scores`` results.

    ``self._find_neighbors`` is called once per instance and its result is
    forwarded, with the instance index, to the scoring function.
    """
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]

    per_instance = Parallel(n_jobs=self.n_jobs)(
        delayed(MultiSURF_compute_scores)(
            instance_num, attr, nan_entries, self._num_attributes, NN_near,
            self._headers, self._class_type, self._X, self._y,
            self._labels_std)
        for instance_num, NN_near in enumerate(NNlist))

    return np.array(np.sum(per_instance, axis=0))
def transform(self, X, *args, **kwargs):
    """
    Transform ``X`` with every transformer and combine the results.

    Each ``(transformer, weight)`` pair yielded by ``self._iter()`` is passed
    to ``_transform`` through joblib, together with any extra positional and
    keyword arguments; the outputs are combined by ``self.__concat``.
    """
    verify_x_type(X)

    runner = joblib.Parallel(n_jobs=self.n_jobs)
    transformed = runner(
        joblib.delayed(_transform)(trans, weight, X, *args, **kwargs)
        for _, trans, weight in self._iter())
    return self.__concat(transformed)
def transform(self,x,inds=None,labels = None):
    """return a transformation of x using population outputs

    ``inds`` is used when truthy, otherwise ``self._best_inds``; when both
    are falsy ``x`` is returned unchanged.  The output holds one column per
    individual.
    """
    if inds:
        chosen = inds
    elif self._best_inds:
        chosen = self._best_inds
    else:
        return x

    outputs = [self.out(I, x, labels, self.otype) for I in chosen]
    return np.asarray(outputs).transpose()
def _fit_multiclass(self, X, y, alpha, C, learning_rate,
                    sample_weight, n_iter):
    """Fit a multi-class classifier by combining binary classifiers

    Each binary classifier predicts one class versus all others. This
    strategy is called OVA: One Versus All.

    Updates ``intercept_`` and ``t_`` and, when averaging is enabled,
    selects between the averaged and the standard coefficients.
    """
    n_classes = len(self.classes_)

    # Use joblib to fit OvA in parallel.
    runner = Parallel(n_jobs=self.n_jobs, backend="threading",
                      verbose=self.verbose)
    result = runner(
        delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,
                            n_iter, self._expanded_class_weight[i], 1.,
                            sample_weight)
        for i in range(n_classes))

    for class_idx, (_, intercept) in enumerate(result):
        self.intercept_[class_idx] = intercept

    self.t_ += n_iter * X.shape[0]

    if self.average > 0:
        averaging_started = self.average <= self.t_ - 1.0
        if averaging_started:
            self.coef_ = self.average_coef_
            self.intercept_ = self.average_intercept_
        else:
            self.coef_ = self.standard_coef_
            self.standard_intercept_ = np.atleast_1d(self.intercept_)
            self.intercept_ = self.standard_intercept_
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mutual Information between each feature in F and s.

    This function is for when |S| > 1. s is the previously selected feature.
    We exploit the fact that this step is embarrassingly parallel.

    Returns the list of ``_get_mi`` results, one per feature in ``F``.
    """
    runner = Parallel(n_jobs=MI_FS.n_jobs)
    return runner(delayed(_get_mi)(f, s, MI_FS) for f in F)
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mutual Information between each feature in X and y.

    This function is for when |S| = 0. We select the first feature in S.

    Returns the list of ``_get_first_mi`` results, one per column index of
    ``MI_FS.X``.
    """
    _, p = MI_FS.X.shape
    # ``range`` exists on both Python 2 and 3; ``xrange`` is Python 2 only.
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(
        delayed(_get_first_mi)(i, k, MI_FS) for i in range(p))
    return MIs
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                   mmap_mode=None):
    """Prepare multi subject data in parallel

    Parameters
    ----------

    imgs_list: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html.
        List of imgs file to prepare. One item per subject.

    confounds: list of confounds, optional
        List of confounds (2D arrays or filenames pointing to CSV
        files). Must be of same length than imgs_list.

    copy: boolean, optional
        If True, guarantees that output array has no memory in common with
        input array.

    n_jobs: integer, optional
        The number of cpus to use to do the computation. -1 means
        'all cpus'.

    mmap_mode: optional
        Forwarded to ``np.load`` when the inputs are loaded as raw arrays.

    Returns
    -------
    region_signals: list of 2D numpy.ndarray
        List of signal for each element per subject.
        shape: list of (number of scans, number of elements)
    """
    self._check_fitted()

    def _is_raw(item):
        # Strings count as raw only when they carry a '.npy' extension.
        if isinstance(item, str):
            return os.path.splitext(item)[1] == '.npy'
        return isinstance(item, np.ndarray)

    # Check whether all imgs from imgs_list are numpy instance, or fallback
    # to MultiNiftiMasker (could handle hybrid imgs_list but we do not
    # need it for the moment)
    if not all(_is_raw(imgs) for imgs in imgs_list):
        return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                               confounds=confounds,
                                               copy=copy,
                                               n_jobs=n_jobs, )

    return Parallel(n_jobs=n_jobs)(
        delayed(np.load)(imgs, mmap_mode=mmap_mode) for imgs in imgs_list)
def fit(self, X, y):
    """Actual fitting, performing the search over parameters.

    Every candidate of ``ParameterGrid(self.param_grid)`` is evaluated with
    ``cv_fit_and_score``; the highest-scoring result sets ``best_score_``
    and ``best_params_``.  With ``self.refit`` set, a fresh clone using the
    best parameters is fitted on all data and stored as ``best_estimator_``.
    """
    parameter_iterable = ParameterGrid(self.param_grid)

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None and len(y) != n_samples:
        raise ValueError('Target variable (y) has a different number '
                         'of samples (%i) than data (X: %i samples)'
                         % (len(y), n_samples))
    cv = check_cv(self.cv, X, y, classifier=is_classifier(self.estimator))

    if self.verbose > 0 and isinstance(parameter_iterable, Sized):
        n_candidates = len(parameter_iterable)
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(len(cv), n_candidates,
                                 n_candidates * len(cv)))

    base_estimator = clone(self.estimator)

    runner = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                      pre_dispatch=self.pre_dispatch)
    out = runner(
        delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                  parameters, cv=cv)
        for parameters in parameter_iterable)

    # Ascending stable sort on the score; the last entry is the winner.
    best = sorted(out, key=lambda res: res[0])[-1]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is None:
            best_estimator.fit(X, **self.fit_params)
        else:
            best_estimator.fit(X, y, **self.fit_params)
        self.best_estimator_ = best_estimator

    return self
def fit(self, X, y):
    """Actual fitting, performing the search over parameters.

    ``self.n_iter`` candidates are drawn by ``ParameterSampler`` from
    ``self.param_distributions`` and evaluated with ``cv_fit_and_score``;
    the highest-scoring result sets ``best_score_`` and ``best_params_``.
    With ``self.refit`` set, a fresh clone using the best parameters is
    fitted on all data and stored as ``best_estimator_``.

    Raises
    ------
    ValueError
        If ``y`` is given and its length differs from the number of samples
        in ``X``.
    """
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)

    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(
        delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                  parameters, cv=cv)
        for parameters in parameter_iterable)

    # Rank on the score only.  Sorting the raw result tuples would fall
    # through to comparing the parameter dicts whenever two scores tie,
    # which raises TypeError on Python 3.
    best = sorted(out, key=lambda x: x[0])[-1]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator

    return self
def run(n_calls=32, n_runs=1, save_traces=True, n_jobs=1):
    """
    Main function used to run the experiments.

    Every surrogate minimizer is evaluated on every compatible
    (model, dataset) pair, ``n_runs`` times with random seeds.  The
    collected traces are optionally dumped to a timestamped JSON file and
    then passed to ``calculate_performance``.

    Parameters
    ----------
    * `n_calls`: int
        Evaluation budget.

    * `n_runs`: int
        Number of times to repeat the optimization in order to average out
        noise.

    * `save_traces`: bool
        Whether or not to save data collected during optimization

    * `n_jobs`: int
        Number of different repeats of optimization to run in parallel.
    """
    surrogate_minimizers = [gbrt_minimize, forest_minimize, gp_minimize]
    selected_models = sorted(MODELS, key=lambda x: x.__name__)
    selected_datasets = (DATASETS.keys())

    # all the parameter values and objectives collected during execution
    # are stored in the nested dict below: model -> dataset -> minimizer
    all_data = {}

    for model in selected_models:
        all_data[model] = {}

        for dataset in selected_datasets:
            # Skip datasets whose registered base class the model does not
            # derive from.
            if not issubclass(model, DATASETS[dataset]):
                continue

            all_data[model][dataset] = {}
            for surrogate_minimizer in surrogate_minimizers:
                print(surrogate_minimizer.__name__, model.__name__, dataset)
                seeds = np.random.randint(0, 2**30, n_runs)
                raw_trace = Parallel(n_jobs=n_jobs)(
                    delayed(evaluate_optimizer)(
                        surrogate_minimizer, model, dataset, n_calls, seed
                    ) for seed in seeds
                )
                all_data[model][dataset][surrogate_minimizer.__name__] = raw_trace

    # convert the model keys to strings so that results can be saved as json
    all_data = {k.__name__: v for k, v in all_data.items()}

    # dump the recorded objective values as json
    if save_traces:
        # %M (minute) and %S (second) replace the previous "%m" (month, used
        # a second time) and "%s" (not a portable strftime directive).
        stamp = datetime.now().strftime("%m_%Y_%d_%H_%M_%S")
        with open(stamp + '.json', 'w') as f:
            json.dump(all_data, f)

    # NOTE(review): the source formatting was lost; this call is assumed to
    # run regardless of save_traces - confirm.
    calculate_performance(all_data)