We extracted the following 24 code examples from open-source Python projects to illustrate how to use sklearn.externals.joblib.delayed().
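All of the examples below share the same basic pattern: delayed() wraps a function call into a picklable (function, args, kwargs) task without executing it, and Parallel runs the resulting generator of tasks across workers. The minimal sketch below is illustrative only (the function square and the argument n_jobs=2 are made up for this demo, not taken from the examples). Note that the sklearn.externals.joblib import path was deprecated in scikit-learn 0.21 and later removed; current code imports the standalone joblib package directly.

try:
    # Import path used throughout the examples below (older scikit-learn versions).
    from sklearn.externals.joblib import Parallel, delayed
except ImportError:
    # sklearn.externals.joblib was deprecated and removed; the standalone
    # joblib package exposes the same Parallel/delayed API.
    from joblib import Parallel, delayed

def square(x):
    # Hypothetical work function used only for this demonstration.
    return x * x

# delayed(square)(i) builds a deferred call; Parallel executes the list of
# deferred calls on up to 2 worker processes and returns the results in order.
results = Parallel(n_jobs=2)(delayed(square)(i) for i in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]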
def post(self):
    data = tornado.escape.json_decode(self.request.body)
    is_debug = data["debug"]
    query = data["query"]
    message = {"posts": []}

    if is_debug:
        from elephant_sense.debug import search_posts_dummy
        posts = search_posts_dummy(query, count=30)
        posts = self.scoring(posts)
        message["posts"] = [self.trim(p) for p in posts]
        self.write(message)
    else:
        posts = search_posts(query, n=50)  # limit for performance. need improvements for feature extraction.
        process = 4
        batch_size = len(posts) / process
        tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(process)]
        dones = Parallel(n_jobs=process)(
            delayed(parallel_scoring)(self.evaluator, posts[t[0]:t[1]]) for t in tasks)
        posts = []
        for scoreds in dones:
            posts += [self.trim(s) for s in scoreds]
        posts = sorted(posts, key=lambda p: p["score"], reverse=True)
        message["posts"] = posts
        self.write(message)
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max, size=n_seeds)
    exps = []
    exps += [{'method': 'sgd', 'step_size': step_size}
             for step_size in np.logspace(-3, 3, 7)]
    exps += [{'method': 'gram', 'reduction': reduction}
             for reduction in [1, 4, 6, 8, 12, 24]]
    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)
    Parallel(n_jobs=n_jobs, verbose=10)(
        delayed(single_run)(config_updates, rundir, i)
        for i, config_updates in enumerate(exps))
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max, size=n_seeds)
    exps = []
    exps += [{'method': 'sgd', 'step_size': step_size}
             for step_size in np.logspace(-7, -7, 1)]
    exps += [{'method': 'gram', 'reduction': reduction}
             for reduction in [12]]
    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)
    Parallel(n_jobs=n_jobs, verbose=10)(
        delayed(single_run)(config_updates, rundir, i)
        for i, config_updates in enumerate(exps))
def _compute_features(self, raw_documents):
    values = array.array(str("f"))
    print("Preloading regexes")
    dummy_processor = event_classifier.StringProcessor('')
    for name, rule in named_rules_list:
        dummy_processor.count_tokens(rule)

    print("Computing Features")
    result = Parallel(
        n_jobs=7 if process_all else 1,
        verbose=10
    )(delayed(process_doc)(fb_event) for event_id, fb_event in raw_documents)
    for row_values in result:
        values.extend(row_values)

    X = np.array(values)
    X.shape = (len(raw_documents), len(self.features))
    return X
def _distarray_missing(self, xc, xd, cdiffs):
    """Distance array for data with missing values"""
    cindices = []
    dindices = []
    for i in range(self._datalen):
        cindices.append(np.where(np.isnan(xc[i]))[0])
        dindices.append(np.where(np.isnan(xd[i]))[0])

    if self.n_jobs != 1:
        dist_array = Parallel(n_jobs=self.n_jobs)(
            delayed(get_row_missing)(xc, xd, cdiffs, index, cindices, dindices)
            for index in range(self._datalen))
    else:
        dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
                      for index in range(self._datalen)]

    return np.array(dist_array)

#==================================================================#
############################# ReliefF ############################################
def _run_algorithm(self):
    sm = cnt = 0
    for i in range(self._datalen):
        sm += sum(self._distance_array[i])
        cnt += len(self._distance_array[i])
    avg_dist = sm / float(cnt)

    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]
    NN_near_list = [i[0] for i in NNlist]
    NN_far_list = [i[1] for i in NNlist]

    if self.n_jobs != 1:
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            SURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                     NN_near, NN_far, self._headers, self._class_type,
                                     self._X, self._y, self._labels_std)
            for instance_num, NN_near, NN_far in zip(
                range(self._datalen), NN_near_list, NN_far_list)), axis=0)
    else:
        scores = np.sum([SURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                                                 NN_near, NN_far, self._headers, self._class_type,
                                                 self._X, self._y, self._labels_std)
                         for instance_num, NN_near, NN_far in zip(
                             range(self._datalen), NN_near_list, NN_far_list)], axis=0)

    return np.array(scores)
def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
    NN_near_list = [i[0] for i in NNlist]
    NN_far_list = [i[1] for i in NNlist]

    if self.n_jobs != 1:
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            MultiSURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                          NN_near, NN_far, self._headers, self._class_type,
                                          self._X, self._y, self._labels_std)
            for instance_num, NN_near, NN_far in zip(
                range(self._datalen), NN_near_list, NN_far_list)), axis=0)
    else:
        scores = np.sum([MultiSURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                                                      NN_near, NN_far, self._headers, self._class_type,
                                                      self._X, self._y, self._labels_std)
                         for instance_num, NN_near, NN_far in zip(
                             range(self._datalen), NN_near_list, NN_far_list)], axis=0)

    return np.array(scores)
def _generateFragments(self):
    voc = set(self.vocabulary)
    fpsdict = dict([(idx, {}) for idx in self.moldata.index])
    nrows = self.moldata.shape[0]
    counter = 0
    with Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
        while counter < nrows:
            nextChunk = min(counter + (self.n_jobs * self.chunksize), nrows)
            result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                         self.fragmentMethod,
                                                         self.fragIdx)
                              for mollist in self._produceDataChunks(counter, nextChunk, self.chunksize))
            for r in result:
                counter += len(r)
                fpsdict.update(r)

    self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:, 1]

# construct the molecule-fragment matrix as input for the LDA algorithm
def fit_transform(self, X, y=None, **fit_params):
    """
    Fits the transformer using ``X`` (and possibly ``y``). Transforms
    ``X`` using the transformers, then uses :func:`pandas.concat`
    to horizontally concatenate the results.

    Returns:
        The horizontally concatenated transformation results.
    """
    verify_x_type(X)
    verify_y_type(y)

    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)
def calc_fitness(self, X, labels, fit_choice, sel):
    """Computes fitness of individual output yhat.

    yhat: output of a program.
    labels: correct outputs
    fit_choice: choice of fitness function
    """
    if 'lexicase' in sel:
        # return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
        return np.asarray(
            [self.proper(self.f_vec[fit_choice](labels, yhat)) for yhat in X],
            order='F')
        # return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
    else:
        # return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
        return np.asarray([self.f[fit_choice](labels, yhat) for yhat in X],
                          order='F').reshape(-1)
        # return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X))
def fit(self, X, y, sample_weight=None):
    """Fit (estimates) the centroids.

    Parameters
    ----------
    X : ndarray, shape (n_trials, n_channels, n_channels)
        ndarray of SPD matrices.
    y : ndarray, shape (n_trials, 1)
        labels corresponding to each trial.
    sample_weight : None | ndarray, shape (n_trials, 1)
        the weights of each sample. If None, each sample is treated with
        equal weights.

    Returns
    -------
    self : MDM instance
        The MDM instance.
    """
    self.classes_ = numpy.unique(y)
    self.covmeans_ = []

    if sample_weight is None:
        sample_weight = numpy.ones(X.shape[0])

    if self.n_jobs == 1:
        for l in self.classes_:
            self.covmeans_.append(
                mean_covariance(X[y == l], metric=self.metric_mean,
                                sample_weight=sample_weight[y == l]))
    else:
        self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
            delayed(mean_covariance)(X[y == l], metric=self.metric_mean,
                                     sample_weight=sample_weight[y == l])
            for l in self.classes_)

    return self
def _predict_distances(self, covtest):
    """Helper to predict the distance. Equivalent to transform."""
    Nc = len(self.covmeans_)

    if self.n_jobs == 1:
        dist = [distance(covtest, self.covmeans_[m], self.metric_dist)
                for m in range(Nc)]
    else:
        dist = Parallel(n_jobs=self.n_jobs)(delayed(distance)(
            covtest, self.covmeans_[m], self.metric_dist)
            for m in range(Nc))

    dist = numpy.concatenate(dist, axis=1)
    return dist
def fit(self, epochs):
    self.picks = _handle_picks(info=epochs.info, picks=self.picks)
    _check_data(epochs, picks=self.picks,
                ch_constraint='single_channel_type', verbose=self.verbose)
    self.ch_type = _get_channel_type(epochs, self.picks)
    n_epochs = len(epochs)
    self.ch_subsets_ = self._get_random_subsets(epochs.info)
    self.mappings_ = self._get_mappings(epochs)

    n_jobs = check_n_jobs(self.n_jobs)
    parallel = Parallel(n_jobs, verbose=10)
    my_iterator = delayed(_iterate_epochs)
    if self.verbose is not False and self.n_jobs > 1:
        print('Iterating epochs ...')
    verbose = False if self.n_jobs > 1 else self.verbose
    corrs = parallel(my_iterator(self, epochs, idxs, verbose)
                     for idxs in np.array_split(np.arange(n_epochs), n_jobs))
    self.corr_ = np.concatenate(corrs)
    if self.verbose is not False and self.n_jobs > 1:
        print('[Done]')

    # compute in how many windows each sensor is RANSAC-bad
    self.bad_log = np.zeros_like(self.corr_)
    self.bad_log[self.corr_ < self.min_corr] = 1
    bad_log = self.bad_log.sum(axis=0)

    bad_idx = np.where(bad_log > self.unbroken_time * n_epochs)[0]
    if len(bad_idx) > 0:
        self.bad_chs_ = [
            epochs.info['ch_names'][self.picks[p]] for p in bad_idx]
    else:
        self.bad_chs_ = []
    return self
def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """Evaluate a score by cross-validation."""
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                              train, test, verbose, None,
                                              fit_params)
                      for train, test in splits)

    group_order = []
    if hasattr(cv, 'groups'):
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """Evaluate the significance of a cross-validated score with
    permutations, as in test 1 of [Ojala2010]_.

    A modification of the original scikit-learn permutation_test_score
    function: the p-value is evaluated outside this function, so that the
    permutation scores can be reused elsewhere.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying
       Classifier Performance. The Journal of Machine Learning Research
       (2010) vol. 11
    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
def _pairwise_wmd(self, X_test, X_train=None):
    """Computes the word mover's distance between all train and test points.

    Parallelized over rows of X_test.

    Assumes that train and test samples are sparse BOW vectors summing to 1.

    Parameters
    ----------
    X_test: scipy.sparse matrix, shape: (n_test_samples, vocab_size)
        Test samples.

    X_train: scipy.sparse matrix, shape: (n_train_samples, vocab_size)
        Training samples. If `None`, uses the samples the estimator was fit with.

    Returns
    -------
    dist : array, shape: (n_test_samples, n_train_samples)
        Distances between all test samples and all train samples.
    """
    n_samples_test = X_test.shape[0]

    if X_train is None:
        X_train = self._fit_X

    if self.n_jobs == 1:
        dist = [self._wmd_row(test_sample, X_train) for test_sample in X_test]
    else:
        dist = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(self._wmd_row)(test_sample, X_train)
            for test_sample in X_test)

    return np.array(dist)
def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = map(self._find_neighbors, range(self._datalen))

    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        ReliefF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                NN, self._headers, self._class_type,
                                self._X, self._y, self._labels_std)
        for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)

    return np.array(scores)
def _run_algorithm(self):
    attr = self._get_attribute_info()
    nan_entries = np.isnan(self._X)

    NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]

    scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
        MultiSURF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                                  NN_near, self._headers, self._class_type,
                                  self._X, self._y, self._labels_std)
        for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)

    return np.array(scores)
def transform(self, X, *args, **kwargs):
    """
    Transforms ``X`` using the transformers, then uses :func:`pandas.concat`
    to horizontally concatenate the results.
    """
    verify_x_type(X)

    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_transform)(trans, weight, X, *args, **kwargs)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)
def transform(self, x, inds=None, labels=None):
    """Return a transformation of x using population outputs."""
    if inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype)
        #                                       for I in inds)).transpose()
        return np.asarray(
            [self.out(I, x, labels, self.otype) for I in inds]).transpose()
    elif self._best_inds:
        # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype)
        #                                       for I in self._best_inds)).transpose()
        return np.asarray(
            [self.out(I, x, labels, self.otype) for I in self._best_inds]).transpose()
    else:
        return x
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mutual Information between each feature in F and s.

    This function is for when |S| > 1. s is the previously selected feature.
    We exploit the fact that this step is embarrassingly parallel.
    """
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_mi)(f, s, MI_FS)
                                        for f in F)
    return MIs
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mutual Information between each feature in X and y.

    This function is for when |S| = 0, i.e. when we select the first
    feature in S.
    """
    n, p = MI_FS.X.shape
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_first_mi)(i, k, MI_FS)
                                        for i in range(p))
    return MIs
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                   mmap_mode=None):
    """Prepare multi subject data in parallel.

    Parameters
    ----------
    imgs_list: list of Niimg-like objects
        See http://nilearn.github.io/manipulating_images/input_output.html.
        List of imgs file to prepare. One item per subject.

    confounds: list of confounds, optional
        List of confounds (2D arrays or filenames pointing to CSV files).
        Must be of the same length as imgs_list.

    copy: boolean, optional
        If True, guarantees that output array has no memory in common with
        input array.

    n_jobs: integer, optional
        The number of cpus to use to do the computation. -1 means 'all cpus'.

    Returns
    -------
    region_signals: list of 2D numpy.ndarray
        List of signal for each element per subject.
        shape: list of (number of scans, number of elements)
    """
    self._check_fitted()
    raw = True
    # Check whether all imgs from imgs_list are numpy instances, or fall back
    # to MultiNiftiMasker (could handle hybrid imgs_list but we do not
    # need it for the moment)
    for imgs in imgs_list:
        if isinstance(imgs, str):
            name, ext = os.path.splitext(imgs)
            if ext != '.npy':
                raw = False
                break
        elif not isinstance(imgs, np.ndarray):
            raw = False
            break
    if raw:
        data = Parallel(n_jobs=n_jobs)(delayed(np.load)(imgs, mmap_mode=mmap_mode)
                                       for imgs in imgs_list)
        return data
    else:
        return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                               confounds=confounds,
                                               copy=copy,
                                               n_jobs=n_jobs,
                                               )
def fit(self, X, y): """Actual fitting, performing the search over parameters.""" parameter_iterable = ParameterGrid(self.param_grid) estimator = self.estimator cv = self.cv n_samples = _num_samples(X) X, y = indexable(X, y) if y is not None: if len(y) != n_samples: raise ValueError('Target variable (y) has a different number ' 'of samples (%i) than data (X: %i samples)' % (len(y), n_samples)) cv = check_cv(cv, X, y, classifier=is_classifier(estimator)) if self.verbose > 0: if isinstance(parameter_iterable, Sized): n_candidates = len(parameter_iterable) print("Fitting {0} folds for each of {1} candidates, totalling" " {2} fits".format(len(cv), n_candidates, n_candidates * len(cv))) base_estimator = clone(self.estimator) pre_dispatch = self.pre_dispatch out = Parallel( n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=pre_dispatch )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring, parameters, cv=cv) for parameters in parameter_iterable) best = sorted(out, key=lambda x: x[0])[-1] self.best_params_ = best[1] self.best_score_ = best[0] if self.refit: # fit the best estimator using the entire dataset # clone first to work around broken estimators best_estimator = clone(base_estimator).set_params( **best[1]) if y is not None: best_estimator.fit(X, y, **self.fit_params) else: best_estimator.fit(X, **self.fit_params) self.best_estimator_ = best_estimator return self