Python sklearn.externals.joblib module: Parallel() example source code

We extracted the following 42 code examples from open-source Python projects to illustrate how to use sklearn.externals.joblib.Parallel().
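
Before the project examples, here is a minimal, self-contained usage sketch; it is not taken from any of the projects below. The recurring pattern is to build Parallel(n_jobs=...) and call it on a generator of delayed(func)(args) tasks; the results come back as a list in the order the tasks were generated. Note that sklearn.externals.joblib was a vendored copy of joblib and has been removed from recent scikit-learn releases, so with current versions you would import Parallel and delayed from joblib directly.

from math import sqrt
from sklearn.externals.joblib import Parallel, delayed  # on newer scikit-learn: from joblib import Parallel, delayed

# Evaluate sqrt(i ** 2) for i = 0..9 across two worker processes.
# Parallel consumes the generator of delayed calls and preserves input order.
results = Parallel(n_jobs=2)(delayed(sqrt)(i ** 2) for i in range(10))
print(results)  # [0.0, 1.0, 2.0, ..., 9.0]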

Project: skutil    Author: tgsmith61591
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
                X, y, scorer, parameter_iterable, fit_params,
                error_score, cv, **kwargs):
        groups = kwargs.pop('groups')

        # test_score, n_samples, parameters
        out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
            delayed(_fit_and_score)(
                clone(base_estimator), X, y, scorer,
                train, test, verbose, parameters,
                fit_params=fit_params,
                return_train_score=False,
                return_n_test_samples=True,
                return_times=False,
                return_parameters=True,
                error_score=error_score)
            for parameters in parameter_iterable
            for train, test in cv.split(X, y, groups))

        # test_score, n_samples, _, parameters
        return [(mod[0], mod[1], None, mod[2]) for mod in out]
Project: elephant_sense    Author: chakki-works
def post(self):
        data = tornado.escape.json_decode(self.request.body)
        is_debug = data["debug"]
        query = data["query"]
        message = {"posts": []}
        if is_debug:
            from elephant_sense.debug import search_posts_dummy
            posts = search_posts_dummy(query, count=30)
            posts = self.scoring(posts)
            message["posts"] = [self.trim(p) for p in posts]
            self.write(message)
        else:
            posts = search_posts(query, n=50)  # limited for performance; feature extraction needs improvement.
            process = 4
            batch_size = len(posts) / process
            tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(process)]
            dones = Parallel(n_jobs=process)(delayed(parallel_scoring)(self.evaluator, posts[t[0]:t[1]]) for t in tasks)
            posts = []
            for scoreds in dones:
                posts += [self.trim(s) for s in scoreds]
            posts = sorted(posts, key=lambda p: p["score"], reverse=True)
            message["posts"] = posts
            self.write(message)
Project: dancedeets-monorepo    Author: mikelambert
def _compute_features(self, raw_documents):

        values = array.array(str("f"))
        print "Preloading regexes"
        dummy_processor = event_classifier.StringProcessor('')
        for name, rule in named_rules_list:
            dummy_processor.count_tokens(rule)

        print "Computing Features"
        result = Parallel(
            n_jobs=7 if process_all else 1, verbose=10
        )(delayed(process_doc)(fb_event) for event_id, fb_event in raw_documents)
        for row_values in result:
            values.extend(row_values)

        X = np.array(values)
        X.shape = (len(raw_documents), len(self.features))

        return X
Project: Quadflor    Author: quadflor
def _extract_and_write(self, X, neighbor_id_lists, distances_to_neighbors, fileName = "l2r_train", y = None):

        labels_in_neighborhood = Parallel(n_jobs=self.n_jobs)(
            delayed(_create_training_samples)(cur_doc, neighbor_list, X, y, cur_doc + 1, distances_to_neighbors, 
                                              self.count_concepts, self.count_terms, self.number_of_concepts, 
                                              self.ibm1 if self.n_jobs == 1 and self.translation_probability else None) for cur_doc, neighbor_list in enumerate(neighbor_id_lists))


        doc_to_neighborhood_dict = self._merge_dicts(labels_in_neighborhood)

        filenames = ["samples_" + str(qid + 1) + ".tmp" for qid in range(len(doc_to_neighborhood_dict))]
        with open(fileName, 'w') as outfile:
            for fname in filenames:
                with open(fname) as infile:
                    for line in infile:
                        outfile.write(line)
                outfile.write('\n')

        return doc_to_neighborhood_dict
Project: scikit-rebate    Author: EpistasisLab
def _distarray_missing(self, xc, xd, cdiffs):
        """Distance array for data with missing values"""
        cindices = []
        dindices = []
        for i in range(self._datalen):
            cindices.append(np.where(np.isnan(xc[i]))[0])
            dindices.append(np.where(np.isnan(xd[i]))[0])

        if self.n_jobs != 1:
            dist_array = Parallel(n_jobs=self.n_jobs)(delayed(get_row_missing)(xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen))
        else:
            dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen)]

        return np.array(dist_array)
    #==================================================================#
############################# ReliefF ############################################
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        sm = cnt = 0
        for i in range(self._datalen):
            sm += sum(self._distance_array[i])
            cnt += len(self._distance_array[i])
        avg_dist = sm / float(cnt)

        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]
        NN_near_list = [i[0] for i in NNlist]
        NN_far_list = [i[1] for i in NNlist]

        if self.n_jobs != 1:
            scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
                SURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
        else:
            scores = np.sum([SURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)

        return np.array(scores)
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
        NN_near_list = [i[0] for i in NNlist]
        NN_far_list = [i[1] for i in NNlist]

        if self.n_jobs != 1:
            scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
                MultiSURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)

        else:
            scores = np.sum([MultiSURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)

        return np.array(scores)
Project: CheTo    Author: rdkit
def _generateFragments(self):
        voc=set(self.vocabulary)
        fpsdict = dict([(idx,{}) for idx in self.moldata.index])
        nrows = self.moldata.shape[0]
        counter = 0
        with Parallel(n_jobs=self.n_jobs,verbose=self.verbose) as parallel:
            while counter < nrows:
                nextChunk = min(counter+(self.n_jobs*self.chunksize),nrows)
                result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                    self.fragmentMethod, 
                                                    self.fragIdx)
                                   for mollist in self._produceDataChunks(counter,nextChunk,self.chunksize))
                for r in result:
                    counter+=len(r)
                    fpsdict.update(r)            
        self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:,1]                

    # construct the molecule-fragment matrix as input for the LDA algorithm
Project: ibex    Author: atavory
def fit_transform(self, X, y=None, **fit_params):
        """
        Fits the transformer using ``X`` (and possibly ``y``). Transforms
        ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.

        Returns:

            ``self``
        """
        verify_x_type(X)
        verify_y_type(y)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params) for _, trans, weight in self._iter())
        return self.__concat(Xts)
Project: few    Author: lacava
def calc_fitness(self,X,labels,fit_choice,sel):
        """computes fitness of individual output yhat.
        yhat: output of a program.
        labels: correct outputs
        fit_choice: choice of fitness function
        """

        if 'lexicase' in sel:
            # return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
            return np.asarray(
                              [self.proper(self.f_vec[fit_choice](labels,
                                                        yhat)) for yhat in X],
                                                        order='F')
            # return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
        else:
            # return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
            return np.asarray([self.f[fit_choice](labels,yhat) for yhat in X],
                            order='F').reshape(-1)

            # return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X))
Project: skutil    Author: tgsmith61591
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
                X, y, scorer, parameter_iterable, fit_params,
                error_score, cv, **kwargs):
        # test_score, n_samples, score_time, parameters
        return Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
            delayed(_fit_and_score)(
                clone(base_estimator), X, y, scorer,
                train, test, verbose, parameters,
                fit_params, return_parameters=True,
                error_score=error_score)
            for parameters in parameter_iterable
            for train, test in cv)
Project: dask-ml    Author: dask
def Parallel2(*args, **kwargs):
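    # Drop any caller-specified backend: with backend=None, joblib picks whatever
    # backend is currently active (e.g., one selected via a parallel_backend context)
    # or its default.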
    kwargs['backend'] = None
    return Parallel(*args, **kwargs)
Project: decoding-brain-challenge-2016    Author: alexandrebarachant
def fit(self, X, y, sample_weight=None):
        """Fit (estimates) the centroids.

        Parameters
        ----------
        X : ndarray, shape (n_trials, n_channels, n_channels)
            ndarray of SPD matrices.
        y : ndarray shape (n_trials, 1)
            labels corresponding to each trial.
        sample_weight : None | ndarray shape (n_trials, 1)
            the weights of each sample. If None, each sample is treated with
            equal weights.

        Returns
        -------
        self : MDM instance
            The MDM instance.
        """
        self.classes_ = numpy.unique(y)

        self.covmeans_ = []

        if sample_weight is None:
            sample_weight = numpy.ones(X.shape[0])

        if self.n_jobs == 1:
            for l in self.classes_:
                self.covmeans_.append(
                    mean_covariance(X[y == l], metric=self.metric_mean,
                                    sample_weight=sample_weight[y == l]))
        else:
            self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
                delayed(mean_covariance)(X[y == l], metric=self.metric_mean,
                                         sample_weight=sample_weight[y == l])
                for l in self.classes_)

        return self
Project: decoding-brain-challenge-2016    Author: alexandrebarachant
def _predict_distances(self, covtest):
        """Helper to predict the distance. equivalent to transform."""
        Nc = len(self.covmeans_)

        if self.n_jobs == 1:
            dist = [distance(covtest, self.covmeans_[m], self.metric_dist)
                    for m in range(Nc)]
        else:
            dist = Parallel(n_jobs=self.n_jobs)(delayed(distance)(
                covtest, self.covmeans_[m], self.metric_dist)
                for m in range(Nc))

        dist = numpy.concatenate(dist, axis=1)
        return dist
Project: autoreject    Author: autoreject
def fit(self, epochs):
        self.picks = _handle_picks(info=epochs.info, picks=self.picks)
        _check_data(epochs, picks=self.picks,
                    ch_constraint='single_channel_type', verbose=self.verbose)
        self.ch_type = _get_channel_type(epochs, self.picks)
        n_epochs = len(epochs)
        self.ch_subsets_ = self._get_random_subsets(epochs.info)
        self.mappings_ = self._get_mappings(epochs)

        n_jobs = check_n_jobs(self.n_jobs)
        parallel = Parallel(n_jobs, verbose=10)
        my_iterator = delayed(_iterate_epochs)
        if self.verbose is not False and self.n_jobs > 1:
            print('Iterating epochs ...')
        verbose = False if self.n_jobs > 1 else self.verbose
        corrs = parallel(my_iterator(self, epochs, idxs, verbose)
                         for idxs in np.array_split(np.arange(n_epochs),
                         n_jobs))
        self.corr_ = np.concatenate(corrs)
        if self.verbose is not False and self.n_jobs > 1:
            print('[Done]')

        # count in how many windows each sensor is RANSAC-bad
        self.bad_log = np.zeros_like(self.corr_)
        self.bad_log[self.corr_ < self.min_corr] = 1
        bad_log = self.bad_log.sum(axis=0)

        bad_idx = np.where(bad_log > self.unbroken_time * n_epochs)[0]
        if len(bad_idx) > 0:
            self.bad_chs_ = [
                epochs.info['ch_names'][self.picks[p]] for p in bad_idx]
        else:
            self.bad_chs_ = []
        return self
Project: skggm    Author: skggm
def _cpu_map(fun, param_grid, n_jobs, verbose=True):
    return Parallel(
        n_jobs=n_jobs,
        verbose=verbose,
        backend='threading',  # any sklearn backend should work here
    )(
        delayed(fun)(
            params
        )
        for params in param_grid)
Project: skggm    Author: skggm
def _cpu_map(fun, param_grid, n_jobs, verbose):
    return Parallel(
        n_jobs=n_jobs,
        verbose=verbose,
        backend='threading',  # any sklearn backend should work here
    )(
        delayed(fun)(
            params
        )
        for params in param_grid)
Project: DTW_physionet2016    Author: JJGO
def search_test_params(base_clf, cv_params, X, y, train, test, scoring):
    parameter_iterable = ParameterGrid(cv_params)
    grid_scores = Parallel(n_jobs=-1)(
        delayed(_fit_and_score)(clone(base_clf), X, y, scoring,
                                train, test, 0, parameters,
                                None, return_parameters=True)
            for parameters in parameter_iterable)
    # grid_scores = [_fit_and_score(clone(base_clf), X, y, scoring, train, test, 0, parameters, None, return_parameters=True) for parameters in parameter_iterable]
    grid_scores = sorted(grid_scores, key=lambda x: x[0], reverse=True)
    scores, _, _, parameters = grid_scores[0]
    return scores, parameters
Project: modl    Author: arthurmensch
def score(self, imgs, confounds=None):
        """
        Score the images against the learned spatial decomposition, based on the
        objective function value that is minimized by the algorithm. Lower
        means a better fit.

        Parameters
        ----------
        imgs: list of Niimg-like objects
            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
            Data on which PCA must be calculated. If this is a list,
            the affine is considered the same for all.

        confounds: CSV file path or 2D matrix
            This parameter is passed to nilearn.signal.clean. Please see the
            related documentation for details

        Returns
        -------
        score: float
            Average score on all input data
        """
        if (isinstance(imgs, str) or not hasattr(imgs, '__iter__')):
            imgs = [imgs]
        if confounds is None:
            confounds = itertools.repeat(None)
        scores = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(self._cache(_score_img, func_memory_level=1))(
                self.coder_, self.masker_, img, these_confounds)
            for img, these_confounds in zip(imgs, confounds))
        scores = np.array(scores)
        try:
            len_imgs = np.array([check_niimg(img).get_shape()[3]
                                 for img in imgs])
        except ImageFileError:
            len_imgs = np.array([np.load(img, mmap_mode='r').shape[0]
                                 for img in imgs])
        score = np.sum(scores * len_imgs) / np.sum(len_imgs)
        return score
Project: modl    Author: arthurmensch
def transform(self, imgs, confounds=None):
        """Compute the mask and the ICA maps across subjects

        Parameters
        ----------
        batch_size
        imgs: list of Niimg-like objects
            See http://nilearn.github.io/building_blocks/manipulating_mr_images.html#niimg.
            Data on which PCA must be calculated. If this is a list,
            the affine is considered the same for all.

        confounds: CSV file path or 2D matrix
            This parameter is passed to nilearn.signal.clean. Please see the
            related documentation for details

        Returns
        -------
        codes, list of ndarray, shape = n_images * (n_samples, n_components)
            Loadings for each of the images, and each of the time steps
        """
        if (isinstance(imgs, str) or not hasattr(imgs, '__iter__')):
            imgs = [imgs]
        if confounds is None:
            confounds = itertools.repeat(None)
        codes = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(self._cache(_transform_img, func_memory_level=1))(
                self.coder_, self.masker_, img, these_confounds)
            for img, these_confounds in zip(imgs, confounds))
        return codes
Project: mriqc    Author: poldracklab
def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """
    Evaluate a score by cross-validation
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                              train, test, verbose, None,
                                              fit_params)
                      for train, test in splits)

    group_order = []
    if hasattr(cv, 'groups'):
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
Project: mriqc    Author: poldracklab
def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of scikit-learn's original permutation test score function
    that evaluates the p-value outside this function, so that the score can be
    reused elsewhere.


    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
                   Performance.  The Journal of Machine Learning Research (2010)
                   vol. 11

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
Project: kenchi    Author: Y-oHr-N
def anomaly_score(self, X=None):
        """Compute anomaly scores for test samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), default None
            Test samples.

        Returns
        -------
        y_score : array-like of shape (n_samples,)
            Anomaly scores for test samples.
        """

        check_is_fitted(self, '_knn')

        if X is None:
            X        = self._knn._fit_X
            ind      = self._knn.kneighbors(None, return_distance=False)
        else:
            X        = check_array(X)
            ind      = self._knn.kneighbors(X, return_distance=False)

        n_samples, _ = X.shape

        try:
            result   = Parallel(self.n_jobs)(
                delayed(_abof)(
                    X[s], ind[s], self._knn._fit_X
                ) for s in gen_even_slices(n_samples, self.n_jobs)
            )
        except FloatingPointError as e:
            raise ValueError('X must not contain training samples') from e

        return -np.concatenate(result)
Project: sentence-classification    Author: bgmartins
def _pairwise_wmd(self, X_test, X_train=None):
        """Computes the word mover's distance between all train and test points.

        Parallelized over rows of X_test.

        Assumes that train and test samples are sparse BOW vectors summing to 1.

        Parameters
        ----------
        X_test: scipy.sparse matrix, shape: (n_test_samples, vocab_size)
            Test samples.

        X_train: scipy.sparse matrix, shape: (n_train_samples, vocab_size)
            Training samples. If `None`, uses the samples the estimator was fit with.

        Returns
        -------
        dist : array, shape: (n_test_samples, n_train_samples)
            Distances between all test samples and all train samples.

        """
        n_samples_test = X_test.shape[0]
        if X_train is None: X_train = self._fit_X
        if self.n_jobs == 1: dist = [ self._wmd_row( test_sample , X_train ) for test_sample in X_test ]
        else: dist = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._wmd_row) (test_sample, X_train) for test_sample in X_test)
        return np.array(dist)
Project: Quadflor    Author: quadflor
def _extract_features(self, topNIndices, topNDistances, y, distances):
        samples = self._split_samples(topNIndices, topNDistances, y)
        training_data_list = Parallel(n_jobs=self.n_jobs)(
            delayed(_analyze)(tI, tD, y, distances, self.dependencies) for tI, tD, y in samples)

        # merge training data
        training_data = defaultdict(list)
        for training_data_dict in training_data_list:
            for label, training_samples_of_label in training_data_dict.items():
                training_data[label].extend(training_data_dict[label])
        return training_data
Project: scikit-optimize    Author: scikit-optimize
def fit(self, X, y):
        """Fit one regressor for each quantile.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            Training vectors, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        * `y` [array-like, shape=(n_samples,)]:
            Target values (real numbers in regression)
        """
        rng = check_random_state(self.random_state)

        if self.base_estimator is None:
            base_estimator = GradientBoostingRegressor(loss='quantile')
        else:
            base_estimator = self.base_estimator

            if not isinstance(base_estimator, GradientBoostingRegressor):
                raise ValueError('base_estimator has to be of type'
                                 ' GradientBoostingRegressor.')

            if not base_estimator.loss == 'quantile':
                raise ValueError('base_estimator has to use quantile'
                                 ' loss not %s' % base_estimator.loss)

        # The predictions for different quantiles should be sorted.
        # Therefore each of the regressors needs the same seed.
        base_estimator.set_params(random_state=rng)
        regressors = []
        for q in self.quantiles:
            regressor = clone(base_estimator)
            regressor.set_params(alpha=q)

            regressors.append(regressor)

        self.regressors_ = Parallel(n_jobs=self.n_jobs, backend='threading')(
            delayed(_parallel_fit)(regressor, X, y)
            for regressor in regressors)

        return self
Project: Bacchus    Author: surfstudio
def fit_transform(self, X, y=None, **fit_params):
        self._validate_transformers()
        result = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_transform_one)(trans, name, weight, X, y, **fit_params)
            for name, trans, weight in self._iter())
        to_concats = [r[0] for r in result]
        if self.drop:
            return self._concat_just_right(to_concats[0], to_concats[1:])
        return self._concat_just_right(X, to_concats)
Project: Bacchus    Author: surfstudio
def transform(self, X):
        Xs = Parallel(n_jobs=self.n_jobs)(
            delayed(_transform_one)(trans, name, weight, X)
            for name, trans, weight in self._iter())
        if self.drop:
            return self._concat_just_right(Xs[0], Xs[1:])
        return self._concat_just_right(X, Xs)
Project: Bacchus    Author: surfstudio
def fit_transform(self, X, y=None, **fit_params):
        self._validate_transformers()
        result = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_transform_one)(trans, name, weight, X, y, **fit_params)
            for name, trans, weight in self._iter())
        Xs = [r[0] for r in result]
        Xs.insert(0, X)
        return pd.concat(Xs, axis=0)
Project: Bacchus    Author: surfstudio
def transform(self, X):
        Xs = Parallel(n_jobs=self.n_jobs)(
            delayed(_transform_one)(trans, name, weight, X)
            for name, trans, weight in self._iter())
        Xs.insert(0, X)
        return pd.concat(Xs, axis=0)
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = map(self._find_neighbors, range(self._datalen))
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            ReliefF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
            NN, self._headers, self._class_type, self._X, self._y, self._labels_std)
             for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)

        return np.array(scores)
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]

        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            MultiSURF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
            NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std)
            for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)

        return np.array(scores)
Project: ibex    Author: atavory
def transform(self, X, *args, **kwargs):
        """
        Transforms ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.
        """
        verify_x_type(X)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_transform)(trans, weight, X, *args, **kwargs) for _, trans, weight in self._iter())
        return self.__concat(Xts)
Project: few    Author: lacava
def transform(self,x,inds=None,labels = None):
        """return a transformation of x using population outputs"""
        if inds:
            # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype) 
            #                           for I in inds)).transpose()
            return np.asarray(
                [self.out(I,x,labels,self.otype) for I in inds]).transpose()
        elif self._best_inds:
            # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype) 
            #                                   for I in self._best_inds)).transpose()
            return np.asarray(
                [self.out(I,x,labels,self.otype) for I in self._best_inds]).transpose()
        else:
            return x
Project: Parallel-SGD    Author: angadgill
def _fit_multiclass(self, X, y, alpha, C, learning_rate,
                        sample_weight, n_iter):
        """Fit a multi-class classifier by combining binary classifiers

        Each binary classifier predicts one class versus all others. This
        strategy is called OVA: One Versus All.
        """
        # Use joblib to fit OvA in parallel.
        result = Parallel(n_jobs=self.n_jobs, backend="threading",
                          verbose=self.verbose)(
                delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,
                                    n_iter, self._expanded_class_weight[i], 1.,
                                    sample_weight)
                for i in range(len(self.classes_)))

        for i, (_, intercept) in enumerate(result):
            self.intercept_[i] = intercept

        self.t_ += n_iter * X.shape[0]

        if self.average > 0:
            if self.average <= self.t_ - 1.0:
                self.coef_ = self.average_coef_
                self.intercept_ = self.average_intercept_
            else:
                self.coef_ = self.standard_coef_
                self.standard_intercept_ = np.atleast_1d(self.intercept_)
                self.intercept_ = self.standard_intercept_
Project: Parallel-SGD    Author: angadgill
def _fit_multiclass(self, X, y, alpha, C, learning_rate,
                        sample_weight, n_iter):
        """Fit a multi-class classifier by combining binary classifiers

        Each binary classifier predicts one class versus all others. This
        strategy is called OVA: One Versus All.
        """
        # Use joblib to fit OvA in parallel.
        result = Parallel(n_jobs=self.n_jobs, backend="threading",
                          verbose=self.verbose)(
                delayed(fit_binary)(self, i, X, y, alpha, C, learning_rate,
                                    n_iter, self._expanded_class_weight[i], 1.,
                                    sample_weight)
                for i in range(len(self.classes_)))

        for i, (_, intercept) in enumerate(result):
            self.intercept_[i] = intercept

        self.t_ += n_iter * X.shape[0]

        if self.average > 0:
            if self.average <= self.t_ - 1.0:
                self.coef_ = self.average_coef_
                self.intercept_ = self.average_intercept_
            else:
                self.coef_ = self.standard_coef_
                self.standard_intercept_ = np.atleast_1d(self.intercept_)
                self.intercept_ = self.standard_intercept_
Project: mifs    Author: danielhomola
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mutual Information between each feature in F and s.

    This function is for when |S| > 1. s is the previously selected feature.
    We exploit the fact that this step is embarrassingly parallel.
    """

    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_mi)(f, s, MI_FS)
                                        for f in F)
    return MIs
Project: mifs    Author: danielhomola
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mutual Information between each feature in X and y.

    This function is for when |S| = 0. We select the first feature in S.
    """
    n, p = MI_FS.X.shape
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_first_mi)(i, k, MI_FS)
                                        for i in range(p))
    return MIs
Project: modl    Author: arthurmensch
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                       mmap_mode=None):
        """Prepare multi subject data in parallel

        Parameters
        ----------

        imgs_list: list of Niimg-like objects
            See http://nilearn.github.io/manipulating_images/input_output.html.
            List of imgs file to prepare. One item per subject.

        confounds: list of confounds, optional
            List of confounds (2D arrays or filenames pointing to CSV
            files). Must be of the same length as imgs_list.

        copy: boolean, optional
            If True, guarantees that output array has no memory in common with
            input array.

        n_jobs: integer, optional
            The number of cpus to use to do the computation. -1 means
            'all cpus'.

        Returns
        -------
        region_signals: list of 2D numpy.ndarray
            List of signal for each element per subject.
            shape: list of (number of scans, number of elements)
        """
        self._check_fitted()
        raw = True
        # Check whether all imgs in imgs_list are numpy arrays; otherwise fall back
        # to MultiNiftiMasker (could handle a hybrid imgs_list, but we do not
        # need it for the moment)
        for imgs in imgs_list:
            if isinstance(imgs, str):
                name, ext = os.path.splitext(imgs)
                if ext != '.npy':
                    raw = False
                    break
            elif not isinstance(imgs, np.ndarray):
                raw = False
                break
        if raw:
            data = Parallel(n_jobs=n_jobs)(delayed(np.load)(imgs,
                                                            mmap_mode=mmap_mode)
                                           for imgs in imgs_list)
            return data
        else:
            return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                                   confounds=confounds,
                                                   copy=copy,
                                                   n_jobs=n_jobs, )
Project: CerebralCortex-2.0-legacy    Author: MD2Korg
def fit(self, X, y):
        """Actual fitting,  performing the search over parameters."""

        parameter_iterable = ParameterGrid(self.param_grid)

        estimator = self.estimator
        cv = self.cv

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number '
                                 'of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))
        cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

        if self.verbose > 0:
            if isinstance(parameter_iterable, Sized):
                n_candidates = len(parameter_iterable)
                print("Fitting {0} folds for each of {1} candidates, totalling"
                      " {2} fits".format(len(cv), n_candidates,
                                         n_candidates * len(cv)))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                      parameters, cv=cv)
            for parameters in parameter_iterable)

        best = sorted(out, key=lambda x: x[0])[-1]
        self.best_params_ = best[1]
        self.best_score_ = best[0]

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best[1])
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator

        return self
Project: CerebralCortex-2.0-legacy    Author: MD2Korg
def fit(self, X, y):
        """Actual fitting,  performing the search over parameters."""

        parameter_iterable = ParameterSampler(self.param_distributions,
                                              self.n_iter,
                                              random_state=self.random_state)
        estimator = self.estimator
        cv = self.cv

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number '
                                 'of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))
        cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

        if self.verbose > 0:
            if isinstance(parameter_iterable, Sized):
                n_candidates = len(parameter_iterable)
                print("Fitting {0} folds for each of {1} candidates, totalling"
                      " {2} fits".format(len(cv), n_candidates,
                                         n_candidates * len(cv)))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(
            delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                      parameters, cv=cv)
            for parameters in parameter_iterable)

        best = sorted(out, reverse=True)[0]
        self.best_params_ = best[1]
        self.best_score_ = best[0]

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best[1])
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator

        return self
Project: scikit-optimize    Author: scikit-optimize
def run(n_calls=32, n_runs=1, save_traces=True, n_jobs=1):
    """
    Main function used to run the experiments.

    Parameters
    ----------
    * `n_calls`: int
        Evaluation budget.

    * `n_runs`: int
        Number of times to repeat the optimization in order to average out noise.

    * `save_traces`: bool
        Whether or not to save data collected during optimization.

    * `n_jobs`: int
        Number of different repeats of optimization to run in parallel.
    """
    surrogate_minimizers = [gbrt_minimize, forest_minimize, gp_minimize]
    selected_models = sorted(MODELS, key=lambda x: x.__name__)
    selected_datasets = (DATASETS.keys())

    # all the parameter values and objectives collected during execution are stored in the dict below
    all_data = {}
    for model in selected_models:
        all_data[model] = {}

        for dataset in selected_datasets:
            if not issubclass(model, DATASETS[dataset]):
                continue

            all_data[model][dataset] = {}
            for surrogate_minimizer in surrogate_minimizers:
                print(surrogate_minimizer.__name__, model.__name__, dataset)
                seeds = np.random.randint(0, 2**30, n_runs)
                raw_trace = Parallel(n_jobs=n_jobs)(
                    delayed(evaluate_optimizer)(
                        surrogate_minimizer, model, dataset, n_calls, seed
                    ) for seed in seeds
                )
                all_data[model][dataset][surrogate_minimizer.__name__] = raw_trace

    # convert the model keys to strings so that results can be saved as json
    all_data = {k.__name__: v for k,v in all_data.items()}

    # dump the recorded objective values as json
    if save_traces:
        with open(datetime.now().strftime("%m_%Y_%d_%H_%m_%s")+'.json', 'w') as f:
            json.dump(all_data, f)
    calculate_performance(all_data)