Python sklearn.externals.joblib module: delayed() example source code

We extracted the following 24 code examples from open-source Python projects to illustrate how to use sklearn.externals.joblib.delayed().
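Before the project examples, here is a minimal, stand-alone sketch of the shared pattern: delayed() wraps a function call into a task description, and Parallel runs a generator of such tasks across workers, returning the results in input order. The square function and n_jobs value are illustrative; note also that sklearn.externals.joblib has been removed from recent scikit-learn releases, where `from joblib import Parallel, delayed` is the direct equivalent.

from sklearn.externals.joblib import Parallel, delayed

def square(x):
    return x * x

# dispatch ten independent calls to two worker processes;
# results come back in the same order as the input iterable
results = Parallel(n_jobs=2)(delayed(square)(i) for i in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]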

Project: elephant_sense    Author: chakki-works
def post(self):
        data = tornado.escape.json_decode(self.request.body)
        is_debug = data["debug"]
        query = data["query"]
        message = {"posts": []}
        if is_debug:
            from elephant_sense.debug import search_posts_dummy
            posts = search_posts_dummy(query, count=30)
            posts = self.scoring(posts)
            message["posts"] = [self.trim(p) for p in posts]
            self.write(message)
        else:
            posts = search_posts(query, n=50)  # limit for performance. need improvements for feature extraction.
            process = 4
            batch_size = len(posts) / process
            tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(process)]
            dones = Parallel(n_jobs=process)(delayed(parallel_scoring)(self.evaluator, posts[t[0]:t[1]]) for t in tasks)
            posts = []
            for scoreds in dones:
                posts += [self.trim(s) for s in scoreds]
            posts = sorted(posts, key=lambda p: p["score"], reverse=True)
            message["posts"] = posts
            self.write(message)
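A stand-alone sketch of the batching logic above, with a generic score_batch helper standing in for parallel_scoring and the evaluator (all names and sizes here are illustrative, not from elephant_sense):

from sklearn.externals.joblib import Parallel, delayed

def score_batch(batch):
    # hypothetical stand-in for parallel_scoring(evaluator, batch)
    return [{"post": p, "score": float(p)} for p in batch]

posts = list(range(50))
process = 4
batch_size = len(posts) / process  # float division; truncated with int() below
tasks = [(int(i * batch_size), int(i * batch_size + batch_size)) for i in range(process)]
dones = Parallel(n_jobs=process)(delayed(score_batch)(posts[t[0]:t[1]]) for t in tasks)
scored = [item for batch in dones for item in batch]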
Project: modl    Author: arthurmensch
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-3, 3, 7)]
    exps += [{'method': 'gram',
             'reduction': reduction}
            for reduction in [1, 4, 6, 8, 12, 24]]

    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps))
Project: modl    Author: arthurmensch
def run(n_seeds, n_jobs, _run, _seed):
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-7, -7, 1)]
    exps += [{'method': 'gram',
              'reduction': reduction}
             for reduction in [12]]

    rundir = join(basedir, str(_run._id), 'run')
    if not os.path.exists(rundir):
        os.makedirs(rundir)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps))
Project: dancedeets-monorepo    Author: mikelambert
def _compute_features(self, raw_documents):

        values = array.array(str("f"))
        print "Preloading regexes"
        dummy_processor = event_classifier.StringProcessor('')
        for name, rule in named_rules_list:
            dummy_processor.count_tokens(rule)

        print "Computing Features"
        result = Parallel(
            n_jobs=7 if process_all else 1, verbose=10
        )(delayed(process_doc)(fb_event) for event_id, fb_event in raw_documents)
        for row_values in result:
            values.extend(row_values)

        X = np.array(values)
        X.shape = (len(raw_documents), len(self.features))

        return X
Project: scikit-rebate    Author: EpistasisLab
def _distarray_missing(self, xc, xd, cdiffs):
        """Distance array for data with missing values"""
        cindices = []
        dindices = []
        for i in range(self._datalen):
            cindices.append(np.where(np.isnan(xc[i]))[0])
            dindices.append(np.where(np.isnan(xd[i]))[0])

        if self.n_jobs != 1:
            dist_array = Parallel(n_jobs=self.n_jobs)(delayed(get_row_missing)(xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen))
        else:
            dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen)]

        return np.array(dist_array)
    #==================================================================#
############################# ReliefF ############################################
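The n_jobs != 1 branch above is a common guard: for a single job, a plain list comprehension gives the same result without the process start-up and pickling cost of Parallel. A minimal illustration, with a toy row function replacing get_row_missing and its arguments:

from sklearn.externals.joblib import Parallel, delayed

def get_row(index, data):
    # stand-in for get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
    return [abs(index - j) for j in range(len(data))]

data = list(range(100))
n_jobs = 2
if n_jobs != 1:
    dist_array = Parallel(n_jobs=n_jobs)(delayed(get_row)(i, data) for i in range(len(data)))
else:
    dist_array = [get_row(i, data) for i in range(len(data))]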
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        sm = cnt = 0
        for i in range(self._datalen):
            sm += sum(self._distance_array[i])
            cnt += len(self._distance_array[i])
        avg_dist = sm / float(cnt)

        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen, avg_dist) for datalen in range(self._datalen)]
        NN_near_list = [i[0] for i in NNlist]
        NN_far_list = [i[1] for i in NNlist]

        if self.n_jobs != 1:
            scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
                SURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)
        else:
            scores = np.sum([SURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)

        return np.array(scores)
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]
        NN_near_list = [i[0] for i in NNlist]
        NN_far_list = [i[1] for i in NNlist]

        if self.n_jobs != 1:
            scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
                MultiSURFstar_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)), axis=0)

        else:
            scores = np.sum([MultiSURFstar_compute_scores(instance_num, attr, nan_entries, self._num_attributes,
                NN_near, NN_far, self._headers, self._class_type, self._X, self._y, self._labels_std)
                 for instance_num, NN_near, NN_far in zip(range(self._datalen), NN_near_list, NN_far_list)], axis=0)

        return np.array(scores)
Project: CheTo    Author: rdkit
def _generateFragments(self):
        voc=set(self.vocabulary)
        fpsdict = dict([(idx,{}) for idx in self.moldata.index])
        nrows = self.moldata.shape[0]
        counter = 0
        with Parallel(n_jobs=self.n_jobs,verbose=self.verbose) as parallel:
            while counter < nrows:
                nextChunk = min(counter+(self.n_jobs*self.chunksize),nrows)
                result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                    self.fragmentMethod, 
                                                    self.fragIdx)
                                   for mollist in self._produceDataChunks(counter,nextChunk,self.chunksize))
                for r in result:
                    counter+=len(r)
                    fpsdict.update(r)            
        self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:,1]                

    # construct the molecule-fragment matrix as input for the LDA algorithm
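The CheTo snippet shows a second Parallel idiom: `with Parallel(...) as parallel:` keeps the worker pool alive across several dispatch rounds, so every chunk of work reuses the same processes instead of re-spawning them. A reduced, self-contained sketch of that pattern (the work function and chunk sizes are illustrative):

from sklearn.externals.joblib import Parallel, delayed

def process_chunk(chunk):
    return {x: x * x for x in chunk}

items = list(range(1000))
chunksize = 100
results = {}
with Parallel(n_jobs=2) as parallel:
    counter = 0
    while counter < len(items):
        next_chunk = min(counter + 2 * chunksize, len(items))
        partial = parallel(delayed(process_chunk)(items[start:start + chunksize])
                           for start in range(counter, next_chunk, chunksize))
        for r in partial:
            results.update(r)
        counter = next_chunk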
Project: ibex    Author: atavory
def fit_transform(self, X, y=None, **fit_params):
        """
        Fits the transformer using ``X`` (and possibly ``y``). Transforms
        ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.

        Returns:

            ``self``
        """
        verify_x_type(X)
        verify_y_type(y)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params) for _, trans, weight in self._iter())
        return self.__concat(Xts)
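The ibex snippet parallelizes one fit_transform call per transformer and then concatenates the outputs. A generic stand-alone version of that layout, using numpy's hstack in place of pandas.concat and two scikit-learn transformers as examples (weight handling from ibex's _fit_transform helper is omitted):

import numpy as np
from sklearn.externals.joblib import Parallel, delayed
from sklearn.preprocessing import StandardScaler, MinMaxScaler

def _fit_transform_one(trans, X, y=None):
    # simplified stand-in for ibex's _fit_transform helper
    return trans.fit_transform(X, y)

X = np.random.rand(100, 3)
transformers = [StandardScaler(), MinMaxScaler()]
Xts = Parallel(n_jobs=2)(delayed(_fit_transform_one)(t, X) for t in transformers)
X_combined = np.hstack(Xts)  # shape (100, 6)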
Project: few    Author: lacava
def calc_fitness(self,X,labels,fit_choice,sel):
        """computes fitness of individual output yhat.
        yhat: output of a program.
        labels: correct outputs
        fit_choice: choice of fitness function
        """

        if 'lexicase' in sel:
            # return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
            return np.asarray(
                              [self.proper(self.f_vec[fit_choice](labels,
                                                        yhat)) for yhat in X],
                                                        order='F')
            # return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
        else:
            # return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
            return np.asarray([self.f[fit_choice](labels,yhat) for yhat in X],
                            order='F').reshape(-1)

            # return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X))
Project: decoding-brain-challenge-2016    Author: alexandrebarachant
def fit(self, X, y, sample_weight=None):
        """Fit (estimates) the centroids.

        Parameters
        ----------
        X : ndarray, shape (n_trials, n_channels, n_channels)
            ndarray of SPD matrices.
        y : ndarray shape (n_trials, 1)
            labels corresponding to each trial.
        sample_weight : None | ndarray shape (n_trials, 1)
            the weights of each sample. if None, each sample is treated with
            equal weights.

        Returns
        -------
        self : MDM instance
            The MDM instance.
        """
        self.classes_ = numpy.unique(y)

        self.covmeans_ = []

        if sample_weight is None:
            sample_weight = numpy.ones(X.shape[0])

        if self.n_jobs == 1:
            for l in self.classes_:
                self.covmeans_.append(
                    mean_covariance(X[y == l], metric=self.metric_mean,
                                    sample_weight=sample_weight[y == l]))
        else:
            self.covmeans_ = Parallel(n_jobs=self.n_jobs)(
                delayed(mean_covariance)(X[y == l], metric=self.metric_mean,
                                         sample_weight=sample_weight[y == l])
                for l in self.classes_)

        return self
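A reduced sketch of the per-class dispatch in the parallel branch above: one delayed() call per class label, each computing a class average (a plain arithmetic mean stands in for mean_covariance over SPD matrices; shapes are illustrative):

import numpy as np
from sklearn.externals.joblib import Parallel, delayed

X = np.random.rand(60, 8, 8)   # 60 trials of 8x8 matrices
y = np.repeat([0, 1, 2], 20)   # three classes
classes = np.unique(y)

covmeans = Parallel(n_jobs=3)(
    delayed(np.mean)(X[y == label], axis=0) for label in classes)
# covmeans is a list with one 8x8 averaged matrix per class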
Project: decoding-brain-challenge-2016    Author: alexandrebarachant
def _predict_distances(self, covtest):
        """Helper to predict the distance. equivalent to transform."""
        Nc = len(self.covmeans_)

        if self.n_jobs == 1:
            dist = [distance(covtest, self.covmeans_[m], self.metric_dist)
                    for m in range(Nc)]
        else:
            dist = Parallel(n_jobs=self.n_jobs)(delayed(distance)(
                covtest, self.covmeans_[m], self.metric_dist)
                for m in range(Nc))

        dist = numpy.concatenate(dist, axis=1)
        return dist
Project: autoreject    Author: autoreject
def fit(self, epochs):
        self.picks = _handle_picks(info=epochs.info, picks=self.picks)
        _check_data(epochs, picks=self.picks,
                    ch_constraint='single_channel_type', verbose=self.verbose)
        self.ch_type = _get_channel_type(epochs, self.picks)
        n_epochs = len(epochs)
        self.ch_subsets_ = self._get_random_subsets(epochs.info)
        self.mappings_ = self._get_mappings(epochs)

        n_jobs = check_n_jobs(self.n_jobs)
        parallel = Parallel(n_jobs, verbose=10)
        my_iterator = delayed(_iterate_epochs)
        if self.verbose is not False and self.n_jobs > 1:
            print('Iterating epochs ...')
        verbose = False if self.n_jobs > 1 else self.verbose
        corrs = parallel(my_iterator(self, epochs, idxs, verbose)
                         for idxs in np.array_split(np.arange(n_epochs),
                         n_jobs))
        self.corr_ = np.concatenate(corrs)
        if self.verbose is not False and self.n_jobs > 1:
            print('[Done]')

        # compute how many windows is a sensor RANSAC-bad
        self.bad_log = np.zeros_like(self.corr_)
        self.bad_log[self.corr_ < self.min_corr] = 1
        bad_log = self.bad_log.sum(axis=0)

        bad_idx = np.where(bad_log > self.unbroken_time * n_epochs)[0]
        if len(bad_idx) > 0:
            self.bad_chs_ = [
                epochs.info['ch_names'][self.picks[p]] for p in bad_idx]
        else:
            self.bad_chs_ = []
        return self
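The autoreject snippet splits the epoch indices with np.array_split so that each job receives one contiguous block, and the per-block results are concatenated afterwards. A minimal stand-alone version of that dispatch (the block function is a stand-in for _iterate_epochs):

import numpy as np
from sklearn.externals.joblib import Parallel, delayed

def correlate_block(data, idxs):
    # stand-in for _iterate_epochs: one value per epoch index
    return data[idxs].mean(axis=1)

data = np.random.rand(40, 64)   # 40 epochs, 64 samples each
n_jobs = 4
corrs = Parallel(n_jobs=n_jobs, verbose=10)(
    delayed(correlate_block)(data, idxs)
    for idxs in np.array_split(np.arange(len(data)), n_jobs))
corr = np.concatenate(corrs)    # shape (40,)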
Project: mriqc    Author: poldracklab
def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """
    Evaluate a score by cross-validation
    """
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                              train, test, verbose, None,
                                              fit_params)
                      for train, test in splits)

    group_order = []
    if hasattr(cv, 'groups'):
        group_order = [np.array(cv.groups)[test].tolist()[0] for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
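Hypothetical usage of the modified cross_val_score above, assuming the surrounding mriqc module supplies _fit_and_score and the other imports: because a single scoring value is wrapped into a list, several scorers can be evaluated over the same folds in one parallel pass.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
scores, group_order = cross_val_score(
    LogisticRegression(), X, y,
    scoring=['accuracy', 'f1_macro'], cv=3, n_jobs=2)
# `scores` stacks the per-fold results from _fit_and_score (not shown here);
# `group_order` stays empty unless the CV splitter exposes a `groups` attribute.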
Project: mriqc    Author: poldracklab
def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """
    Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of original sklearn's permutation test score function
    to evaluate p-value outside this function, so that the score can be
    reused from outside.


    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
                   Performance.  The Journal of Machine Learning Research (2010)
                   vol. 11

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
Project: sentence-classification    Author: bgmartins
def _pairwise_wmd(self, X_test, X_train=None):
        """Computes the word mover's distance between all train and test points.

        Parallelized over rows of X_test.

        Assumes that train and test samples are sparse BOW vectors summing to 1.

        Parameters
        ----------
        X_test: scipy.sparse matrix, shape: (n_test_samples, vocab_size)
            Test samples.

        X_train: scipy.sparse matrix, shape: (n_train_samples, vocab_size)
            Training samples. If `None`, uses the samples the estimator was fit with.

        Returns
        -------
        dist : array, shape: (n_test_samples, n_train_samples)
            Distances between all test samples and all train samples.

        """
        n_samples_test = X_test.shape[0]
        if X_train is None: X_train = self._fit_X
        if self.n_jobs == 1: dist = [ self._wmd_row( test_sample , X_train ) for test_sample in X_test ]
        else: dist = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._wmd_row) (test_sample, X_train) for test_sample in X_test)
        return np.array(dist)
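A generic sketch of the row-parallel layout above: one delayed() task per test row, each returning the distances from that row to every training row. Plain Euclidean distance stands in for the word mover's distance, and dense arrays stand in for the sparse BOW vectors:

import numpy as np
from sklearn.externals.joblib import Parallel, delayed

def row_distances(test_row, X_train):
    return np.linalg.norm(X_train - test_row, axis=1)

X_train = np.random.rand(200, 16)
X_test = np.random.rand(10, 16)
dist = np.array(Parallel(n_jobs=2)(
    delayed(row_distances)(row, X_train) for row in X_test))
# dist.shape == (10, 200)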
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = map(self._find_neighbors, range(self._datalen))
        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            ReliefF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
            NN, self._headers, self._class_type, self._X, self._y, self._labels_std)
             for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)

        return np.array(scores)
Project: scikit-rebate    Author: EpistasisLab
def _run_algorithm(self):
        attr = self._get_attribute_info()
        nan_entries = np.isnan(self._X)

        NNlist = [self._find_neighbors(datalen) for datalen in range(self._datalen)]

        scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
            MultiSURF_compute_scores)(instance_num, attr, nan_entries, self._num_attributes,
            NN_near, self._headers, self._class_type, self._X, self._y, self._labels_std)
            for instance_num, NN_near in zip(range(self._datalen), NNlist)), axis=0)

        return np.array(scores)
Project: ibex    Author: atavory
def transform(self, X, *args, **kwargs):
        """
        Transforms ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.
        """
        verify_x_type(X)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_transform)(trans, weight, X, *args, **kwargs) for _, trans, weight in self._iter())
        return self.__concat(Xts)
Project: few    Author: lacava
def transform(self,x,inds=None,labels = None):
        """return a transformation of x using population outputs"""
        if inds:
            # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype) 
            #                           for I in inds)).transpose()
            return np.asarray(
                [self.out(I,x,labels,self.otype) for I in inds]).transpose()
        elif self._best_inds:
            # return np.asarray(Parallel(n_jobs=10)(delayed(self.out)(I,x,labels,self.otype) 
            #                                   for I in self._best_inds)).transpose()
            return np.asarray(
                [self.out(I,x,labels,self.otype) for I in self._best_inds]).transpose()
        else:
            return x
Project: mifs    Author: danielhomola
def get_mi_vector(MI_FS, F, s):
    """
    Calculates the Mututal Information between each feature in F and s.

    This function is for when |S| > 1. s is the previously selected feature.
    We exploite the fact that this step is embarrassingly parallel.
    """

    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_mi)(f, s, MI_FS)
                                        for f in F)
    return MIs
Project: mifs    Author: danielhomola
def get_first_mi_vector(MI_FS, k):
    """
    Calculates the Mututal Information between each feature in X and y.

    This function is for when |S| = 0. We select the first feautre in S.
    """
    n, p = MI_FS.X.shape
    MIs = Parallel(n_jobs=MI_FS.n_jobs)(delayed(_get_first_mi)(i, k, MI_FS)
                                        for i in xrange(p))
    return MIs
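Both mifs helpers follow the same feature-wise dispatch: one delayed() task per candidate feature (xrange marks the original as Python 2 code; range is the Python 3 equivalent). A simplified stand-alone version with a toy scorer in place of _get_first_mi:

import numpy as np
from sklearn.externals.joblib import Parallel, delayed

def feature_score(i, X, y):
    # hypothetical stand-in for a mutual-information estimate of X[:, i] vs. y
    return abs(np.corrcoef(X[:, i], y)[0, 1])

X = np.random.rand(100, 20)
y = np.random.rand(100)
MIs = Parallel(n_jobs=2)(
    delayed(feature_score)(i, X, y) for i in range(X.shape[1]))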
Project: modl    Author: arthurmensch
def transform_imgs(self, imgs_list, confounds=None, copy=True, n_jobs=1,
                       mmap_mode=None):
        """Prepare multi subject data in parallel

        Parameters
        ----------

        imgs_list: list of Niimg-like objects
            See http://nilearn.github.io/manipulating_images/input_output.html.
            List of imgs file to prepare. One item per subject.

        confounds: list of confounds, optional
            List of confounds (2D arrays or filenames pointing to CSV
            files). Must be of same length than imgs_list.

        copy: boolean, optional
            If True, guarantees that output array has no memory in common with
            input array.

        n_jobs: integer, optional
            The number of cpus to use to do the computation. -1 means
            'all cpus'.

        Returns
        -------
        region_signals: list of 2D numpy.ndarray
            List of signal for each element per subject.
            shape: list of (number of scans, number of elements)
        """
        self._check_fitted()
        raw = True
        # Check whether all imgs from imgs_list are numpy instance, or fallback
        # to MultiNiftiMasker (could handle hybrid imgs_list but we do not
        #  need it for the moment)
        for imgs in imgs_list:
            if isinstance(imgs, str):
                name, ext = os.path.splitext(imgs)
                if ext != '.npy':
                    raw = False
                    break
            elif not isinstance(imgs, np.ndarray):
                raw = False
                break
        if raw:
            data = Parallel(n_jobs=n_jobs)(delayed(np.load)(imgs,
                                                            mmap_mode=mmap_mode)
                                           for imgs in imgs_list)
            return data
        else:
            return MultiNiftiMasker.transform_imgs(self, imgs_list,
                                                   confounds=confounds,
                                                   copy=copy,
                                                   n_jobs=n_jobs, )
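A stand-alone sketch of the raw-array branch above: several .npy files are loaded in parallel, with mmap_mode forwarded to np.load so the files can be memory-mapped rather than read eagerly. The file names here are illustrative and are created on the fly so the sketch runs:

import numpy as np
from sklearn.externals.joblib import Parallel, delayed

paths = ['subject_01.npy', 'subject_02.npy', 'subject_03.npy']
for p in paths:
    np.save(p, np.random.rand(50, 10))   # toy files standing in for prepared data

data = Parallel(n_jobs=2)(
    delayed(np.load)(p, mmap_mode='r') for p in paths)
# data is a list of arrays, one per input file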
Project: CerebralCortex-2.0-legacy    Author: MD2Korg
def fit(self, X, y):
        """Actual fitting,  performing the search over parameters."""

        parameter_iterable = ParameterGrid(self.param_grid)

        estimator = self.estimator
        cv = self.cv

        n_samples = _num_samples(X)
        X, y = indexable(X, y)

        if y is not None:
            if len(y) != n_samples:
                raise ValueError('Target variable (y) has a different number '
                                 'of samples (%i) than data (X: %i samples)'
                                 % (len(y), n_samples))
        cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

        if self.verbose > 0:
            if isinstance(parameter_iterable, Sized):
                n_candidates = len(parameter_iterable)
                print("Fitting {0} folds for each of {1} candidates, totalling"
                      " {2} fits".format(len(cv), n_candidates,
                                         n_candidates * len(cv)))

        base_estimator = clone(self.estimator)

        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                      parameters, cv=cv)
            for parameters in parameter_iterable)

        best = sorted(out, key=lambda x: x[0])[-1]
        self.best_params_ = best[1]
        self.best_score_ = best[0]

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best[1])
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator

        return self
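Finally, a reduced sketch of the grid dispatch in the CerebralCortex snippet: one delayed() evaluation per parameter setting, each on a fresh clone of the estimator so the fits stay independent. Since cv_fit_and_score is project-specific, a simple fit-and-score helper without cross-validation stands in for it:

from sklearn.externals.joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.model_selection import ParameterGrid
from sklearn.svm import SVC
from sklearn.datasets import load_iris

def fit_and_score(estimator, X, y, parameters):
    # stand-in for cv_fit_and_score: fit on the full data and score it
    estimator.set_params(**parameters)
    estimator.fit(X, y)
    return estimator.score(X, y), parameters

X, y = load_iris(return_X_y=True)
grid = ParameterGrid({'C': [0.1, 1.0, 10.0], 'kernel': ['linear', 'rbf']})
out = Parallel(n_jobs=2)(
    delayed(fit_and_score)(clone(SVC()), X, y, params) for params in grid)
best_score, best_params = sorted(out, key=lambda x: x[0])[-1]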