The following 50 code examples, extracted from open-source Python projects, illustrate how to use joblib.delayed().
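Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the pattern they all share: wrap the target function with delayed() so each call is captured lazily as a (function, args, kwargs) tuple, then hand the resulting generator to a Parallel instance, which dispatches the calls to workers and returns the results in input order. The square function is just a placeholder.

from joblib import Parallel, delayed

def square(x):
    # stand-in for the real per-item work
    return x * x

# run square() on 0..9 across 4 worker processes; results keep input order
results = Parallel(n_jobs=4)(delayed(square)(i) for i in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
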
def update_row_param(self, phi_csr, mu0, c, v, r_prev, u_prev, phi_r, phi_u, num_process):

    nrow = self.y_csr.shape[0]

    # Update 'c' and 'v' block-wise in parallel.
    if num_process == 1:
        r, u = self.update_row_param_blockwise(self.y_csr, phi_csr, mu0, c, v,
                                               r_prev, u_prev, phi_r, phi_u)
    else:
        n_block = num_process
        block_ind = np.linspace(0, nrow, 1 + n_block, dtype=int)
        ru = joblib.Parallel(n_jobs=num_process)(
            joblib.delayed(self.update_row_param_blockwise)(
                self.y_csr[block_ind[m]:block_ind[m + 1], :],
                phi_csr[block_ind[m]:block_ind[m + 1], :],
                mu0, c, v,
                r_prev[block_ind[m]:block_ind[m + 1]],
                u_prev[block_ind[m]:block_ind[m + 1]],
                phi_r[block_ind[m]:block_ind[m + 1]],
                phi_u)
            for m in range(n_block))
        r = np.concatenate([ru_i[0] for ru_i in ru])
        u = np.vstack([ru_i[1] for ru_i in ru])

    return r, u

def evaluate(input_path, n_jobs):

    aud, ann = zip(*crema.utils.get_ann_audio(input_path))

    test_idx = set(pd.read_json('index_test.json')['id'])

    # drop anything not in the test set
    ann = [ann_i for ann_i in ann if crema.utils.base(ann_i) in test_idx]
    aud = [aud_i for aud_i in aud if crema.utils.base(aud_i) in test_idx]

    stream = tqdm(zip(ann, aud), desc='Evaluating test set', total=len(ann))

    results = Parallel(n_jobs=n_jobs)(delayed(track_eval)(ann_i, aud_i)
                                      for ann_i, aud_i in stream)

    df = pd.DataFrame.from_dict(dict(results), orient='index')

    print('Results')
    print('-------')
    print(df.describe())

    df.to_json(os.path.join(OUTPUT_PATH, 'test_scores.json'))

def update_col_param(self, phi_csc, mu0, r, u, c_prev, v_prev, phi_c, phi_v, num_process):

    ncol = self.y_csc.shape[1]

    if num_process == 1:
        c, v = self.update_col_param_blockwise(self.y_csc, phi_csc, mu0, r, u,
                                               c_prev, v_prev, phi_c, phi_v)
    else:
        # Update 'c' and 'v' block-wise in parallel.
        n_block = num_process
        block_ind = np.linspace(0, ncol, 1 + n_block, dtype=int)
        cv = joblib.Parallel(n_jobs=num_process)(
            joblib.delayed(self.update_col_param_blockwise)(
                self.y_csc[:, block_ind[m]:block_ind[m + 1]],
                phi_csc[:, block_ind[m]:block_ind[m + 1]],
                mu0, r, u,
                c_prev[block_ind[m]:block_ind[m + 1]],
                v_prev[block_ind[m]:block_ind[m + 1]],
                phi_c[block_ind[m]:block_ind[m + 1]],
                phi_v)
            for m in range(n_block))
        c = np.concatenate([cv_j[0] for cv_j in cv])
        v = np.vstack([cv_j[1] for cv_j in cv])

    return c, v

def _fit_single_job(self, job_list, X, y):
    cv_results_ = {}
    # for i, (train_index, test_index) in job_list:
    #     LOG.info("Training fold %d", i + 1)
    #
    #     slave_result_ = self._worker(
    #         i, X, y, train_index, test_index)
    #
    #     _build_cv_results(cv_results_, **slave_result_)

    slave_results = jl.Parallel(n_jobs=self.n_jobs) \
        (jl.delayed(_worker)(self, i, X, y, train_index, test_index)
         for i, (train_index, test_index) in job_list)
    for slave_result_ in slave_results:
        _build_cv_results(cv_results_, **slave_result_)

    self.cv_results_ = cv_results_

def degreetocart(data_f1):
    global df2
    df2 = data_f1.copy()
    print "phase 1"
    df2['X'] = np.nan
    df2['Y'] = np.nan
    df2['Z'] = np.nan
    df2 = df2.astype(float)
    print "phase 2"
    num_cores = multiprocessing.cpu_count()
    results_x = Parallel(n_jobs=num_cores)(delayed(xloop)(i) for i in xrange(0, len(df2)))
    print "phase 3"
    #print results_x
    #print results_x
    #print " this is "
    #print results_x[0]
    results_y = Parallel(n_jobs=num_cores)(delayed(yloop)(i) for i in xrange(0, len(df2)))
    print "phase 4"
    results_z = Parallel(n_jobs=num_cores)(delayed(zloop)(i) for i in xrange(0, len(df2)))
    print "phase 5"
    #print results_y
    #Parallel(n_jobs=num_cores)(delayed(adjloop)(i) for i in xrange(0,len(df2)))
    for i in xrange(0, len(df2)):
        print i
        df2['X'][i] = results_x[i]
        df2['Y'][i] = results_y[i]
        df2['Z'][i] = results_z[i]

def shutdown(self):
    """Shutdown all mturk agents in parallel, otherwise if one mturk agent
    is disconnected then it could prevent other mturk agents from
    completing.
    """
    global shutdown_agent

    def shutdown_agent(agent):
        try:
            agent.shutdown(timeout=None)
        except Exception:
            agent.shutdown()  # not MTurkAgent

    Parallel(
        n_jobs=len(self.agents),
        backend='threading'
    )(delayed(shutdown_agent)(agent) for agent in self.agents)

def get_distilled_labels(filenames):
    result_labels = []
    print("Creating labels")
    result_labels = Parallel(n_jobs=num_cores)(delayed(make_label)(long_filename)
                                               for long_filename in tqdm(filenames))
    return result_labels


# This function receives paths to images and lines from file with labels
# and returns only path to images that have corresponding label

def _get_data(path, preprocess):
    data = sio.loadmat(path)['gestures']
    data = [np.transpose(np.delete(segment.astype(np.float32), np.s_[7:192:8], 0))
            for segment in data.flat]
    if preprocess:
        data = list(Context.parallel(jb.delayed(preprocess)(segment, **PREPROCESS_KARGS)
                                     for segment in data))
    return data


# @cached
# def _get_data(path, bandstop, cut, downsample):
#     data = sio.loadmat(path)['gestures']
#     data = [np.transpose(np.delete(segment.astype(np.float32), np.s_[7:192:8], 0))
#             for segment in data.flat]
#     if bandstop:
#         data = list(Context.parallel(jb.delayed(get_bandstop)(segment) for segment in data))
#     if cut is not None:
#         data = list(Context.parallel(jb.delayed(cut)(segment, framerate=FRAMERATE) for segment in data))
#     if downsample > 1:
#         data = [segment[::downsample].copy() for segment in data]
#     return data

def get_information(ws, x, label, num_of_bins, interval_information_display, model, layerSize,
                    calc_parallel=True, py_hats=0):
    """Calculate the information for the network for all the epochs and all the layers"""
    print('Start calculating the information...')
    bins = np.linspace(-1, 1, num_of_bins)
    label = np.array(label).astype(np.float)
    pys, pys1, p_y_given_x, b1, b, unique_a, unique_inverse_x, unique_inverse_y, pxs = extract_probs(label, x)
    if calc_parallel:
        params = np.array(Parallel(n_jobs=NUM_CORES)(
            delayed(calc_information_for_epoch)(
                i, interval_information_display, ws[i], bins, unique_inverse_x,
                unique_inverse_y, label, b, b1, len(unique_a), pys,
                pxs, p_y_given_x, pys1, model.save_file, x.shape[1], layerSize)
            for i in range(len(ws))))
    else:
        params = np.array([calc_information_for_epoch(
            i, interval_information_display, ws[i], bins, unique_inverse_x,
            unique_inverse_y, label, b, b1, len(unique_a), pys,
            pxs, p_y_given_x, pys1, model.save_file, x.shape[1], layerSize)
            for i in range(len(ws))])
    return params

def _erpac(xp, xa, n_perm, n_jobs):
    """Sub erpac function
    [xp] = [xa] = (npts, ntrials)
    """
    npts, ntrials = xp.shape

    # Compute ERPAC
    xerpac = np.zeros((npts,))
    for t in range(npts):
        xerpac[t] = circ_corrcc(xp[t, :], xa[t, :])[0]

    # Compute surrogates:
    data = Parallel(n_jobs=n_jobs)(delayed(_erpacSuro)(
        xp, xa, npts, ntrials) for pe in range(n_perm))
    suro = np.array(data)

    # Normalize erpac:
    xerpac = (xerpac - suro.mean(0)) / suro.std(0)

    # Get p-value:
    pvalue = norm.cdf(-np.abs(xerpac)) * 2

    return xerpac, pvalue

def _fit(x, y, clf, cv, mf, grp, center, n_jobs):
    """Sub function for fitting
    """
    # Check the inputs size :
    x, y = checkXY(x, y, mf, grp, center)
    rep, nfeat = len(cv), len(x)

    # Tricks : construct a list of tuple containing the index of
    # (repetitions, features) & loop on it. Optimal for parallel computing :
    claIdx, listRep, listFeat = list2index(rep, nfeat)

    # Run the classification :
    cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
        x[k[1]], y, clone(clf), cv[k[0]]) for k in claIdx)
    da, y_true, y_pred = zip(*cvs)

    # Reconstruct elements :
    da = np.array(groupInList(da, listFeat))
    y_true = groupInList(y_true, listFeat)
    y_pred = groupInList(y_pred, listFeat)

    return da, x, y, y_true, y_pred

def jobmap(func, INPUT_ITR, FLAG_PARALLEL=False, batch_size=None,
           *args, **kwargs):
    n_jobs = -1 if FLAG_PARALLEL else 1

    dfunc = joblib.delayed(func)

    with joblib.Parallel(n_jobs=n_jobs) as MP:

        # Yield the whole thing if there isn't a batch_size
        if batch_size is None:
            for z in MP(dfunc(x, *args, **kwargs) for x in INPUT_ITR):
                yield z
            return  # raising StopIteration inside a generator is an error under PEP 479

        ITR = iter(INPUT_ITR)
        progress_bar = tqdm()
        for block in grouper(ITR, batch_size):
            MPITR = MP(dfunc(x, *args, **kwargs) for x in block)
            for k, z in enumerate(MPITR):
                yield z
            progress_bar.update(k + 1)

def process_and_evaluate(model, X, Y, k, n_jobs=1):
    """
    Arguments:
        X : query_id, query pairs
        Y : dict of dicts (harvestable)
        k : int how many to retrieve
    """
    print("Starting query time with %d jobs" % n_jobs)

    # TODO can we unzip Y and only pass the chunk of y which
    # it needs to harvest??
    qids_rs = Parallel(n_jobs=n_jobs)(delayed(process_query)(model, x, Y, k)
                                      for x in X)

    print("Evaluating the results:")
    scores = evaluate_results(qids_rs, Y, k)

    return scores

def smooth(s, lengthscale, parallel=True):
    """smoothes s vertically"""
    if len(s.shape) == 1:
        s = s[..., None]
    nChans = s.shape[1]
    lengthscale = 2 * round(float(lengthscale) / 2)
    W = np.hamming(min(lengthscale, s.shape[0]))
    W /= np.sum(W)
    if s.shape[1] > 1:
        if parallel:
            njobs = JOBLIB_NCORES
        else:
            njobs = 1
        slidingMean = (Parallel(n_jobs=njobs, backend=JOBLIB_BACKEND, temp_folder=JOBLIB_TEMPFOLDER)
                       (delayed(smoothLine)(s[:, chan], W) for chan in range(nChans)))
        return np.array(slidingMean).T
    else:
        return smoothLine(s[:, 0], W)[..., None]

def run(self, model, x, y=None, scoring=None, max_threads=1):
    # get scorers
    if scoring is not None:
        if isinstance(scoring, list):
            scorers_fn = dict([(self.get_scorer_name(k), get_scorer(k)) for k in scoring])
        else:
            scorers_fn = dict([(self.get_scorer_name(scoring), get_scorer(scoring))])
    else:
        # By default uses the model loss function as scoring function
        scorers_fn = dict([(model.get_loss_func(), get_scorer(model.get_loss_func()))])

    model_cfg = model.to_json()

    if y is None:
        args = [(model_cfg['model'], train, test, x, scorers_fn)
                for train, test in self.cv.split(x, y)]
        cv_fn = self._do_unsupervised_cv
    else:
        args = [(model_cfg['model'], train, test, x, y, scorers_fn)
                for train, test in self.cv.split(x, y)]
        cv_fn = self._do_supervised_cv

    with Parallel(n_jobs=min(max_threads, len(args))) as parallel:
        cv_results = parallel(delayed(function=cv_fn, check_pickle=False)(*a) for a in args)

    return self._consolidate_cv_scores(cv_results)

def testParallel(parallel=True):
    inputs = range(0, 1000, 1)
    param = 1000
    if parallel == True:
        # parallel stuff
        # This is reference code for parallel implementation
        inputs = range(10)
        num_cores = multiprocessing.cpu_count()
        results = joblib.Parallel(n_jobs=num_cores)(
            joblib.delayed(childFunc)(i, param) for i in inputs)
    else:
        # collect results so the print below works in the serial case too
        results = [childFunc(i) for i in inputs]
    print(results)

def create_log2fc_bigwigs(matrix, outdir, args):
    # type: (pd.DataFrame, str, Namespace) -> None
    """Create bigwigs from matrix."""

    call("mkdir -p {}".format(outdir), shell=True)

    genome_size_dict = args.chromosome_sizes

    input_columns = matrix[args.control]
    input_rpkm_sum = (1e6 * input_columns / input_columns.sum()).sum(axis=1) / len(args.control)
    input_rpkm_sum[input_rpkm_sum == 0] = 1

    outpaths, data = [], []
    for bed_file in matrix[args.treatment]:
        outpath = join(outdir, splitext(basename(bed_file))[0] + "_log2fc.bw")
        outpaths.append(outpath)

        bed_column = matrix[bed_file]
        bed_column = 1e6 * bed_column / bed_column.sum()

        divided = bed_column / input_rpkm_sum
        divided.loc[divided == 0] = 0.01

        log2_fc_column = np.log2(divided)
        data.append(log2_fc_column)

    Parallel(n_jobs=args.number_cores)(delayed(_create_bigwig)(bed_column, outpath, genome_size_dict)
                                       for outpath, bed_column in zip(outpaths, data))

def create_bigwigs(matrix, outdir, args):
    # type: (pd.DataFrame, str, Namespace) -> None
    """Create bigwigs from matrix."""

    call("mkdir -p {}".format(outdir), shell=True)

    genome_size_dict = args.chromosome_sizes

    outpaths, data = [], []
    for bed_file in matrix:
        outpath = join(outdir, splitext(basename(bed_file))[0] + ".bw")
        outpaths.append(outpath)

        bed_column = matrix[bed_file]
        bed_column = 1e6 * bed_column / bed_column.sum()
        data.append(bed_column)

    Parallel(n_jobs=args.number_cores)(delayed(_create_bigwig)(bed_column, outpath, genome_size_dict)
                                       for outpath, bed_column in zip(outpaths, data))

def count_reads_in_windows(bed_file, args):
    # type: (str, Namespace) -> List[pd.DataFrame]

    chromosome_size_dict = args.chromosome_sizes
    chromosomes = natsorted(list(chromosome_size_dict.keys()))

    parallel_count_reads = partial(_count_reads_in_windows, bed_file, args)

    info("Binning chromosomes {}".format(", ".join([c.replace("chr", "") for c in chromosomes])))
    chromosome_dfs = Parallel(n_jobs=args.number_cores)(
        delayed(parallel_count_reads)(chromosome_size_dict[chromosome], chromosome, strand)
        for chromosome, strand in product(chromosomes, ["+", "-"]))

    info("Merging the bins on both strands per chromosome.")
    both_chromosome_strand_dfs = [df_pair for df_pair in _pairwise(chromosome_dfs)]
    merged_chromosome_dfs = Parallel(n_jobs=args.number_cores)(
        delayed(merge_chromosome_dfs)(df_pair) for df_pair in both_chromosome_strand_dfs)

    return merged_chromosome_dfs

def merge_same_files(sample1_dfs, sample2_dfs, nb_cpu):
    # type: (List[pd.DataFrame], List[pd.DataFrame], int) -> List[pd.DataFrame]

    # if one list is missing a chromosome, we might pair up the wrong dataframes
    # therefore creating dicts beforehand to ensure they are paired up properly
    d1, d2 = ensure_same_chromosomes_in_list(sample1_dfs, sample2_dfs)

    assert len(d1) == len(d2)

    logging.info("Merging same class data.")
    merged_chromosome_dfs = Parallel(n_jobs=nb_cpu)(delayed(_merge_same_files)(
        d1[chromosome], d2[chromosome]) for chromosome in d1.keys())

    return merged_chromosome_dfs

def create_matrixes(chip, input, df, args):
    # type: (Iterable[pd.DataFrame], Iterable[pd.DataFrame], pd.DataFrame, Namespace) -> List[pd.DataFrame]
    "Creates matrixes which can be written to file as is (matrix) or as bedGraph."

    genome = args.chromosome_sizes

    chip = put_dfs_in_chromosome_dict(chip)
    input = put_dfs_in_chromosome_dict(input)
    all_chromosomes = natsorted(set(list(chip.keys()) + list(input.keys())))

    # print("df1\n", df, file=sys.stderr)
    islands = enriched_bins(df, args)
    # print("islands1\n", islands, file=sys.stderr)

    logging.info("Creating matrixes from count data.")
    dfms = Parallel(n_jobs=args.number_cores)(delayed(_create_matrixes)(
        chromosome, chip, input, islands, genome[chromosome], args.window_size)
        for chromosome in all_chromosomes)

    return dfms

def fit(self, X, y):
    assert isinstance(X, list)  # TODO: this should not be an assert
    assert len(y) > 0
    assert len(X) == len(y)
    # TODO: add support for fitting again after having already performed a fit

    self.n_labels_ = y.shape[1]
    self.models_ = []

    # Train one model per label. If no data is available for a given label,
    # the model is set to None.
    models, data = [], []
    for idx in range(self.n_labels_):
        d = [X[i] for i in np.where(y[:, idx] == 1)[0]]
        if len(d) == 0:
            model = None
        else:
            model = clone(self.model)
        data.append(d)
        models.append(model)
    assert len(models) == len(data)

    n_jobs = self.n_jobs if self.model.supports_parallel() else 1
    self.models_ = Parallel(n_jobs=n_jobs)(delayed(_perform_fit)(models[i], data[i])
                                           for i in range(len(models)))
    assert len(self.models_) == self.n_labels_

def preprocess_images_multiprocess(path2dataset_orig, prefix_orig,
                                   path2dataset_dest, prefix_dest,
                                   img_rows, img_cols, img_crop_rows, img_crop_cols):
    # Origin path = prefix + path -> /mnt/img/img393.JPEG
    # Destiny path = prefix2 + path -> /mnt/h5/img393.h5
    with open(path2dataset_orig, 'rb') as fin:
        paths = fin.readlines()
    num_total_paths = len(paths)

    num_cores = multiprocessing.cpu_count()
    processed_paths = Parallel(n_jobs=num_cores)(
        delayed(preprocess_images_worker)(line, prefix_orig, prefix_dest, img_rows, img_cols,
                                          img_crop_rows, img_crop_cols)
        for line in paths)

    processed_paths = [elem for elem in processed_paths if elem]

    with open(path2dataset_dest, "wb") as fout:
        fout.writelines(processed_paths)

    print("Total images pre-processed: %d (remember that corrupted or not present images were discarded)"
          % len(processed_paths))

def get_graph_stats(graph_obj_handle, prop='degrees'):
    # if prop == 'degrees':
    num_cores = multiprocessing.cpu_count()
    inputs = [int(i * len(graph_obj_handle) / num_cores) for i in range(num_cores)] + [len(graph_obj_handle)]
    res = Parallel(n_jobs=num_cores)(delayed(get_values)(graph_obj_handle, inputs[i], inputs[i + 1], prop)
                                     for i in range(num_cores))

    stat_dict = {}

    if 'degrees' in prop:
        stat_dict['degrees'] = list(set([d for core_res in res for file_res in core_res
                                         for d in file_res['degrees']]))
    if 'edge_labels' in prop:
        stat_dict['edge_labels'] = list(set([d for core_res in res for file_res in core_res
                                             for d in file_res['edge_labels']]))
    if 'target_mean' in prop or 'target_std' in prop:
        param = np.array([file_res['params'] for core_res in res for file_res in core_res])
    if 'target_mean' in prop:
        stat_dict['target_mean'] = np.mean(param, axis=0)
    if 'target_std' in prop:
        stat_dict['target_std'] = np.std(param, axis=0)

    return stat_dict

def runStat(dataI, minPts, cut, cpu, fout, hichip=0):
    """
    Calling p-values of interactions for all chromosomes.
    """
    logger.info("Starting estimate significance for interactions")
    ds = Parallel(n_jobs=cpu)(delayed(getIntSig)(
        dataI[key]["f"], dataI[key]["records"], minPts, cut)
        for key in dataI.keys())
    ds = [d for d in ds if d is not None]
    if len(ds) == 0:
        logger.error("Something wrong, no loops found, sorry, bye.")
        return 1
    ds = pd.concat(ds)
    try:
        if hichip:
            ds = markIntSigHic(ds)
        else:
            ds = markIntSig(ds)
        ds.to_csv(fout + ".loop", sep="\t", index_label="loopId")
    except:
        logger.warning(
            "Something wrong happened to significance estimation, only output called loops"
        )
        ds.to_csv(fout + "_raw.loop", sep="\t", index_label="loopId")
    return 0

def h0_opt_test_stresstest(IP=None, stress_count=2000, stress_threads=50, **kwargs):
    ''' Throw {stress_count} domains at the pihole via {stress_threads} threads '''
    from joblib import Parallel, delayed

    top_array = open('topsites.txt').read().splitlines()
    random.shuffle(top_array)

    results = Parallel(n_jobs=stress_threads, backend='threading')(
        delayed(dns_stress)(IP, site) for site in top_array[:stress_count])

    good = sum(1 for (a, b) in results if a == 'good')
    numbers = [b for (a, b) in results if a == 'good']
    bad = sum(1 for (a, b) in results if a == 'bad')

    vmin = min(numbers) * 1000
    vmax = max(numbers) * 1000
    vavg = sum(numbers) * 1000 // len(numbers)
    vstd = (sum(((n * 1000) - vavg) ** 2 for n in numbers) / len(numbers)) ** .5

    return not bad or (good / bad) > 0.05, \
        "{good}/{bad} min {vmin:.2f}ms avg {vavg:.2f}ms max {vmax:.2f}ms std {vstd:.2f}ms".format(**locals())

def get_X_y():
    start = time.time()
    X = []
    y = []
    for j in range(10):
        print('Load folder c{}'.format(j))
        path = os.path.join('imgs/train', 'c' + str(j), '*.jpg')
        files = glob.glob(path)
        X.extend(Parallel(n_jobs=-1)(delayed(process_image)(im_file) for im_file in files))
        y.extend([j] * len(files))
    end = time.time() - start
    print("Time: %.2f seconds" % end)
    return np.array(X), np.array(y)

def get_X_y():
    start = time.time()
    X = []
    y = []
    for j in range(10):
        print('Load folder c{}'.format(j))
        path = os.path.join('imgs/train', 'c' + str(j), '*.jpg')
        files = glob.glob(path)
        X.extend(Parallel(n_jobs=-1)(delayed(process_image)(im_file) for im_file in files))
        y.extend([j] * len(files))
    end = time.time() - start
    print("Time: %.2f seconds" % end)
    return X, y

def get_other():
    start = time.time()
    X = []
    y = []
    train_id = []
    for j in range(10):
        one, two = [], []
        print('Load folder c{}'.format(j))
        path = os.path.join('imgs2/', str(j), '*.jpg')
        files = glob.glob(path)
        results = Parallel(n_jobs=-1)(delayed(process_image)(im_file) for im_file in files)
        one, two = zip(*results)
        X.extend(one)
        train_id.extend(two)
        y.extend([j] * len(files))
    end = time.time() - start
    print("Time: %.2f seconds" % end)
    return np.array(X), np.array(y), np.array(train_id)

def _resample_labels_joblib(self, num_procs):
    from joblib import Parallel, delayed
    import parallel

    if len(self.labels_list) > 0:
        parallel.model = self

        raw = Parallel(n_jobs=num_procs, backend='multiprocessing')\
            (delayed(parallel._get_sampled_labels)(idx)
             for idx in range(len(self.labels_list)))

        for l, (z, normalizer) in zip(self.labels_list, raw):
            l.z, l._normalizer = z, normalizer


### Mean Field

def _joblib_resample_states(self, states_list, num_procs):
    from joblib import Parallel, delayed
    import parallel

    # warn('joblib is segfaulting on OS X only, not sure why')

    if len(states_list) > 0:
        joblib_args = list_split(
            [self._get_joblib_pair(s) for s in states_list],
            num_procs)

        parallel.model = self
        parallel.args = joblib_args

        raw_stateseqs = Parallel(n_jobs=num_procs, backend='multiprocessing')\
            (delayed(parallel._get_sampled_stateseq)(idx)
             for idx in range(len(joblib_args)))

        for s, (stateseq, log_likelihood) in zip(
                [s for grp in list_split(states_list, num_procs) for s in grp],
                [seq for grp in raw_stateseqs for seq in grp]):
            s.stateseq, s._normalizer = stateseq, log_likelihood

def _joblib_meanfield_update_states(self, states_list, num_procs):
    if len(states_list) > 0:
        from joblib import Parallel, delayed
        import parallel

        joblib_args = list_split(
            [self._get_joblib_pair(s) for s in states_list],
            num_procs)

        parallel.model = self
        parallel.args = joblib_args

        allstats = Parallel(n_jobs=num_procs, backend='multiprocessing')\
            (delayed(parallel._get_stats)(idx) for idx in range(len(joblib_args)))

        for s, stats in zip(
                [s for grp in list_split(states_list) for s in grp],
                [s for grp in allstats for s in grp]):
            s.all_expected_stats = stats

def RelationReport(Tensor, RelationTensor, matrixA, dictIndextoVP, dictVPtoIndex,
                   lambdaFolderName, runDir, num_cores_for_fit_computation):
    numVP = len(RelationTensor)
    # dummy code to help parallelize
    RelIndexFitReport = []  # List of index to fit, indices to be sorted based on fit
    # [(verbPhrase, relNum, tensorSliceNorm, normResidueSlice, Fit) tuples]
    # for relIndex in range(0, numVP):
    #     verbPhrase, relNum, tensorSliceNorm, normResidueSlice, Fit = computeSliceFit(
    #         Tensor[relIndex], RelationTensor[relIndex], matrixA, dictIndextoVP, relIndex)
    #     RelIndexFitReport.append((verbPhrase, relNum, tensorSliceNorm, normResidueSlice, Fit))

    RelIndexFitReport = Parallel(n_jobs=num_cores_for_rescal, verbose=1)(
        delayed(cheaplyComputeSliceFit)(Tensor[relIndex], RelationTensor[relIndex],
                                        matrixA, dictIndextoVP, dictIndextoNP, relIndex)
        for relIndex in range(0, numVP))
    RelIndexFitReport.sort(key=lambda x: x[4], reverse=True)  # sort based on fit of relations
    # print(RelIndexFitReport)  # check whether sorted.
    # print('Printing Path')
    # print(os.path.join(lambdaFolderName, runDir, 'RelationReport.txt'))

    # Writing old relation Report to a file
    RelationReportHandle = open(os.path.join(os.getcwd(), lambdaFolderName, runDir, 'RelationReport.txt'), 'w')
    for lineInfo in RelIndexFitReport:
        line = 'Relation: ' + str(lineInfo[0]) + '\t' + ' Relation Number: ' + str(lineInfo[1]) + '\t' \
               + ' sliceNorm: ' + str(lineInfo[2]) + '\t' + 'errorNorm: ' + str(lineInfo[3]) + '\t' \
               + ' SlicewiseFit: ' + str(lineInfo[4]) + '\n'
        print(line)
        RelationReportHandle.write(line)
    RelationReportHandle.close()
    return RelIndexFitReport

def rais(self, data, step=1000, M=100, parallel=False, seed=None):
    num_data = data.shape[0]
    result = 0
    if not parallel:
        p = []
        for i in range(M):
            logw = self.mcmc_r(data, step, num_data)
            p.append(logw)
        p = np.array(p)

        logmeanp = logmeanexp(p, axis=0)
    else:
        num_cores = multiprocessing.cpu_count()

        p = Parallel(n_jobs=num_cores)(delayed(self.mcmc_r)(v=data, step=step,
                                                            num_data=num_data, seed=seed)
                                       for i in range(M))

        p = np.array(p)

        logmeanp = logmeanexp(p, axis=0)

    result = logmeanp.mean()

    return result

def ais(rbm, step=100, M=100, parallel=False, seed=None):

    W = rbm.W.data.numpy().T
    v_bias = rbm.v_bias.data.numpy()
    h_bias = rbm.h_bias.data.numpy()

    logZ0 = np.log((1 + np.exp(v_bias))).sum() + np.log(1 + np.exp(h_bias)).sum()
    ratio = []
    if parallel:
        num_cores = multiprocessing.cpu_count()

        results = Parallel(n_jobs=num_cores)(delayed(mcmc)(step=step, seed=seed,
                                                           W=W, h_bias=h_bias, v_bias=v_bias)
                                             for i in range(M))

        results = np.array(results).reshape(len(results), 1)
        logZ = logZ0 + logmeanexp(results, axis=0)

    else:
        for i in range(M):
            ratio.append(mcmc(step, seed=seed, W=W, h_bias=h_bias, v_bias=v_bias))

        ratio = np.array(ratio).reshape(len(ratio), 1)
        logZ = logZ0 + logmeanexp(ratio, axis=0)

    return logZ

def ulogprob(v_input, dbn, M=1000, parallel=False):
    logw = np.zeros([M, len(v_input)])
    # samples = v_input
    if not parallel:
        for i in range(M):
            # samples = v_input
            # for l in range(dbn.n_layers-1):
            #     logw[i,:] += -dbn.rbm_layers[l].free_energy(samples,dbn.rbm_layers[l].W)[0]
            #     samples = dbn.rbm_layers[l].sample_h_given_v(samples,dbn.rbm_layers[l].W,dbn.rbm_layers[l].h_bias)[0]
            #     logw[i,:] -= -dbn.rbm_layers[l].free_energy_hidden(samples,dbn.rbm_layers[l].W)[0]
            # logw[i,:] += -dbn.rbm_layers[-1].free_energy(samples,dbn.rbm_layers[-1].W)[0]
            logw[i, :] += important_sampling(v_input, dbn)
    else:
        num_cores = multiprocessing.cpu_count()

        results = Parallel(n_jobs=num_cores)(delayed(important_sampling)(v_input=v_input, dbn=dbn)
                                             for i in range(M))
        logw += np.asarray(results)

    return logmeanexp(logw, 0)

def main():
    """
    Main function.

    1. Setup logging
    2. Get arguments
    3. Get index
    4. Process files
    5. Write output
    """
    setup_logging()
    logger = logging.getLogger("stats." + __name__)

    args = get_args()
    index = get_index(args)
    logger.warning("Positions not in annotation will be ignored.")

    logger.info("Found " + str(len(args.inputs)) + " input file(s):")
    for input_file in sorted(args.inputs):
        logger.debug(input_file)

    if args.is_parallel:
        stats = Parallel(n_jobs=args.parallel, verbose=100, batch_size=1)(
            delayed(process_file)(input_file, args.type, index, args.is_parallel)
            for input_file in args.inputs)
    else:
        stats = []
        for input_file in args.inputs:
            output_table = process_file(input_file, args.type, index, args.is_parallel)
            stats.append(output_table)

    write_stats(args.out, stats)

def _process_with_joblib(self, X: Union[pd.DataFrame, np.ndarray], n_refs: int, cluster_array: np.ndarray):
    """
    Process calling of .calculate_gap() method using the joblib backend
    """
    with Parallel(n_jobs=self.n_jobs) as parallel:
        for gap_value, n_clusters in parallel(delayed(self._calculate_gap)(X, n_refs, n_clusters)
                                              for n_clusters in cluster_array):
            yield (gap_value, n_clusters)

def forward_parallel(forward_process, X, n, n_jobs, extra_par):
    n_row = X.shape[0]
    parallelizer = Parallel(n_jobs=n_jobs)
    tasks_iterator = (delayed(forward_process)(X_row, n, extra_par)
                      for X_row in np.split(X, n_row))
    result = parallelizer(tasks_iterator)
    # Merging the output of the jobs
    return np.vstack(result)

def transform(self, Xb, yb):
    X_n, y_n = super(AugmentBatchIterator, self).transform(Xb, yb)
    ret = Parallel(n_jobs=-1)(delayed(load_augment_im)(self, name, bb)
                              for name, bb in zip(X_n, y_n))
    Xb = np.asarray(map(lambda v: v[0], ret))
    yb = np.asarray(map(lambda v: v[1], ret))
    return Xb, yb

def transform(self, Xb, yb):
    X_n, yb = super(LazyBatchIterator, self).transform(Xb, yb)
    Xb = Parallel(n_jobs=-1)(delayed(load_im_f)(name)
                             for name in X_n)
    Xb = np.asarray(Xb)
    return Xb, yb

def process_batch(image_db, label_db, fnames_b, y_b):
    print "Reading the images and labels"
    with Parallel(n_jobs=-1) as parallel:
        Xb = parallel(delayed(load_im_tuple)(fname, i)
                      for i, fname in fnames_b)
        yb = parallel(delayed(load_y_tuple)(y, i) for i, y in y_b)
    print "Writing image data"
    _write_batch_lmdb(image_db, Xb)
    print "Writing label data"
    _write_batch_lmdb(label_db, yb)

def get_file_list(folder):
    names = os.listdir(folder)
    fnames = []
    bboxes = []
    bbox_names = map(lambda name: os.path.join(folder, name, '_bboxes.txt'), names)
    with Parallel(n_jobs=-1) as parallel:
        dfs = parallel(delayed(_extract_names_bboxes)(bname) for bname in bbox_names)
    df = pd.concat(dfs, ignore_index=True)
    df['Flag'] = df['Name'].map(lambda x: True if os.path.exists(x) else False)
    print "Initial number of images:", df['Name'].count()
    df = df[df['Flag'] == True]
    print "Total number of existing images:", df['Name'].count()
    return df['Name'].values, df['BBox'].values

def pickle_rel_data():
    #Parallel(n_jobs=9, backend="multiprocessing")(delayed(pickle_rel)(rel_feature) for rel_feature in
    #    ['rel_degree','rel_in_degree','rel_out_degree','rel_page_rank','rel_local_clust','rel_eigen_centr',
    #     'rel_hits_hub','rel_hits_authority','rel_kcore'])
    Parallel(n_jobs=3, backend="multiprocessing")(delayed(pickle_rel)(rel_feature) for rel_feature in
        ['rel_degree', 'rel_in_degree', 'rel_out_degree'])
    #Parallel(n_jobs=3, backend="multiprocessing")(delayed(pickle_rel)(rel_feature) for rel_feature in
    #    ['rel_hits_hub','rel_hits_authority','rel_kcore'])
    #Parallel(n_jobs=3, backend="multiprocessing")(delayed(pickle_rel)(rel_feature) for rel_feature in
    #    ['rel_page_rank','rel_local_clust','rel_eigen_centr'])

def pickle_vis_data():
    pickle_vis_data_pandas()
    Parallel(n_jobs=5, backend="multiprocessing")(delayed(pickle_viz)(rel_feature) for rel_feature in
        ['infobox', 'lead', 'left-body', 'navbox', 'body'])
    #Parallel(n_jobs=3, backend="multiprocessing")(delayed(pickle_viz_positions)(rel_feature) for rel_feature in
    #    ['links_postions_text','links_postions_x','links_postions_y'])

def batch_train(opt, round_index, round_train_data, round_valid_data, round_valid_weights=None,
                save_all=True, file_indices=None, return_acc_len=False, seq2seq=False):
    i = 0
    perfs = []
    M = len(round_train_data)
    while i < M:
        j = min(i + opt['num_machines'], M)
        cur_perfs = Parallel(n_jobs=j - i, backend='threading')(
            delayed(train)(opt, round_index, train_index,
                           file_indices[train_index] if file_indices else train_index,
                           round_train_data[train_index], round_valid_data[train_index],
                           valid_weights=round_valid_weights[train_index] if round_valid_weights else None,
                           save_all=save_all, return_acc_len=return_acc_len, seq2seq=seq2seq)
            for train_index in range(i, j))
        perfs.extend(cur_perfs)
        i = j

    error_indices, valid_indices = [], []
    for i, perf in enumerate(perfs):
        if perf == 0.0 or type(perf) == tuple and perf[0] == 0.0:
            error_indices.append(i)
        elif i < opt['num_machines']:
            valid_indices.append(i)

    M = len(error_indices)
    TMP_NUM_MACHINES = len(valid_indices)
    if M > 0 and TMP_NUM_MACHINES > 0:
        i = 0
        error_perfs = []
        while i < M:
            j = min(i + TMP_NUM_MACHINES, M)
            cur_perfs = Parallel(n_jobs=j - i, backend='threading')(
                delayed(train)(opt, round_index, valid_indices[train_index],
                               file_indices[error_indices[train_index]] if file_indices else error_indices[train_index],
                               round_train_data[error_indices[train_index]],
                               round_valid_data[error_indices[train_index]],
                               valid_weights=round_valid_weights[error_indices[train_index]] if round_valid_weights else None,
                               save_all=save_all, return_acc_len=return_acc_len, seq2seq=seq2seq)
                for train_index in range(i, j))
            error_perfs.extend(cur_perfs)
            i = j

        for i in range(M):
            perfs[error_indices[i]] = error_perfs[i]

    return perfs