The following 50 code examples, extracted from open-source Python projects, show how sklearn.externals.joblib.dump() is used in practice.
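Before turning to the project code, here is a minimal, self-contained sketch of the basic dump/load round trip that all of the examples below build on. It is not taken from any of the listed projects; the LogisticRegression estimator, the iris data, and the "model.pkl" filename are illustrative assumptions.

from sklearn.externals import joblib  # bundled joblib (older scikit-learn versions)
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Fit a small model on toy data (placeholder estimator and data).
X, y = load_iris(return_X_y=True)
clf = LogisticRegression().fit(X, y)

# Persist the fitted estimator to disk; compress=3 trades speed for file size.
joblib.dump(clf, "model.pkl", compress=3)

# Later (or in another process), restore it and use it as before.
restored = joblib.load("model.pkl")
print(restored.predict(X[:5]))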
def trainModel(featureCount, imageCount, save):
    clf = RandomForestRegressor(n_estimators=1, n_jobs=-1)
    features = generateFeatures(featureCount)
    for image in range(0, imageCount):
        print "Image " + str(image)
        train(clf, features, image)
    clf = clf.fit(X, Y)
    model = (clf, features)
    if save:
        joblib.dump(model, "model.pkl")
    return model
def load_trained_model(self, classifier):
    filename = '{}.pkl'.format(classifier.__name__.lower())
    path = os.path.join(self.data_path, filename)
    # palliative: this outputs a model too large for joblib
    if classifier.__name__ == 'MonthlySubquotaLimitClassifier':
        model = classifier()
        model.fit(self.dataset)
    else:
        if os.path.isfile(path):
            model = joblib.load(path)
        else:
            model = classifier()
            model.fit(self.dataset)
            joblib.dump(model, path)
    return model
def make_check_point(self):
    num, last_checkpoints = self.load_current_checkpoints()
    if self.best_val_acc > last_checkpoints['best_val_acc']:
        best_val_acc = self.best_val_acc
        best_params = self.best_params
    else:
        best_val_acc = last_checkpoints['best_val_acc']
        best_params = last_checkpoints['best_params']

    checkpoints = {
        'model': self.model,
        'epoch': self.epoch,
        'best_params': best_params,
        'best_val_acc': best_val_acc,
        'loss_history': self.loss_history,
        'train_acc_history': self.train_acc_history,
        'val_acc_history': self.val_acc_history}

    name = 'check_' + str(num + 1)
    os.mkdir(os.path.join(self.path_checkpoints, name))
    joblib.dump(checkpoints, os.path.join(
        self.path_checkpoints, name, name + '.pkl'))
def __init__(self, clf, scaler, pf_df, data_folder=""):
    model_file_name = "banana.pkl"
    scaler_file_name = "banana_scaler.pkl"
    list_file_name = "banana_list.txt"
    def_file_path = "../../models/"
    self.data_folder = data_folder
    if not data_folder:
        model_file = os.path.join(os.path.dirname(__file__), def_file_path) + model_file_name
        scaler_file = os.path.join(os.path.dirname(__file__), def_file_path) + scaler_file_name
        list_file = os.path.join(os.path.dirname(__file__), def_file_path) + list_file_name
    else:
        model_file = self.data_folder + model_file_name
        scaler_file = self.data_folder + scaler_file_name
        list_file = self.data_folder + list_file_name
    joblib.dump(clf, model_file)
    joblib.dump(scaler, scaler_file)
    with open(list_file, "w") as f:
        f.write(" ".join(pf_df.columns.tolist()))
def get_cache_file(model_id, index, cache_dir='', suffix='csv'):
    # Identify index trick.
    # If sum of first 20 index, recognize as the same index.
    if index is None:
        raise IOError
    if len(index) < 20:
        sum_index = sum(index)
    else:
        sum_index = sum(index[:20])
    return "{0}{1}_{2}.{3}".format(cache_dir, model_id, sum_index, suffix)

##def saving_fit(learner, X, y, index):
##    import os
##    pkl_file = "{0}_{1}_{2}.pkl".format(learner.id, min(index), max(index))
##    try:
##        learner = joblib.load(pkl_file)
##        print("**** learner is loaded from {0} ****".format(pkl_file))
##    except IOError:
##        learner.fit(X, y)
##        joblib.dump(learner, pkl_file)
##    return learner
def KmeansWrapper(true_k, data, load=False):
    from sklearn.externals import joblib

    modelName = 'doc_cluster.%s.plk' % true_k
    if load:
        km = joblib.load(modelName)
        labels = km.labels_
    else:
        km = KMeans(n_clusters=true_k,
                    init='k-means++',
                    # max_iter=1000,
                    n_init=10,
                    n_jobs=-1,
                    random_state=0,
                    verbose=0)

        km.fit_predict(data)
        labels = km.labels_
        joblib.dump(km, modelName)

    return labels, km.cluster_centers_
def init_state(indata, test=False):
    close = indata['close'].values
    diff = np.diff(close)
    diff = np.insert(diff, 0, 0)
    sma15 = SMA(indata, timeperiod=15)
    sma60 = SMA(indata, timeperiod=60)
    rsi = RSI(indata, timeperiod=14)
    atr = ATR(indata, timeperiod=14)

    #--- Preprocess data
    xdata = np.column_stack((close, diff, sma15, close-sma15, sma15-sma60, rsi, atr))
    xdata = np.nan_to_num(xdata)
    if test == False:
        scaler = preprocessing.StandardScaler()
        xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1)
        joblib.dump(scaler, 'data/scaler.pkl')
    elif test == True:
        scaler = joblib.load('data/scaler.pkl')
        xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1)
    state = xdata[0:1, 0:1, :]

    return state, xdata, close

#Take Action
def persist_pipelines(pipelines):
    Path('models').mkdir(exist_ok=True)
    fp_fmt = 'models/{}-{:%y-%m-%d}.pkl'
    now = dt.datetime.now()
    for pipe in pipelines:
        print(utils.pipeline_name(pipe))
        fp_name = fp_fmt.format(utils.pipeline_name(pipe), now)
        joblib.dump(pipe, fp_name)
        # Pickle fails to work on RandomForestRegressor
        # with open(fp_name, 'wb') as fp:
        #     pickle.dump(pipe, fp)
def _vectorize_chunk(dsid_dir, k, pars, pretend=False):
    """ Extract features on a chunk of files """
    from sklearn.feature_extraction.text import HashingVectorizer
    from sklearn.externals import joblib

    filenames = pars['filenames_abs']
    chunk_size = pars['chunk_size']
    n_samples = pars['n_samples']

    mslice = slice(k*chunk_size, min((k+1)*chunk_size, n_samples))

    hash_opts = {key: vals for key, vals in pars.items()
                 if key in ['stop_words', 'n_features', 'analyser', 'ngram_range']}
    hash_opts['alternate_sign'] = False
    fe = HashingVectorizer(input='content', norm=None, **hash_opts)

    if pretend:
        return fe

    fset_new = fe.transform(_read_file(fname) for fname in filenames[mslice])
    fset_new.eliminate_zeros()

    joblib.dump(fset_new, str(dsid_dir / 'features-{:05}'.format(k)))
def dump_classifier(self):
    """
    This function ...
    :return:
    """

    # Determine the path to the pickle file
    classifier_path = os.path.join(self.classification_mode_path, "classifier.pkl")

    # Inform the user
    self.log.info("Writing the classifier to " + classifier_path)

    # Serialize and dump the classifier
    joblib.dump(self.vector_classifier, classifier_path)

# -----------------------------------------------------------------
def generate_LR_model(file_name):
    train_df = read_from_file(file_name)
    selected_train_df = train_df.filter(regex='label|connectionType_.*|telecomsOperator_.*|sitesetID_.*|positionType_.*|gender_.*|haveBaby_.*|age_scaled')
    train_np = selected_train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]
    print 'Train Logistic Regression Model...'
    start_time = datetime.datetime.now()
    clf = linear_model.LogisticRegression(penalty='l2', C=1.0, solver='sag', n_jobs=-1, tol=1e-6, max_iter=200)  #, class_weight='balanced')
    clf.fit(X, y)
    end_time = datetime.datetime.now()
    print 'Training Done..., Time Cost: '
    print (end_time-start_time).seconds
    print 'Save Model...'
    joblib.dump(clf, 'LR.model')
    return clf
def test():
    iris = load_iris()
    #print iris
    #print iris['target'].shape
    gbdt = GradientBoostingRegressor(n_estimators=1000, max_depth=4)
    gbdt.fit(iris.data[:120], iris.target[:120])
    # Save GBDT Model
    joblib.dump(gbdt, 'GBDT.model')

    predict = gbdt.predict(iris.data[:120])
    total_err = 0
    for i in range(len(predict)):
        print predict[i], iris.target[i]
        err = predict[i] - iris.target[i]
        total_err += err * err
    print 'Training Error: %f' % (total_err / len(predict))

    pred = gbdt.predict(iris.data[120:])
    error = 0
    for i in range(len(pred)):
        print pred[i], iris.target[i+120]
        err = pred[i] - iris.target[i+120]
        error += err * err
    print 'Test Error: %f' % (error / len(pred))
def generate_GBDT_model(file_name):
    train_df = read_from_file(file_name)
    # feature 18
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby|hometown|residence')
    train_np = selected_train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]
    print 'Train Gradient Boosting Regression Model...'
    start_time = datetime.datetime.now()
    gbdt = GradientBoostingRegressor(n_estimators=120, max_depth=10)  #, class_weight='balanced')
    gbdt.fit(X, y)
    end_time = datetime.datetime.now()
    print 'Training Done..., Time Cost: '
    print (end_time - start_time).seconds
    print 'Save Model...'
    joblib.dump(gbdt, 'GBDT.model')
    return gbdt
def generate_XGB_model(train_df):
    train_df.drop(['conversionTime'], axis=1, inplace=True)
    print 'Train And Fix Missing App Count Value...'
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    '''print 'Train And Fix Missing Age Value...'
    train_df, xgb_age = train_model_for_age(train_df)
    joblib.dump(xgb_age, 'XGB_age.model')'''
    train_df.drop(['marriageStatus','haveBaby','sitesetID', 'positionType'], axis=1, inplace=True)
    print 'Done'
    print train_df.info()
    print train_df.describe()
    print train_df.isnull().sum()

    train_np = train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]
    print 'Train Xgboost Model...'
    start_time = datetime.datetime.now()
    xbg_clf = XGBRegressor(n_estimators=100, max_depth=6, objective="binary:logistic", silent=False)
    xbg_clf.fit(X, y)
    end_time = datetime.datetime.now()
    print 'Training Done..., Time Cost: %d' % ((end_time - start_time).seconds)
    model_df = pd.DataFrame({'columns': list(train_df.columns)[1:], 'values': xbg_clf.feature_importances_})
    print model_df
    return xbg_clf
def xgb_model_select(train_file_name):
    train_df = merge_features_to_use(train_file_name)
    train_df.drop(['conversionTime'], axis=1, inplace=True)
    print 'Train And Fix Missing App Count Value...'
    train_df, xgb_appcount = train_model_for_appcounts(train_df)
    joblib.dump(xgb_appcount, 'XGB_missing.model')
    print train_df.info()
    print train_df.describe()
    print train_df.isnull().sum()

    train_np = train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]
    print 'Select Model...'
    start_time = datetime.datetime.now()
    xgb_clf = xgb.XGBRegressor()
    parameters = {'n_estimators': [120, 100, 140],
                  'max_depth': [3, 5, 7, 9],
                  'gamma': [0.1, 0.3, 0.5, 0.7],
                  'min_child_weight': [1, 3, 5, 7],
                  }
    grid_search = GridSearchCV(estimator=xgb_clf, param_grid=parameters, cv=10, n_jobs=-1)
    print("parameters:")
    pprint.pprint(parameters)
    grid_search.fit(X, y)
    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
    end_time = datetime.datetime.now()
    print 'Select Done..., Time Cost: %d' % ((end_time - start_time).seconds)
def generate_RF_model(file_name):
    train_df = read_from_file(file_name)
    selected_train_df = train_df.filter(regex='label|creativeID|positionID|connectionType|telecomsOperator|adID|camgaignID|advertiserID|appID|appPlatform|sitesetID|positionType|age|gender|education|marriageStatus|haveBaby|hometown|residence')
    train_np = selected_train_df.as_matrix()
    y = train_np[:,0]
    X = train_np[:,1:]
    print 'Train Random Forest Regression Model...'
    start_time = datetime.datetime.now()
    rf = RandomForestRegressor(n_estimators=25, n_jobs=-1)  #, class_weight='balanced')
    rf.fit(X, y)
    end_time = datetime.datetime.now()
    print 'Training Done..., Time Cost: '
    print (end_time-start_time).seconds
    print 'Save Model...'
    joblib.dump(rf, 'RF.model')
    return rf
def load_data(test=False):
    fname = FTEST if test else FTRAIN
    df = pd.read_csv(fname)
    cols = df.columns[:-1]

    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' ') / 255.0)
    df = df.dropna()

    X = np.vstack(df['Image'])
    X = X.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)

    if not test:
        # y = (df[cols].values - 48) / 48.0
        y = df[cols].values / 96.0
        X, y = shuffle(X, y)
        joblib.dump(cols, 'data/cols.pkl', compress=3)
    else:
        y = None

    return X, y
def Dump(model, fnameMODEL, fnameWeight):
    if str(type(model)).find("sklearn.") == -1:
        from keras.models import Sequential
        from keras.layers.core import Dense, Dropout, Activation
        from keras.optimizers import SGD
        json_string = model.to_json()
        fm = open(fnameMODEL + ".json", "w")
        fm.write(json_string)
        fm.close()
        model.save_weights(fnameWeight + ".hdf5", overwrite=True)
    else:
        from sklearn.externals import joblib

        def ensure_dir(f):
            d = os.path.dirname(f)
            if not os.path.exists(d):
                os.makedirs(d)

        ensure_dir('./skmodel/')
        joblib.dump(model, "./skmodel/" + fnameMODEL + ".pkl", compress=3)
def train_svms():
    if not os.path.isfile('models/fine_tune.model.index'):
        print('models/fine_tune.model doesn\'t exist.')
        return

    net = create_alexnet()
    model = tflearn.DNN(net)
    model.load('models/fine_tune.model')

    train_file_dir = 'svm_train/'
    flist = os.listdir(train_file_dir)
    svms = []
    for train_file in flist:
        if "pkl" in train_file:
            continue
        X, Y = generate_single_svm_train_data(train_file_dir + train_file)
        train_features = []
        for i in X:
            feats = model.predict([i])
            train_features.append(feats[0])
        print("feature dimension of fitting: {}".format(np.shape(train_features)))
        clf = svm.LinearSVC()
        clf.fit(train_features, Y)
        svms.append(clf)
    joblib.dump(svms, 'models/train_svm.model')
def train(self, training_data, trees=100, rf_out=None):
    # Use CNN to extract features
    self.cnn.set_intermediate(self.feature_layer)
    features = self.extract_features(training_data)

    # Create random forest
    self.rf = RandomForestClassifier(n_estimators=trees, class_weight='balanced_subsample')
    X_train = features['y_pred']              # inputs to train the random forest
    y_train = np.asarray(features['y_true'])  # ground truth for random forest

    print "Training RF..."
    self.rf.fit(X_train, y_train)

    if rf_out:
        joblib.dump(self.rf, rf_out)

    return self.rf, X_train, y_train
def train_model(data, with_mac=True):
    global without_mac_clf, mac_clf
    df = pd.DataFrame.from_dict(data)
    y = df.pop("location")
    features = [f for f in df.columns if f != 'mac']
    df = df.rename(columns=dict(zip(features, [POWER_SLAVE_PREFIX + f for f in features])))
    model_name = MODEL_MAC_NAME if with_mac else MODEL_NAME
    if with_mac:
        df = df.apply(LabelEncoder().fit_transform)
    else:
        df.drop("mac", axis=1, inplace=True)
    clf = DecisionTreeClassifier()
    clf.fit(df, y)
    joblib.dump(clf, model_name)
    if with_mac and mac_clf is None:
        mac_clf = clf
    if not with_mac and without_mac_clf is None:
        without_mac_clf = clf
    export_graphviz(clf, feature_names=list(df.columns), class_names=y.unique(),
                    filled=True, rounded=True, out_file='model.dot')
    os.system("dot -Tpng model.dot -o model.png")
def trainClassifier(foldername, classifierName):
    model = cv2.ml.KNearest_create()
    features = []
    labels = []
    os.chdir(foldername)
    for filename in glob.iglob('*.png'):
        features.append(cv2.imread((filename), -1))
        labels.append(filename[0])
    list_hog_fd = []
    for feature in features:
        fd = hog(feature.reshape((27, 35)), orientations=9, pixels_per_cell=(9, 7),
                 cells_per_block=(1, 1), visualise=False)
        list_hog_fd.append(fd)
    hog_features = np.array(list_hog_fd, 'float64')
    os.chdir("..")
    clf = LinearSVC()
    clf.fit(hog_features, labels)
    joblib.dump(clf, classifierName, compress=3)
    os.chdir("..")
def learn(fName, features, nRows=-1):
    with open('bin/train.bin', 'r') as f:
        train = np.load(f)

    x = np.mat(train[:nRows, timbreVector[features[0]]]).reshape(nRows, 1)
    y = np.mat(train[:nRows, timbreVector[features[1]]]).reshape(nRows, 1)
    z = np.mat(train[:nRows, timbreVector[features[2]]]).reshape(nRows, 1)
    X = np.concatenate((x, y, z), axis=1)
    Y = train[:nRows, 0] % minYear
    clf = svm.SVC(verbose=3)
    clf.fit(X, Y)
    print "[SUCCESS] Fitted training data to SVM (kernel: rbf)."
    print "[STARTED] Dumping classifier."
    joblib.dump(clf, 'bin/%s' % fName)
    print "[SUCCESS] Dumped to ", fName
def train(self, training_set, training_target, fea_index):
    clf = tree.DecisionTreeClassifier(criterion="entropy", min_samples_split=30, class_weight="balanced")
    clf = clf.fit(training_set, training_target)

    class_names = np.unique([str(i) for i in training_target])
    feature_names = [attr_list[i] for i in fea_index]

    dot_data = tree.export_graphviz(clf, out_file=None,
                                    feature_names=feature_names,
                                    class_names=class_names,
                                    filled=True, rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("output/tree-vis.pdf")
    joblib.dump(clf, 'output/CART.pkl')
def saveDebugStateAtBatch(self, name, batchID, LPchunk=None, SS=None,
                          SSchunk=None, hmodel=None, Dchunk=None):
    if self.outputParams['debugBatch'] == batchID:
        debugLap = self.outputParams['debugLap']
        debugLapBuffer = self.outputParams['debugLapBuffer']
        if self.lapFrac < 1:
            joblib.dump(dict(Dchunk=Dchunk),
                        os.path.join(self.task_output_path, 'Debug-Data.dump'))
        belowWindow = self.lapFrac < debugLap - debugLapBuffer
        aboveWindow = self.lapFrac > debugLap + debugLapBuffer
        if belowWindow or aboveWindow:
            return
        filename = 'DebugLap%04.0f-%s.dump' % (np.ceil(self.lapFrac), name)
        SaveVars = dict(LP=LPchunk, SS=SS, hmodel=hmodel,
                        SSchunk=SSchunk, lapFrac=self.lapFrac)
        joblib.dump(SaveVars, os.path.join(self.task_output_path, filename))
        if self.lapFrac < 1:
            joblib.dump(dict(Dchunk=Dchunk),
                        os.path.join(self.task_output_path, 'Debug-Data.dump'))
def pca(dataMat, n):
    print "Start to do PCA..."
    newData, meanVal = zeroMean(dataMat)
    # covMat = np.cov(newData, rowvar=0)
    # eigVals, eigVects = np.linalg.eig(np.mat(covMat))
    # joblib.dump(eigVals, './features/PCA/eigVals_train_%s.eig' % m, compress=3)
    # joblib.dump(eigVects, './features/PCA/eigVects_train_%s.eig' % m, compress=3)
    eigVals = joblib.load('./features/PCA/eigVals_train_%s.eig' % m)
    eigVects = joblib.load('./features/PCA/eigVects_train_%s.eig' % m)
    eigValIndice = np.argsort(eigVals)
    n_eigValIndice = eigValIndice[-1:-(n+1):-1]
    n_eigVect = eigVects[:, n_eigValIndice]
    # joblib.dump(n_eigVect, './features/PCA/n_eigVects_train_%s_%s.eig' % (m, n))
    lowDDataMat = newData * n_eigVect
    return lowDDataMat
def pca(dataMat, n):
    print "Start to do PCA..."
    t1 = time.time()
    newData, meanVal = zeroMean(dataMat)
    covMat = np.cov(newData, rowvar=0)
    eigVals, eigVects = np.linalg.eig(np.mat(covMat))  # compute eigenvalues and eigenvectors
    joblib.dump(eigVals, './features/PCA/%s/eigVals_train_%s.eig' % (m, m), compress=3)
    joblib.dump(eigVects, './features/PCA/%s/eigVects_train_%s.eig' % (m, m), compress=3)
    # eigVals = joblib.load('./features/PCA/%s/eigVals_train_%s.eig' % (m, m))
    # eigVects = joblib.load('./features/PCA/%s/eigVects_train_%s.eig' % (m, m))
    eigValIndice = np.argsort(eigVals)            # sort the eigenvalues
    n_eigValIndice = eigValIndice[-1:-(n+1):-1]   # take the n largest eigenvalues
    n_eigVect = eigVects[:, n_eigValIndice]       # take the corresponding n eigenvectors
    joblib.dump(n_eigVect, './features/PCA/%s/n_eigVects_train_%s_%s.eig' % (m, m, n))
    lowDDataMat = newData * n_eigVect             # project data onto the low-dimensional space
    # reconMat = (lowDDataMat * n_eigVect.T) + meanVal
    t2 = time.time()
    print "PCA takes %f seconds" % (t2-t1)
    return lowDDataMat
def getFeat(Data, mode):  # compute and save feature values
    num = 0
    for data in Data:
        image = np.reshape(data[0], (200, 200, 3))
        gray = rgb2gray(image) / 255.0                 # convert the image to grayscale
        fd = hog(gray, orientations, pixels_per_cell, cells_per_block, block_norm, visualize, normalize)
        fd = np.concatenate((fd, data[1]))             # append the label at the end of the array
        filename = list(data[2])
        fd_name = filename[0].split('.')[0] + '.feat'  # set the file name
        if mode == 'train':
            fd_path = os.path.join('./features/train/', fd_name)
        else:
            fd_path = os.path.join('./features/test/', fd_name)
        joblib.dump(fd, fd_path, compress=3)           # save the feature data locally
        num += 1
        print "%d saving: %s." % (num, fd_name)
def train_logistic():
    df = pd.read_csv(config.activations_path)
    df, y, classes = encode(df)
    X_train, X_test, y_train, y_test = train_test_split(df.values, y, test_size=0.2, random_state=17)

    params = {'C': [10, 2, .9, .4, .1],
              'tol': [0.0001, 0.001, 0.0005]}
    log_reg = LogisticRegression(solver='lbfgs', multi_class='multinomial', class_weight='balanced')
    clf = GridSearchCV(log_reg, params, scoring='neg_log_loss', refit=True, cv=3, n_jobs=-1)
    clf.fit(X_train, y_train)

    print("best params: " + str(clf.best_params_))
    print("Accuracy: ", accuracy_score(y_test, clf.predict(X_test)))

    setattr(clf, '__classes', classes)
    # save results for further use
    joblib.dump(clf, config.get_novelty_detection_model_path())
def persist(self, model_dir):
    # type: (Text) -> Dict[Text, Any]
    """Persist this model into the passed directory.
    Returns the metadata necessary to load the model again."""

    from sklearn.externals import joblib

    if self.ent_tagger:
        model_file_name = os.path.join(model_dir, "crf_model.pkl")

        joblib.dump(self.ent_tagger, model_file_name)
        return {"entity_extractor_crf": {"model_file": "crf_model.pkl",
                                         "crf_features": self.crf_features,
                                         "BILOU_flag": self.BILOU_flag,
                                         "version": 1}}
    else:
        return {"entity_extractor_crf": None}
def newKMeansModel(vectorFile, outputFile, numClusters):
    # https://stackoverflow.com/questions/43476869/doc2vec-sentence-clustering
    model = Doc2Vec.load("Models\\" + vectorFile)
    docVecs = model.docvecs.doctag_syn0
    km = KMeans(n_clusters=numClusters)
    print("Starting")
    km.fit(docVecs)
    print("Fitting Data")
    joblib.dump(km, outputFile)
def newDBSCANModel(vectorFile, outputFile):
    model = Doc2Vec.load("Models\\" + vectorFile)
    vecs = []
    for doc in range(0, len(model.docvecs)):
        doc_vec = model.docvecs[doc]
        # print doc_vec
        vecs.append(doc_vec.reshape((1, 300)))

    doc_vecs = np.array(vecs, dtype='float')  # TSNE expects float type values
    # print doc_vecs
    docs = []
    for i in doc_vecs:
        docs.append(i[0])

    db = DBSCAN(eps=0.03, algorithm="brute", metric='cosine').fit(docs)
    joblib.dump(db, outputFile)

    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_

    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    clusters = db.labels_.tolist()

    cluster_info = {'labels': model.docvecs.offset2doctag,
                    "index, wordcount and repeated words": [model.docvecs.doctags[x] for x in model.docvecs.offset2doctag],
                    'clusters': clusters}
    sentenceDF = pd.DataFrame(cluster_info, index=[clusters],
                              columns=['labels', "index, wordcount and repeated words", 'clusters'])
    print(sentenceDF)
    sentenceDF.to_csv("DBSCAN.csv")

    print('Estimated number of clusters: %d' % n_clusters_)
def run_model(ms, i_fold):
    model = ModelVW(ms.name(), i_fold)
    prms = model_params_dict[ms.model_params]
    if not prms.has_key("interaction"):
        prms["interaction"] = vw_inter_list[ms.feature_set]
    model.set_params(prms)
    model.set_data(ms.feature_set, i_fold)  # special
    model.train()
    pred = model.predict()
    train_pred = model.predict_train()
    model.dump()
    model.dump_pred(pred, "pred.pkl")
    return pred, train_pred
def create_model(self, training_articles):
    model = OneVsRestClassifier(svm.SVC(probability=True))
    features = []
    labels = []

    i = 0
    for article in training_articles:
        print("Generating features for article " + str(i) + "...")
        google_cloud_response = self.analyze_text_google_cloud(article["article"])
        relevant_entities = self.get_relevant_entities(google_cloud_response["entities"],
                                                       article["market"]["entities"],
                                                       article["market"]["wikipedia_urls"])

        # Only count this article if a relevant entity is present
        if relevant_entities:
            article_features = self.article_features(relevant_entities, article["market"],
                                                     google_cloud_response, article["article"])
            features.append(article_features)
            labels.append(article["label"])
        else:
            print("Skipping article " + str(i) + "...")

        i = i + 1

    print("Performing feature scaling...")
    scaler = preprocessing.StandardScaler().fit(features)
    features_scaled = scaler.transform(features)

    print("Fitting model...")
    model.fit(features_scaled, labels)

    print("Saving model...")
    joblib.dump(scaler, "data_analysis/caler.pkl")
    joblib.dump(model, "data_analysis/model.pkl")

    print("Done!")

# For use in prod
def train():
    DataTrain = loadPybrainData()
    fnn = buildNet()
    trainer = BackpropTrainer(fnn, dataset=DataTrain, momentum=0.05, verbose=True, weightdecay=0.005)
    trainer.trainUntilConvergence(maxEpochs=500)
    joblib.dump(fnn, PKL)
    return fnn
def printfile(X, filename):
    joblib.dump((X), filename)
def use_SVM(X_data, y_data):
    p_gamma = 0.1
    p_C = 10
    svm = SVC(kernel='rbf', random_state=0, gamma=p_gamma, C=p_C, probability=True)
    svm.fit(X_data, y_data)
    joblib.dump(svm, "./sklearn_model/svm_trainval1_{param1}_{param2}".format(param1=p_gamma, param2=p_C))
    return svm
def save(self, path):
    """
    Persist the model itself and its classes with joblib and pickle.

    Parameters
    ----------
    path: string
        The location of the persistence directory where model and classes will be stored.

    Return
    ----------
    None
    """
    joblib.dump(self.model, path + 'tree.pkl')
    joblib.dump(self.classes, path + 'classes.pkl')
def save(self):
    home_dir = self.__home_dir(self.field_manager.app_id)
    if not os.path.isdir(home_dir):
        print("making directory for app {}...".format(self.field_manager.app_id))
        os.mkdir(home_dir)

    path_fieldm = os.path.join(home_dir, self.FIELD_MANAGER_FILE)
    with open(path_fieldm, mode="w", encoding="utf-8") as fm:
        serialized = self.field_manager.to_dict()
        json.dump(serialized, fm, indent=2)

    if self.model:
        joblib.dump(self.model, os.path.join(home_dir, self.MODEL_FILE))
def save(self, filebase):
    # re-train best model on full data set
    self.model_.fit(self.data, self.data[LABEL].values)
    ts = datetime.now().strftime('%Y%m%d_%H%M%S')
    # logging wrappers don't serialize
    del self.logger
    joblib.dump(self, '{0}/model_{1}.pkl'.format(filebase, ts))
def train(estimator, feats_train, labels_train, weights_train, model='model.pkl'):
    '''
    Train and evaluate (using k-fold cross validation) the generated machine
    learning model for severity classification.
    @param estimator: the ML estimator to use
    @param feats_train: the training features
    @param labels_train: labels for the training data
    @return estimator: trained estimator (model)
    '''
    estimator = estimator.fit(feats_train, labels_train, sample_weight=weights_train)
    if model is not None:
        joblib.dump(estimator, cfg.PATH_RESOURCES + model)
    return estimator
def save(self, location="brain"):
    """
    Pickle the brain
    """
    if self._trained:
        joblib.dump(self.lobe, location + ".pickle")
        logger.info('Brain %s saved', location + '.pickle')
    else:
        return logger.error('Brain is not trained yet! Nothing to save...')
def getFeat(TrainData, TestData):
    for data in TestData:
        image = np.reshape(data[0].T, (32, 32, 3))
        gray = rgb2gray(image) / 255.0
        fd = hog(gray, 9, [8, 8], [2, 2], 'L2-Hys', False, True)
        fd = np.concatenate((fd, data[1]))
        filename = list(data[2])
        fd_name = filename[0].split('.')[0] + '.feat'
        fd_path = os.path.join('./data/features/test/', fd_name)
        joblib.dump(fd, fd_path)
    print "Test features are extracted and saved."
    for data in TrainData:
        image = np.reshape(data[0].T, (32, 32, 3))
        gray = rgb2gray(image) / 255.0
        fd = hog(gray, 9, [8, 8], [2, 2], 'L2-Hys', False, True)
        fd = np.concatenate((fd, data[1]))
        filename = list(data[2])
        fd_name = filename[0].split('.')[0] + '.feat'
        fd_path = os.path.join('./data/features/train/', fd_name)
        joblib.dump(fd, fd_path)
    print "Train features are extracted and saved."
def save_pkl(obj, path, log_description=None, logger=None,
             logging_level=logging.INFO, verbose_start=True,
             verbose_end=True, end_in_new_line=True, log_prefix="..."):
    if log_description is None:
        log_description = "Pickling to " + (path)

    with open(path, "wb") as fp, \
            SimpleTimer(log_description, logger, logging_level, verbose_start,
                        verbose_end, end_in_new_line, log_prefix):
        cPickle.dump(obj, fp, protocol=cPickle.HIGHEST_PROTOCOL)
def save_joblib_pkl(obj, path, log_description=None, logger=None,
                    logging_level=logging.INFO, verbose_start=True,
                    verbose_end=True, end_in_new_line=True, log_prefix="..."):
    try:
        from sklearn.externals import joblib
    except ImportError:
        raise ImportError("This function requires sklearn module. "
                          "You can install it via "
                          "\"pip install scikit-learn\".")

    if log_description is None:
        log_description = "Pickling to " + (path)

    with SimpleTimer(log_description, logger, logging_level, verbose_start,
                     verbose_end, end_in_new_line, log_prefix):
        joblib.dump(obj, path)
def save(self, filename):
    """
    Saves trained model to filename.

    :param filename: Name of file to save model as.
    """
    joblib.dump(self.svc, filename)
def save_corpus(out_prefix, X, terms, doc_ids, classes=None):
    """
    Save a pre-processed scikit-learn corpus and associated metadata using Joblib.
    """
    matrix_outpath = "%s.pkl" % out_prefix
    joblib.dump((X, terms, doc_ids, classes), matrix_outpath)
def save_term_rankings(out_path, term_rankings, labels=None):
    """
    Save a list of multiple term rankings using Joblib.
    """
    # no labels? generate some standard ones
    if labels is None:
        labels = []
        for i in range(len(term_rankings)):
            labels.append("C%02d" % (i+1))
    joblib.dump((term_rankings, labels), out_path)