Python sklearn.externals.joblib module: load() example source code

The following code examples, extracted from open-source Python projects, illustrate how to use sklearn.externals.joblib.load().
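Before the project examples, here is a minimal self-contained sketch of the dump/load round trip (the estimator, data, and file name are illustrative only; note that sklearn.externals.joblib was later removed from scikit-learn in favor of the standalone joblib package, so these examples use the old import path):

from sklearn.externals import joblib
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# fit a small model, persist it to disk, then restore an equivalent object
X, y = load_iris(return_X_y=True)
clf = LogisticRegression().fit(X, y)
joblib.dump(clf, 'clf.pkl')
restored = joblib.load('clf.pkl')
assert (restored.predict(X) == clf.predict(X)).all()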

Project: Verification-code-crack    Author: weixianglin
def predict():
    fnn=joblib.load(PKL)
    dir='E:/????/??????/1 ???/captcha_master1/captcha_master/worddata/'
    predictValue = []
    for fr in os.listdir(dir):
        dataset=[]
        f = dir + fr
        if f.rfind(u'.DS_Store') == -1 and f.rfind(u'Thumbs.db') == -1:
            data = np.loadtxt(f, delimiter=',')
            #data.reshape((1,2500))
            for item in data:
                dataset.append(int(item))

            #print(len(dataset))
            out = fnn.activate(dataset)
            out = out.argmax()
            iconset = ['3', 'c', 'd', 'e', 'f', 'h', 'j', 'k', 'l', 'm', 'n', 'w', 'x', 'y']
            for y, word in enumerate(iconset):
                if out == y:
                    print(word)
                    predictValue.append(word)

    print(u'predicted captcha: %s' % (''.join(predictValue)))
Project: Verification-code-crack    Author: weixianglin
def test():
    DS=loadPybrainData()
    train,test=DS.splitWithProportion(0.1)
    fnn=joblib.load(PKL)
    # evaluate the trained network on the held-out test set
    output = fnn.activateOnDataset(test)
    # (ann.activate(onedata) would return the network output for a single sample instead)
    outputs=[]
    target=[]
    count=0
    for out in output:
        outs=out.argmax()
        outputs.append(outs)
    for tar in test['target']:
        ta=tar.argmax()
        target.append(ta)
    for i in range(0,len(target)):
        if outputs[i]==target[i]:
            count+=1

    right = count / float(len(target))  # per-character accuracy
    rate = right ** 4  # presumably each captcha has 4 characters, so whole-captcha accuracy ~ right**4
    print("estimated captcha accuracy: %.4f%%" % (rate * 100))
    v = Validator()
    print(u'MSE on the test set:', v.MSE(output, test['target']))  # mean squared error between network outputs and test targets
Project: rosie    Author: datasciencebr
def load_trained_model(self, classifier):
        filename = '{}.pkl'.format(classifier.__name__.lower())
        path = os.path.join(self.data_path, filename)

        # palliative: this outputs a model too large for joblib
        if classifier.__name__ == 'MonthlySubquotaLimitClassifier':
            model = classifier()
            model.fit(self.dataset)

        else:
            if os.path.isfile(path):
                model = joblib.load(path)
            else:
                model = classifier()
                model.fit(self.dataset)
                joblib.dump(model, path)

        return model
Project: ensemble_amazon    Author: kaz-Anova
def loadcolumn(filename,col=4, skip=1, floats=True):
    pred=[]
    op=open(filename,'r')
    if skip==1:
        op.readline() #header
    for line in op:
        line=line.replace('\n','')
        sps=line.split(',')
        # load the requested column from every row
        if floats:
            pred.append(float(sps[col]))
        else:
            pred.append(str(sps[col]))
    op.close()
    return pred


#functions to manipulate pickles
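The pickle helpers announced by this comment are cut off in this excerpt; load_datas from the same project appears further down the page, and its dump counterpart presumably looks like this (the save_datas name is an assumption):

def save_datas(data, filename):
    # counterpart of load_datas: serialize any Python object to disk with joblib
    joblib.dump(data, filename)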
Project: SNAP_R    Author: zerofox-oss
def is_target(screen_name, disable_targeting, model_file='cluster.pkl'):
    """
    Returns a boolean for whether the user should be selected according
    to label identity returned by a prediction from a pretrained
    clustering algorithm.
    """
    if disable_targeting:
        return True
    else:
        auth = tweepy.OAuthHandler(credentials.consumer_key,
                                   credentials.consumer_secret)
        auth.set_access_token(credentials.access_token,
                              credentials.access_token_secret)
        api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())
        user_array = numpy.array([api.get_user(screen_name=screen_name)])
        model = joblib.load(model_file)
        cluster_label = model.predict(user_array)
        return cluster_label == 1
Project: stacked_generalization    Author: fukatani
def get_cache_file(model_id, index, cache_dir='', suffix='csv'):
    # Index-identification trick: if the sums of the first 20 index values
    # match, the two indices are treated as the same index.
    if index is None:
        raise IOError
    if len(index) < 20:
        sum_index = sum(index)
    else:
        sum_index = sum(index[:20])
    return "{0}{1}_{2}.{3}".format(cache_dir,
                                   model_id,
                                   sum_index,
                                   suffix)

##def saving_fit(learner, X, y, index):
##    import os
##    pkl_file = "{0}_{1}_{2}.pkl".format(learner.id, min(index), max(index))
##    try:
##        learner = joblib.load(pkl_file)
##        print("**** learner is loaded from {0} ****".format(pkl_file))
##    except IOError:
##        learner.fit(X, y)
##        joblib.dump(learner, pkl_file)
##    return learner
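Uncommented, the saving_fit cache sketched above reads as follows (taken directly from the comment block; assumes joblib is imported at module level):

def saving_fit(learner, X, y, index):
    pkl_file = "{0}_{1}_{2}.pkl".format(learner.id, min(index), max(index))
    try:
        # reuse a previously fitted learner if its pickle exists
        learner = joblib.load(pkl_file)
        print("**** learner is loaded from {0} ****".format(pkl_file))
    except IOError:
        # otherwise fit from scratch and cache the result
        learner.fit(X, y)
        joblib.dump(learner, pkl_file)
    return learner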
Project: hugo_similar_posts    Author: elbaulp
def KmeansWrapper(true_k, data, load=False):
    from sklearn.externals import joblib

    modelName = 'doc_cluster.%s.plk' % true_k

    if load:
        km = joblib.load(modelName)
        labels = km.labels_
    else:
        km = KMeans(n_clusters=true_k,
                    init='k-means++',
                    # max_iter=1000,
                    n_init=10,
                    n_jobs=-1,
                    random_state=0,
                    verbose=0)
        km.fit_predict(data)
        labels = km.labels_
        joblib.dump(km,  modelName)

    return labels, km.cluster_centers_
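A usage sketch for KmeansWrapper (X stands for any 2-D feature matrix; the second call reloads the model that the first call dumped, under the .plk extension the code actually uses):

labels, centers = KmeansWrapper(5, X)             # fits KMeans and dumps doc_cluster.5.plk
labels, centers = KmeansWrapper(5, X, load=True)  # reloads the dumped model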
Project: image-text-matching    Author: llltttppp
def generate_fishervector(sample_set,ica_model='./model/ICA/ica_ourword2vec.model',gmm_model_path='./model/GMM/gmm_ourword2vec.model',max_num = 30000):
    ica = joblib.load(ica_model)
    gmm_model = pkl.load(open(gmm_model_path, 'r'))
    centers = gmm_model[0].shape[0]
    dims = gmm_model[1].shape[1]
    # one row per sample: mu and sigma Fisher statistics for every GMM center
    fishervector = np.zeros([len(sample_set), centers * dims * 2]) + 0.00001
    for i, v in enumerate(sample_set):
        words = v.strip().split(' ')
        words = words[:min(len(words), max_num + 200)]
        vectors = []
        for j in words:
            try:
                # word2vec_model is a module-level global in the original file
                vectors.append(word2vec_model[j])
            except KeyError:
                pass  # skip words missing from the embedding vocabulary
        if len(vectors) > 0:
            vectors = vectors[:min(len(vectors), max_num)]
            fishervector[i] = yael.ynumpy.fisher(gmm_model, ica.transform(np.array(vectors)).astype(np.float32), include='mu sigma')
    print 'mean vector is', fishervector.mean(0)
    return fishervector
Project: image-text-matching    Author: llltttppp
def __init__(self,is_training=True,is_skip=False, batch_size= 100, is_TopKloss=True, 
                 word2vec_model='./model/word2vec/ourword2vec.pkl'):
        # word2vec_model='/media/wwt/860G/model/word2vec/cn.cbow.bin'
        #self.model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model, binary=True, unicode_errors='ignore')
        self.word2vec = pkl.load(open(word2vec_model,'r'))
        self.batch_size = batch_size
        self.weight_decay = 0.0005
        self.endpoint={}
        self.is_skip=is_skip
        self.is_TopKloss = is_TopKloss
        self.is_training = is_training
        self.keep_prob = 0.5 if is_training else 1.0
        self.build_input()
        #self.build_matchnet()
        #self.build_classify()
        #self.build_crossEnt_class()
        #self.loss_weight = 0.
        self.build_unite()
        if is_training:
            #self.build_summary()
            #self.build_summary_crossEnt()
            self.build_summary_unite()
Project: sl-quant    Author: danielzak
def init_state(indata, test=False):
    close = indata['close'].values
    diff = np.diff(close)
    diff = np.insert(diff, 0, 0)
    sma15 = SMA(indata, timeperiod=15)
    sma60 = SMA(indata, timeperiod=60)
    rsi = RSI(indata, timeperiod=14)
    atr = ATR(indata, timeperiod=14)

    #--- Preprocess data
    xdata = np.column_stack((close, diff, sma15, close-sma15, sma15-sma60, rsi, atr))

    xdata = np.nan_to_num(xdata)
    if test == False:
        scaler = preprocessing.StandardScaler()
        xdata = np.expand_dims(scaler.fit_transform(xdata), axis=1)
        joblib.dump(scaler, 'data/scaler.pkl')
    elif test == True:
        scaler = joblib.load('data/scaler.pkl')
        # use transform, not fit_transform: reuse the scaler exactly as fitted on the training data
        xdata = np.expand_dims(scaler.transform(xdata), axis=1)
    state = xdata[0:1, 0:1, :]

    return state, xdata, close

#Take Action
Project: ml_defense    Author: arjunbhagoji
def model_loader(model_dict, adv=None, rd=None, rev=None):
    """
    Returns a classifier object if it already exists. Returns None, otherwise.
    """
    # always define adv_mag (the original set it only when adv was None,
    # which raised NameError on every other code path)
    adv_mag = None
    print('Loading model...')
    abs_path_m = resolve_path_m(model_dict)
    try:
        clf = joblib.load(abs_path_m + get_svm_model_name(model_dict, adv, adv_mag, rd, rev) +
                          '.pkl')
    except BaseException:
        clf = None

    return clf
#------------------------------------------------------------------------------#
Project: FreeDiscovery    Author: FreeDiscovery
def vect_(self):
        if not hasattr(self, '_vect') or self._vect is None:
            mid = self.dsid
            mid_dir = self.cache_dir / mid
            if not mid_dir.exists():
                raise ValueError(('Vectorizer model id {} ({}) '
                                  'not found in the cache {}!')
                                 .format(mid, mid_dir, self.cache_dir))
            fname = mid_dir / 'vectorizer'
            if self.pars_['use_hashing']:
                self._vect = joblib.load(str(fname))
            else:
                # this is much faster in python 3 as cpickle is used
                # (only works if no numpy arrays are used)
                with fname.open('rb') as fh:
                    self._vect = pickle.load(fh)
        return self._vect
Project: FreeDiscovery    Author: FreeDiscovery
def _show(args):
    cache_dir = _parse_cache_dir(args.cache_dir)
    p = PipelineFinder.by_id(mid=args.mid, cache_dir=cache_dir)
    print(p)
    print(' * model_id: {}'.format(args.mid))
    print(' * model_type: {}'.format(list(p.keys())[-1]))
    print(' * file_path: {}'.format(p.get_path()))
    try:
        pars = joblib.load(os.path.join(p.get_path(), 'pars'))
        for key, val in pars.items():
            val_str = str(val)
            if len(val_str) > 30 and not isinstance(val, dict):
                continue
            print(' * {}: {}'.format(key, val_str))
    except Exception:
        pass  # some model types have no 'pars' file
Project: FreeDiscovery    Author: FreeDiscovery
def test_get_feature_extraction(app, hashed, weighting):
    norm_alpha = 0.5
    dsid, _, _ = get_features_cached(app, hashed=hashed, weighting=weighting,
                                     norm_alpha=norm_alpha)
    method = V01 + "/feature-extraction/{}".format(dsid)
    data = app.get_check(method)
    assert dict2type(data, collapse_lists=True) == {'analyzer': 'str',
                     'ngram_range': ['int'], 'stop_words': 'str',
                     'n_jobs': 'int', 'chunk_size': 'int',
                     'data_dir': 'str', 'n_samples': 'int',
                     'n_features': 'int', 'weighting': 'str',
                     'norm_alpha': 'float', 'use_hashing': 'bool',
                     'filenames': ['str'], 'max_df': 'float', 'min_df': 'float',
                     'parse_email_headers': 'bool', 'n_samples_processed': 'int',
                     'preprocess': []}

    assert data['use_hashing'] == hashed
    assert data['weighting'] == weighting
    assert data['norm_alpha'] == norm_alpha

    vect = joblib.load(os.path.join(CACHE_DIR, 'ediscovery_cache', dsid, 'vectorizer'))
    assert (data['use_hashing'] is True) == ('hashing' in type(vect).__name__.lower())
Project: 100knock2016    Author: tmu-nlp
def predict_function():
    x_list = []
    line_list = []
    line_dict = {}
    predict_doc = joblib.load('logreg.pkl')
    feature_doc = joblib.load("word_vec.pkl")
    y_train, x_train = get_feature()
    line = "bad bad good good"
    line_list = line.split()
    for line in x_train:
        for key in line:
            line_dict[key] = 0
    line_dict.update(dict(Counter(line_list)))
    for a in sorted(line_dict.items(), key = lambda x:x[1]):
        print(a)
    x_list.append(line_dict)
    print(x_list)
    exit()
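    # NOTE: exit() above halts the function here; the lines below never run as written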
    X = DictVectorizer().fit_transform(x_list)
    pred = predict_doc.predict(X)
    prob = predict_doc.predict_proba(X)
    for pred, prob in zip(pred,prob):
        print(pred, prob)
Project: CAAPR    Author: Stargrazer82301
def from_file(cls, path):

        """
        This function ...
        :param path:
        :return:
        """

        # Create a new classifier instance
        classifier = cls()

        # Load the classifier
        classifier.vector_classifier = joblib.load(path)

        # Return the classifier
        return classifier

    # -----------------------------------------------------------------
Project: Sohu-LuckData-Image-Text-Matching-Competition    Author: WeitaoVan
def __init__(self,is_training=True,is_skip=False, batch_size= 100, is_TopKloss=True, 
                 word2vec_model='/media/wwt/860G/data/souhu_data/fusai/train/word2vec_11w.pkl'):
        # word2vec_model='/media/wwt/860G/model/word2vec/cn.cbow.bin'
        #self.model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_model, binary=True, unicode_errors='ignore')
        self.word2vec = pkl.load(open(word2vec_model,'r'))
        self.batch_size = batch_size
        self.weight_decay = 0.000001
        self.endpoint={}
        self.is_skip=is_skip
        self.is_TopKloss = is_TopKloss
        self.is_training = is_training
        self.keep_prob = 0.5 if is_training else 1.0
        self.build_input()
        #self.build_matchnet()
        #self.build_classify()
        #self.build_crossEnt_class()
        self.loss_weight = 0.
        self.build_unite(self.loss_weight)
        if is_training:
            #self.build_summary()
            #self.build_summary_crossEnt()
            self.build_summary_unite()
Project: modl    Author: arthurmensch
def load_movielens(version):
    data_home = get_data_dirs()[0]

    if version == "100k":
        path = os.path.join(data_home, "movielens100k", "movielens100k.pkl")
    elif version == "1m":
        path = os.path.join(data_home, "movielens1m", "movielens1m.pkl")
    elif version == "10m":
        path = os.path.join(data_home, "movielens10m", "movielens10m.pkl")
    else:
        raise ValueError("Invalid version of movielens.")

    # FIXME: make downloader
    if not os.path.exists(path):
        raise ValueError("Dowload dataset using 'make download-movielens%s' at"
                         " project root." % version)

    X = load(path)
    return X
Project: MusicAnalyser    Author: ShivayaDevs
def predict_song(wavfile):
    sampling_rate, song_array = scipy.io.wavfile.read(wavfile)
    song_array[song_array == 0] = 1
    ceps, mspec, spec = mfcc(song_array)
    base_wav, ext = os.path.splitext(wavfile)
    data_wav = base_wav + ".ceps"
    np.save(data_wav, ceps)

    # features
    X = []
    Y = []
    ceps = np.load(data_wav + ".npy")
    num_ceps = len(ceps)
    X.append(np.mean(ceps[int(num_ceps * 1 / 10): int(num_ceps * 9 / 10)], axis=0))

    # prediction
    # print predict_file(X)
    genre_list = ["country", "hiphop", "metal", "pop", "reggae", "rock"]

    clf = joblib.load('./analyser/ml_utils/genre_classify/model_ceps.pkl')
    index = clf.predict(X)
    return genre_list[index[0]]
Project: algotrading    Author: alifanov
def backtesting_with_lstm():
    model = get_loaded_model()
    df = pd.read_csv('btc_etc.csv').rename(columns={
        'Close': 'close',
        'Date time': 'datetime',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Volume': 'volume'
    })
    ds = DataSeries(df)

    scaler = joblib.load(open('scaler.sav', 'rb'))
    look_back = 1

    bt = NNBT(ds, 1000.0, model, look_back, scaler)
    bt.run()

    print('Profit: ${:.2f}'.format(bt.get_profit()))
Project: tpai_comp    Author: luuuyi
def use_model_to_predict(test_df, model):
    test_df.drop(['label'], axis=1, inplace=True)
    print 'Fix Missing App Count Value...'
    model_miss = joblib.load('XGB_missing.model')
    test_df = fix_missing_appcounts(test_df, model_miss)
    '''print 'Fix Missing Age Value...'
    model_age = joblib.load('XGB_age.model')
    test_df = fix_missing_age(test_df, model_age)'''
    test_df.drop(['marriageStatus','haveBaby','sitesetID', 'positionType'], axis=1, inplace=True)
    print 'Done'
    print test_df.info()
    print test_df.describe()
    print test_df.isnull().sum()
    test_np = test_df.as_matrix()
    X = test_np[:, 1:]
    print 'Use Model To Predict...'
    predicts = model.predict(X)
    result = pd.DataFrame({'instanceID':test_df['instanceID'].as_matrix(), 'prob':predicts})
    #print predicts#, predicts.min(axis=0), predicts.max(axis=0), predicts.sum(axis=1)
    return result
Project: facial-keypoints-detection    Author: saber1988
def generate_submission(test_dataset, sess, eval_prediction, eval_data_node):
    test_labels = eval_in_batches(test_dataset, sess, eval_prediction, eval_data_node)
    test_labels *= 96.0
    test_labels = test_labels.clip(0, 96)

    lookup_table = pd.read_csv(FLOOKUP)
    values = []

    cols = joblib.load('data/cols.pkl')

    for index, row in lookup_table.iterrows():
        values.append((
            row['RowId'],
            test_labels[row.ImageId - 1][np.where(cols == row.FeatureName)[0][0]],
        ))
    submission = pd.DataFrame(values, columns=('RowId', 'Location'))
    submission.to_csv('data/submission.csv', index=False)
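For context, data/cols.pkl presumably holds the keypoint column names of the training targets, dumped once during training so that FeatureName can be mapped to an output index (a sketch; y_columns is an assumed pandas Index, not shown in this excerpt):

joblib.dump(y_columns.values, 'data/cols.pkl')  # numpy array of keypoint column names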
Project: facial-keypoints-detection    Author: saber1988
def make_submission(test_labels):
    test_labels *= 96.0
    test_labels = test_labels.clip(0, 96)

    lookup_table = pd.read_csv(FLOOKUP)
    values = []

    cols = joblib.load('data/cols.pkl')

    for index, row in lookup_table.iterrows():
        values.append((
            row['RowId'],
            test_labels[row.ImageId - 1][np.where(cols == row.FeatureName)[0][0]],
        ))
    submission = pd.DataFrame(values, columns=('RowId', 'Location'))
    submission.to_csv('data/submission.csv', index=False)
Project: ocr    Author: lznumber1
def splity(im):
    s = ''
    w,h = im.size
    pix = im.load()
    for j in xrange(h):
        num = 0
        for i in xrange(w):
            # print pix[i,j]
            if pix[i,j]==BLACK:
                num += 1
        if num > 0:
            s += '1'
        else:
            s += '0'
    # print s
    start = s.find('1')
    end = s.rfind('1')
    return im.crop((0,start,w,end))
Project: PyMLT    Author: didw
def __init__(self, s_date):
        prev_bd = int(s_date[:6])-1
        prev_ed = int(s_date[9:15])-1
        if prev_bd%100 == 0: prev_bd -= 98
        if prev_ed%100 == 0: prev_ed -= 98
        pred_s_date = "%d01_%d01" % (prev_bd, prev_ed)
        prev_model = '../model/tflearn/lstm/%s' % pred_s_date
        self.model_dir = '../model/tflearn/lstm/%s' % s_date

        tf.reset_default_graph()
        tflearn.init_graph(gpu_memory_fraction=0.1)
        input_layer = tflearn.input_data(shape=[None, 30, 23], name='input')
        lstm1 = tflearn.lstm(input_layer, 23, dynamic=True, name='lstm1')
        dense1 = tflearn.fully_connected(lstm1, 1, name='dense1')
        output = tflearn.single_unit(dense1)
        regression = tflearn.regression(output, optimizer='adam', loss='mean_square',
                                metric='R2', learning_rate=0.001)
        self.estimators = tflearn.DNN(regression)
        if os.path.exists('%s/model.tfl' % prev_model):
            self.estimators.load('%s/model.tfl' % prev_model)
Project: word_segmentation    Author: CongSon1293
def load_vocab(self, vocab):
        self.vocab = self.load('model/vocab.pkl')
        self.max_length = self.load('model/max_length.pkl')
        if self.vocab is not None and self.max_length is not None:
            return
        vocab_temp, self.max_length = utils.load_data2list_string(vocab)
        # vocab_temp = sorted(vocab_temp, key=lambda s: len(s.split()), reverse=True)
        vocab_temp = filter(lambda s: len(s.split()) > 1, vocab_temp)  # drop words with only one syllable
        vocab_temp_clone = map(lambda s: s.replace(u' ', u'_'), vocab_temp)
        self.vocab = {i:{} for i in xrange(1, self.max_length+1)}
        for i in xrange(len(vocab_temp)):
            s = vocab_temp[i]
            ss = vocab_temp_clone[i]
            w = s.split()[0]
            length = vocab_temp[i].count(u' ')
            try: self.vocab[length][w].update({s:ss})
            except: self.vocab[length].update({w:{s:ss}})
        print('size of vocab = %d' % (len(vocab_temp)))
        self.save_model(self.vocab, 'model/vocab.pkl')
        self.save_model(self.max_length, 'model/max_length.pkl')
Project: pygameweb    Author: pygame
def classify_comment(comment):
    """Classify the comment.

    :param comment: should have a message attribute.
    """
    global _comment_pipeline
    from sklearn.externals import joblib

    model_is_not_loaded = _comment_pipeline is None
    if model_is_not_loaded:
        import pygameweb.comment.classifier_train
        import pygameweb.config
        model_fname = pygameweb.config.Config.COMMENT_MODEL
        _comment_pipeline = joblib.load(model_fname)

    return _comment_pipeline.predict([comment.message])[0]
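A usage sketch (any object with a message attribute works, per the docstring; the class here is a stand-in):

class StubComment(object):
    def __init__(self, message):
        self.message = message

label = classify_comment(StubComment('nice write-up, thanks!'))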
Project: algo-trading-pipeline    Author: NeuralKnot
def load_model(self):
        self.scaler = joblib.load("data_analysis/scaler.pkl")
        self.model = joblib.load("data_analysis/model.pkl")

    # Processes the given article and stores the results in the queue
Project: ensemble_amazon    Author: kaz-Anova
def create_ranklist (data ) :
    for j in range(len(data[0])):
        putcolumn( data,ranking(select_column(data,j)),j)


# method to load a specific column
Project: ensemble_amazon    Author: kaz-Anova
def load_datas(filename):

    return joblib.load(filename)
Project: a-cadmci    Author: florez87
def load(self, path):
        """
        Load a model and its classes with joblib and pickle.

        Parameters
        ----------
        path: string
            The location of the persistence directory from which model and classes will be loaded.

        Returns
        ----------
        None
        """
        self.model = joblib.load(path + 'tree.pkl')
        self.classes = joblib.load(path + 'classes.pkl')
Project: karura    Author: icoxfog417
def build(self, environment, ml_definitions):
        self._messages.clear()

        # read received definitions and configure these
        field_manager = FieldManager.read_definitions(ml_definitions)
        field_manager.init(environment)

        # load dataset and evaluate
        dataset = DataSet.load_dataset(environment, field_manager=field_manager)
        self._merge_and_check_messages(dataset.evaluate())

        # build the feature from field and dataset
        f_builder = FeatureBuilder(field_manager)
        f_builder.build(dataset)
        self._merge_and_check_messages(f_builder.evaluate())

        # adjust the dataset to the feature
        adjusted = f_builder.field_manager.adjust(dataset)

        # make & train the model
        m_builder = ModelBuilder(f_builder.field_manager)
        m_builder.build(adjusted)
        self._merge_and_check_messages(m_builder.evaluate())

        self.field_manager = f_builder.field_manager
        self.model = m_builder.model
        self.model_score = m_builder.model_score
Project: karura    Author: icoxfog417
def load(cls, app_id):
        home_dir = cls.__home_dir(app_id)
        if not os.path.isdir(home_dir):
            raise Exception("Model File for application {} have not created yet.".format(app_id))

        path_fieldm = os.path.join(home_dir, cls.FIELD_MANAGER_FILE)
        with open(path_fieldm, mode="r", encoding="utf-8") as md:
            serialized = json.load(md)
            field_manager = FieldManager.load(serialized)

        trained_model = joblib.load(os.path.join(home_dir, cls.MODEL_FILE))

        model_manager = ModelManager(field_manager, trained_model)

        return model_manager
Project: kaggle-dstl-satellite-imagery-feature-detection    Author: u1234x1234
def mask_to_poly(image_id):
    preds = joblib.load('raw_preds/raw_blend5/{}.pkl'.format(image_id))
    size = preds.shape[1]
    if n_out == 10:
#        preds = (preds > 0.3).astype(np.uint8)

        thresholds = np.array([0.4, 0.4, 0.4, 0.4, 0.8,
                               0.4, 0.4, 0.4, 0.1, 0.1]).reshape((10, 1))
        preds = (preds.reshape((10, -1)) > thresholds).reshape((10, size, size))
        preds = preds.astype(np.uint8)
    else:
        preds = np.argmax(preds, axis=0)
        preds = unsoft(preds)

    rg = colorize_raster(preds.transpose((1, 2, 0)))
#    cv2.imwrite('1.png', rg)
    size = 900
    rg = cv2.resize(rg, (size, size))
#    cv2.imshow('mask', rg)
#    cv2.waitKey()
    im = get_rgb_image(image_id, size, size)
    rg = np.hstack([rg, im])
    cv2.imwrite('raw_temp5_1/{}.png'.format(image_id), rg)

    shs = []
    for i in range(10):
        mask = preds[i]

        y_sf, x_sf = get_scale_factor(image_id, mask.shape[0], mask.shape[1])
        y_sf = 1. / y_sf
        x_sf = 1. / x_sf

        sh = polygonize_cv(mask)
#        sh = polygonize_sk((mask>0)*255, 0)
#        sh = (sh1.buffer(0).intersection(sh2.buffer(0))).buffer(0)

#        if not sh.is_valid:
#            sh = sh.buffer(0)
        sh = affinity.scale(sh, xfact=x_sf, yfact=y_sf, origin=(0, 0, 0))
        shs.append(sh)
    return shs
Project: probablyPOTUS    Author: jjardel
def _load_credentials(self):

        with open(self.loc.format('../../config/twitter_creds.json')) as fp:

            config = json.load(fp)

        self.logger.info('Twitter credentials loaded')

        return config
Project: probablyPOTUS    Author: jjardel
def _load_model(self):

        self.logger.info('Loading serialized model')

        # hardcoded path
        path = self.loc.format('../saved_models/model.pkl')

        return joblib.load(path)
Project: probablyPOTUS    Author: jjardel
def transform(self):

        tweet_df = json_normalize(self.tweet)

        # drop all columns from tweet_df that we're not using in extract_fields
        with open(self.loc.format('../../etl/extract/extract_fields.json')) as fp:
            fields_dict = json.load(fp)
            fields_subset = fields_dict.get('fields')

        tweet_df = tweet_df.loc[:, fields_subset]

        # perform transformations on DF to get into same form as DB table
        tweet_df.loc[:, 'retweets_to_faves'] = 0

        # this feature isn't scaled properly since we're pulling from the stream
        #tweet_df.loc[:, 'retweets_to_faves'] = tweet_df.loc[:, 'retweet_count'] / tweet_df.loc[:, 'favorite_count']
        tweet_df.loc[:, 'num_characters'] = tweet_df.text.apply(lambda x: len(x))
        tweet_df.loc[:, 'num_exclamation_points'] = tweet_df.text.apply(lambda x: x.count('!'))
        tweet_df.loc[:, 'is_tweetstorm'] = 0
        tweet_df.loc[:, 'is_trump_retweet'] = tweet_df.text.apply(lambda x: is_retweet(x))
        tweet_df.loc[:, 'num_uppercase_strings'] = tweet_df.text.apply(lambda x: count_uppercase_substrings(x))
        tweet_df.loc[:, 'source'] = tweet_df.source.apply(lambda x: normalize_tweet_sources(x))

        tweet_df.rename(columns={
            'favorite_count': 'favorites',
            'quoted_status.text': 'quoted_status_text',
            'retweet_count': 'retweets',
            'source': 'tweet_source',
            'user.id_str': 'user_id_str',
            'user.name': 'user_name',
            'user.followers_count': 'followers',
            'user.screen_name': 'user_screen_name',
            'user.statuses_count': 'num_statuses'

        }, inplace=True)

        self.tweet_df = tweet_df
Project: Dense-Net    Author: achyudhk
def load_current_checkpoints(self):
        ''' Return the current checkpoint '''

        checkpoints = os.listdir(self.path_checkpoints)
        num = max([int(f.split('_')[1]) for f in checkpoints])
        name = 'check_' + str(num)
        return num, joblib.load(os.path.join(self.path_checkpoints, name, name + '.pkl'))
Project: Automatic-Question-Generation    Author: bwanglzu
def _classify(df):
    """Classification
    - Args:
        df(pandas.dataframe): candidate qa pairs with extracted features 
    - Returns:
        question_answers(pandas.dataframe): Question, Answer, Prediction (label)
    """
    model_path = os.path.dirname(os.path.abspath(__file__)) + '/models/clf.pkl'
    clf = joblib.load(model_path)
    question_answers = df[['Question', 'Answer']]
    X = df.drop(['Answer', 'Question', 'Sentence'], axis=1).as_matrix()
    y = clf.predict(X)
    question_answers['Prediction'] = y
    return question_answers
Project: rdocChallenge    Author: Elyne
def test(feats_test, estimator=None, model='model.pkl'):
    """
    Evaluate the generated machine learning model on test data, and print a mean absolute error.
    @param estimator: The trained ML model/estimator
    @param feats_test: test features (obtained from data)
    """
    if estimator is None:
        estimator = joblib.load(cfg.PATH_RESOURCES+model)

    return estimator.predict(feats_test)
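A usage sketch (assuming a model was previously saved with joblib.dump(estimator, cfg.PATH_RESOURCES + 'model.pkl'); the trained variable is an assumption):

preds = test(feats_test)                      # loads model.pkl from cfg.PATH_RESOURCES
preds = test(feats_test, estimator=trained)   # or reuse an in-memory estimator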
Project: elephant_sense    Author: chakki-works
def load(self):
        self.classifier = joblib.load(self.model_path + "banana.pkl")
        self.scaler = joblib.load(self.model_path + "banana_scaler.pkl")
        with open(self.model_path + "banana_list.txt") as f:
            self.features = f.readline().split()
        return self