The following 19 code examples, extracted from open-source Python projects, illustrate how utils.load_data() is used in practice.
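The signature of load_data() differs from project to project: some versions take a CSV path and a label flag, others an argparse namespace, a database handle, or no arguments at all. As a rough orientation, a minimal CSV-backed loader might look like the sketch below; the pandas-based implementation and the has_labels parameter are assumptions for illustration, not the API of any project in this list.

# Minimal, hypothetical sketch of a CSV-backed load_data();
# not taken from any of the projects below.
import pandas as pd

def load_data(csv_path, has_labels=True):
    """Read a CSV file and return features (and labels when present)."""
    df = pd.read_csv(csv_path)
    if has_labels:
        x = df.iloc[:, :-1].to_numpy()   # every column except the last
        y = df.iloc[:, -1].to_numpy()    # last column as the target
        return x, y
    return df.to_numpy()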
Example 1

def train(ENV, args):
    processed_train_data_path = os.path.join(ENV.processed_data_path, 'processed_train.pkl')
    processed_test_data_path = os.path.join(ENV.processed_data_path, 'processed_test.pkl')
    if os.path.exists(processed_train_data_path) and os.path.exists(processed_test_data_path):
        # pickle files must be opened in binary mode
        processed_train_data = pickle.load(open(processed_train_data_path, 'rb'))
        processed_test_data = pickle.load(open(processed_test_data_path, 'rb'))
    else:
        train_wav_files, train_phn_files = load_data(ENV.train_data)
        print('Process train data...')
        processed_train_data = process_data(train_wav_files, train_phn_files)
        test_wav_files, test_phn_files = load_data(ENV.test_data)
        print('Process test data...')
        processed_test_data = process_data(test_wav_files, test_phn_files)
        pickle.dump(processed_train_data, open(processed_train_data_path, 'wb'))
        pickle.dump(processed_test_data, open(processed_test_data_path, 'wb'))
    # print(processed_train_data[0][1])
    print("Define graph...")
    train_model(ENV, processed_train_data, processed_test_data)
Example 2

def skoptTUNE(args, model, n_calls):
    """
    Hyper-parameter optimization using scikit-optimize.
    It offers three algorithms: forest_minimize (decision-tree regression search),
    gbrt_minimize (gradient-boosted-tree search), and gp_minimize
    (Gaussian process regression search).
    """
    hyperparameters = {
        'batch_size': (40, 120),
        'num_hidden': (100, 500),
        'dropout_output': (0.3, 1.0),
        'dropout_input': (0.3, 1.0),
        'clip_norm': (0.5, 1.0),
    }
    data = load_data(args, args.data, saved=args.load_data)
    all_res = skopt_search(args, data, model, hyperparameters, gp_minimize, n_calls=n_calls)
    print(all_res)
Example 3

def hyperoptTUNE(args, model, n_calls):
    """
    Search the hyper-parameter space according to the tree of Parzen estimators,
    a Bayesian approach.
    """
    hyperparameters = {
        'batch_size': hp.choice('batch_size', range(40, 130, 20)),
        'num_hidden': hp.quniform('num_hidden', 100, 500, 1),
        # 'learning_rate': hp.choice('learning_rate', [0.0005]),
        'dropout_output': hp.quniform('dropout_output', 0.3, 1.0, 0.1),
        'dropout_input': hp.quniform('dropout_input', 0.3, 1.0, 0.1),
        'clip_norm': hp.quniform('clip_norm', 0.5, 1.0, 0.1),
    }
    data = load_data(args, args.data, saved=args.load_data)
    best_params, all_res = hyperopt_search(args, data, model, hyperparameters, max_evals=n_calls)
    print(best_params)
Example 4

def main(args):
    '''Module main method'''
    random.seed()
    problem = MathProblem()
    database = utils.initialize_database(args, 'MathDatabase')
    database.set_objective_names(['cos', 'sinc'])
    generation = database.properties['highest_population']
    population_size = database.properties['population_size']
    genetic_algorithm = evolution.NSGA(problem, population_size)
    if generation > 0:
        parents, children = utils.load_data(database)
        genetic_algorithm.set_population(parents)
        genetic_algorithm.set_children(children)
    for _ in range(args.iterations):
        generation += 1
        print('Starting generation ' + str(generation))
        genetic_algorithm.iterate()
        database.create_population()
        utils.save_data(genetic_algorithm, database)
        print('=' * (SCREEN_WIDTH - 1))
Example 5

def read_gram_from_file(i, n, l, comment=""):
    data = load_data(_get_gram_file_name(i, n, l, comment=comment))
    return data
Example 6

def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully ......')
    rf = RandomForestRegressor(
        n_estimators=2000,      # [1500, 2000]
        min_samples_split=2,
        max_depth=15,           # [10, 15]
        n_jobs=-1
    )
    rf.fit(train_x, train_y)
    ###### save model ##################
    joblib.dump(rf, 'weights/' + Model_Name + '.m')
    y_pred = rf.predict(test_x)
    ####### save results ###########################
    save_results(result_csv_path, y_pred)
    ###### generate report #######################
    feature_importances = rf.feature_importances_
    dic_feature_importances = dict(zip(fields, feature_importances))
    dic = sorted(dic_feature_importances.items(), key=lambda d: d[1], reverse=True)
    print('feature_importances:')
    for i in range(len(dic)):
        print(dic[i][0] + ":\t" + str(dic[i][1]))
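Example 6 (and several later examples) also calls a save_results() helper that is not shown. A plausible sketch follows; the single-column CSV layout is an assumption, since the real output format is not visible here.

# Hypothetical sketch of save_results(); the real output format is not shown.
import csv

def save_results(result_csv_path, y_pred):
    """Write one prediction per row to a CSV file."""
    with open(result_csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['prediction'])
        for value in y_pred:
            writer.writerow([value])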
Example 7

def step_data(FPATH, end_date=None):
    all_data = load_data(FPATH, EPS, end_date=end_date, use_sensors=[5])
    return all_data

Example 8

def step_data():
    EPS = 1e-6
    all_data = load_data(FPATH, EPS)
    return all_data

Example 9

def step_data(FPATH, end_date=None):
    all_data = load_data(FPATH, EPS, end_date=end_date)
    return all_data

Example 10

def step_data(FPATH, end_date=None, use_sensors=None, use_datetime=False):
    all_data = load_data(FPATH, EPS, use_sensors=use_sensors, use_datetime=use_datetime)
    return all_data
Example 11

def TUNE(args, model, mode, n_calls=5):
    hyperparameters_all = {
        'batch_size': range(40, 130, 20),
        'seq_len': [42],
        'num_hidden': np.random.randint(100, 501, 10),
        'learning_rate': [0.0005],
        'dropout_output': np.arange(0.3, 1.1, 0.1),
        'dropout_input': np.arange(0.3, 1.1, 0.1),
        'clip_norm': np.arange(0.5, 1.01, 0.1),
    }
    maxx = 0
    data = load_data(args, args.data, saved=args.load_data)
    if mode == 'rand':
        samp = random_search(hyperparameters_all, n_calls)  # random search
    else:
        samp = expand_grid(hyperparameters_all)  # grid search
    for hyperparameters in samp:
        print("Evaluating hyperparameters:", hyperparameters)
        for attr, value in hyperparameters.items():
            setattr(args, attr, value)
        scores = run_network(args, data, model, tuning=args.tune)
        test_score, eval_score = scores
        if eval_score[0] > maxx:
            maxx = eval_score[0]
            best_score = test_score
            hyperparameters_best = hyperparameters
        tf.reset_default_graph()
    print()
    print("Optimisation finished..")
    print("Optimised hyperparameters:")
    with open(os.path.dirname(args.checkpoint_file) + '/checkpoint', 'w') as fp:
        fp.write('%s:"%s"\n' % ('model', args.model))
        for attr, value in sorted(hyperparameters_best.items()):
            print("{}={}".format(attr.upper(), value))
            fp.write('%s:"%s"\n' % (attr, value))
    print()
    print("Final Test Data Accuracy = {:.5f}; 3-class F1 = {:.5f}; 2-class F1 = {:.5f}"
          .format(best_score[0], best_score[1], best_score[2]))
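Example 11 depends on two helpers that are not shown, random_search() and expand_grid(). The sketches below show what they plausibly do (draw n random combinations, or enumerate the full Cartesian product); only the names come from the example, the bodies are assumptions.

# Hypothetical sketches of the sampling helpers used in Example 11.
import itertools
import random

def random_search(param_space, n_calls):
    """Draw n_calls random hyper-parameter combinations from the search space."""
    return [{k: random.choice(list(v)) for k, v in param_space.items()}
            for _ in range(n_calls)]

def expand_grid(param_space):
    """Enumerate the full Cartesian product of the search space (grid search)."""
    keys = list(param_space.keys())
    return [dict(zip(keys, values))
            for values in itertools.product(*(list(param_space[k]) for k in keys))]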
Example 12

def TRAIN(args, model):
    t0 = time.time()
    print("\nParameters:")
    for attr, value in sorted(vars(args).items()):
        print("{}={}".format(attr.upper(), value))
    print()
    print("Graph initialized..")
    t1 = time.time()
    print("time taken:", t1 - t0)
    print()
    data = load_data(args, args.data, saved=args.load_data)
    run_network(args, data, model, tuning=args.tune)
Example 13

def main(args):
    if os.path.isfile(args.vocab_file):
        en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(open(args.vocab_file, "rb"))
    else:
        print("vocab file does not exist!")
        exit(-1)
    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}
    if os.path.isfile(args.model_file):
        model = torch.load(args.model_file)
    else:
        print("model file does not exist!")
        exit(-1)
    if args.use_cuda:
        model = model.cuda()
    crit = utils.LanguageModelCriterion()
    test_en, test_cn = utils.load_data(args.test_file)
    args.num_test = len(test_en)
    test_en, test_cn = utils.encode(test_en, test_cn, en_dict, cn_dict)
    test_data = utils.gen_examples(test_en, test_cn, args.batch_size)
    translate(model, test_data, en_dict, inv_en_dict, cn_dict, inv_cn_dict)
    correct_count, loss, num_words = eval(model, test_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("test loss %s" % (loss))
    print("test accuracy %f" % (acc))
    print("test total number of words %f" % (num_words))
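Example 13 relies on utils.encode() and utils.gen_examples(), which are not shown. The sketch below gives one plausible reading of gen_examples(): splitting two parallel lists of encoded sentences into mini-batches. This is an assumption; the real helper may additionally sort by length and pad.

# Hypothetical sketch of gen_examples(); the batching logic is an assumption.
def gen_examples(en_sentences, cn_sentences, batch_size):
    """Split two parallel lists of encoded sentences into mini-batches."""
    batches = []
    for start in range(0, len(en_sentences), batch_size):
        batches.append((en_sentences[start:start + batch_size],
                        cn_sentences[start:start + batch_size]))
    return batches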
Example 14

def sample(args):
    print('Loading data')
    x, y, vocabulary, vocabulary_inv = utils.load_data()
    text = [list(args.text)]
    sentences_padded = utils.pad_sentences(text, maxlen=x.shape[1])
    raw_x, dummy_y = utils.build_input_data(sentences_padded, [0], vocabulary)
    checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            predicted_result = sess.run(predictions, {input_x: raw_x, dropout_keep_prob: 1.0})
            if predicted_result[0] == 0:
                print(args.text + ": negative")
            else:
                print(args.text + ": positive")
Example 15

def train_model(opt, logger):
    logger.info('---START---')
    # initialize for reproducibility
    np.random.seed(opt.seed)
    # load data
    logger.info('---LOAD DATA---')
    opt, training, training_snli, validation, test_matched, test_mismatched = load_data(opt)
    if not opt.skip_train:
        logger.info('---TRAIN MODEL---')
        for train_counter in range(opt.max_epochs):
            if train_counter == 0:
                model = build_model(opt)
            else:
                model = load_model_local(opt)
            np.random.seed(train_counter)
            lens = len(training_snli[-1])
            perm = np.random.permutation(lens)
            idx = perm[:int(lens * 0.2)]
            train_data = [np.concatenate((training[0], training_snli[0][idx])),
                          np.concatenate((training[1], training_snli[1][idx])),
                          np.concatenate((training[2], training_snli[2][idx]))]
            csv_logger = CSVLogger('{}{}.csv'.format(opt.log_dir, opt.model_name), append=True)
            cp_filepath = opt.save_dir + "cp-" + opt.model_name + "-" + str(train_counter) + "-{val_acc:.2f}.h5"
            cp = ModelCheckpoint(cp_filepath, monitor='val_acc', save_best_only=True, save_weights_only=True)
            callbacks = [cp, csv_logger]
            model.fit(train_data[:-1], train_data[-1],
                      batch_size=opt.batch_size, epochs=1,
                      validation_data=(validation[:-1], validation[-1]),
                      callbacks=callbacks)
            save_model_local(opt, model)
    else:
        logger.info('---LOAD MODEL---')
        model = load_model_local(opt)
    # predict
    logger.info('---TEST MODEL---')
    preds_matched = model.predict(test_matched[:-1], batch_size=128, verbose=1)
    preds_mismatched = model.predict(test_mismatched[:-1], batch_size=128, verbose=1)
    save_preds_matched_to_csv(preds_matched, test_mismatched[-1], opt)
    save_preds_mismatched_to_csv(preds_mismatched, test_mismatched[-1], opt)
Example 16

def main(args):
    '''Module main function'''
    global database
    global genetic_algorithm
    global joint_positions
    global goal_positions
    pygame.init()
    random.seed()
    database = utils.initialize_database(args, 'RobotTrainingData')
    database.set_objective_names(['Tiempo', r'Error en $\theta_1$', r'Error en $\theta_2$',
                                  r'Error en $\theta_3$', 'Energía'])
    problem = EV3Problem()
    generation = database.properties['highest_population']
    population_size = database.properties['population_size']
    genetic_algorithm = evolution.NSGA(problem, population_size)
    x_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'x_train.txt'))
    y_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'y_train.txt'))
    batch_start = (generation % 10) * N_GOALS
    joint_positions = np.loadtxt(x_path)[batch_start:batch_start + N_GOALS, :]
    goal_positions = np.loadtxt(y_path)[batch_start:batch_start + N_GOALS, :]
    if generation > 0:
        parents, children = utils.load_data(database)
        genetic_algorithm.set_population(parents)
        genetic_algorithm.set_children(children)
    for _ in range(args.iterations):
        generation += 1
        print('Starting generation ' + str(generation))
        genetic_algorithm.iterate()
        database.create_population()
        utils.save_data(genetic_algorithm, database)
        print('=' * (SCREEN_WIDTH - 1))
Example 17

def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully.........')
    layer1_rf_paramters = {
        'max_depth': range(15, 21),
        'max_features': [0.5, 0.6, 0.8],
        'min_samples_leaf': [1, 3, 10]
    }
    print('layer 1 train..........')
    layer1_rf = RandomForestRegressor(
        n_estimators=2500,
        n_jobs=-1
    )
    layer1_gs_rf = GridSearchCV(layer1_rf, param_grid=layer1_rf_paramters)
    layer1_gs_rf.fit(train_x, train_y)
    ################# save model ##################
    joblib.dump(layer1_gs_rf, 'weights/layer1_' + Model_Name + '.m')
    # layer1_rf = joblib.load('weights/layer1_' + Model_Name + '.m')
    tr_pred = layer1_gs_rf.predict(train_x)
    train_x = feature_engineer(layer1_gs_rf, train_x, tr_pred)
    te_pred = layer1_gs_rf.predict(test_x)
    test_x = feature_engineer(layer1_gs_rf, test_x, te_pred)
    print('layer 2 train ............')
    layer2_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=1000,
        max_features='sqrt',
        max_depth=18,
        bootstrap=False
    )
    layer2_rf.fit(train_x, train_y)
    joblib.dump(layer2_rf, 'weights/layer2_' + Model_Name + '.m')
    y_pred = layer2_rf.predict(test_x)
    ############ save results ########################
    save_results(result_csv_path, y_pred)
Example 18

def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully.........')
    print('layer 1 train..........')
    layer1_rf = RandomForestRegressor(
        n_estimators=2500,
        max_features=0.8,
        bootstrap=False,
        max_depth=15,
        n_jobs=-1
    )
    layer1_rf.fit(train_x, train_y)
    ################# save model ##################
    joblib.dump(layer1_rf, 'weights/layer1_' + Model_Name + '.m')
    # layer1_rf = joblib.load('weights/layer1_' + Model_Name + '.m')
    tr_pred = layer1_rf.predict(train_x)
    train_x = feature_engineer(layer1_rf, train_x, tr_pred)
    te_pred = layer1_rf.predict(test_x)
    test_x = feature_engineer(layer1_rf, test_x, te_pred)
    print('layer 2 train ............')
    layer2_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=800,   # [600]
        max_features='sqrt',
        max_depth=20,
        bootstrap=False
    )
    layer2_rf.fit(train_x, train_y)
    joblib.dump(layer2_rf, 'weights/layer2_' + Model_Name + '.m')
    tr_pred = layer2_rf.predict(train_x)
    train_x = feature_engineer(layer2_rf, train_x, tr_pred)
    te_pred = layer2_rf.predict(test_x)
    test_x = feature_engineer(layer2_rf, test_x, te_pred)
    print('layer 3 train ..............')
    layer3_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=600,   # [500]
        max_features='sqrt',
        max_depth=20,
        bootstrap=False
    )
    layer3_rf.fit(train_x, train_y)
    joblib.dump(layer3_rf, 'weights/layer3_' + Model_Name + '.m')
    y_pred = layer3_rf.predict(test_x)
    ############ save results ########################
    save_results(result_csv_path, y_pred)
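Examples 17 and 18 stack random forests, feeding each layer's predictions back into the features through a feature_engineer() helper that is not shown. A minimal sketch of that pattern follows; appending the prediction as a single extra column is an assumption, and the model argument is kept only to match the call signature.

# Hypothetical sketch of feature_engineer(); the real implementation is not shown.
import numpy as np

def feature_engineer(model, x, pred):
    """Append the previous layer's predictions to the feature matrix."""
    return np.hstack([x, np.asarray(pred).reshape(-1, 1)])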
Example 19

def run():
    tr_x, tr_y = load_data(TRAIN, True)
    te_x = load_data(TEST, False)
    rf = RandomForestClassifier(
        n_estimators=500,
        max_depth=11,
        min_samples_split=2,
        bootstrap=True,
        warm_start=True,
        max_features='sqrt',
        criterion='entropy',
        class_weight='balanced',
        n_jobs=-1
    )
    #rf.fit(tr_x,tr_y)
    ##feature_importances = rf.feature_importances_
    ##dic_feature_importances = dict(zip(Features,feature_importances))
    ##dic = sorted(dic_feature_importances.iteritems(),key=lambda d:d[1],reverse=True)
    ##print('===========================\n')
    ##print('feature_importances:')
    ##for i in range(len(dic)):
    ##    print(dic[i][0]+":\t"+str(dic[i][1]))
    #te_pred = rf.predict(te_x)
    #save_results(result_csv_path,te_pred)
    #sum_acc = 0
    #cv = 10
    #kf = KFold(tr_x.shape[0],n_folds=cv,shuffle=True)
    #for train,val in kf:
    #    x_tr,x_val,y_tr,y_val = tr_x[train],tr_x[val],tr_y[train],tr_y[val]
    #    rf.fit(x_tr,y_tr)
    #    pred_val = rf.predict(x_val)
    #    true_count = 0
    #    for i in range(len(y_val)):
    #        if y_val[i] == pred_val[i]:
    #            true_count += 1
    #    acc = true_count*1.0/len(pred_val)
    #    sum_acc += acc
    #    print('acc :'+str(acc))
    #print('avg acc:'+str(sum_acc/cv))
    cv = 10
    scores = cross_val_score(rf, tr_x, tr_y, cv=cv, scoring='f1_weighted')
    avg_score = sum(scores) / cv
    print(str(scores))
    print('scores:\t' + str(avg_score))
    #while True:
    #    #rf.fit(tr_x,tr_y)
    #    scores = cross_val_score(rf,tr_x,tr_y,cv=cv,scoring='f1_weighted')
    #    avg_score = sum(scores)/cv
    #    print(str(scores))
    #    print('scores:\t'+str(avg_score))
    #    if avg_score > 0.6:
    #        te_pred = rf.predict(te_x)
    #        save_results(result_csv_path,te_pred)
    #        break
    #print(str(scores))
    #print(str(sum(scores)/cv))