The following 19 code examples, extracted from open-source Python projects, illustrate how utils.load_data() is used in practice.
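The signature of load_data() differs from project to project: some versions take a CSV path and a label flag, others an argparse namespace, a database handle, or no arguments at all. As a rough orientation, a minimal CSV-backed loader might look like the sketch below; the pandas-based implementation and the has_labels parameter are assumptions for illustration, not the API of any project in this list.

# Minimal, hypothetical sketch of a CSV-backed load_data();
# not taken from any of the projects below.
import pandas as pd

def load_data(csv_path, has_labels=True):
    """Read a CSV file and return features (and labels when present)."""
    df = pd.read_csv(csv_path)
    if has_labels:
        x = df.iloc[:, :-1].to_numpy()   # every column except the last
        y = df.iloc[:, -1].to_numpy()    # last column as the target
        return x, y
    return df.to_numpy()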
Example 1

def train(ENV, args):
    processed_train_data_path = os.path.join(ENV.processed_data_path, 'processed_train.pkl')
    processed_test_data_path = os.path.join(ENV.processed_data_path, 'processed_test.pkl')
    if os.path.exists(processed_train_data_path) and os.path.exists(processed_test_data_path):
        # pickle files must be opened in binary mode
        processed_train_data = pickle.load(open(processed_train_data_path, 'rb'))
        processed_test_data = pickle.load(open(processed_test_data_path, 'rb'))
    else:
        train_wav_files, train_phn_files = load_data(ENV.train_data)
        print('Process train data...')
        processed_train_data = process_data(train_wav_files, train_phn_files)
        test_wav_files, test_phn_files = load_data(ENV.test_data)
        print('Process test data...')
        processed_test_data = process_data(test_wav_files, test_phn_files)
        pickle.dump(processed_train_data, open(processed_train_data_path, 'wb'))
        pickle.dump(processed_test_data, open(processed_test_data_path, 'wb'))
    # print(processed_train_data[0][1])
    print("Define graph...")
    train_model(ENV, processed_train_data, processed_test_data)
Example 2

def skoptTUNE(args, model, n_calls):
    """
    Hyper-parameter optimization using scikit-optimize.
    It offers three algorithms: forest_minimize (decision-tree regression search),
    gbrt_minimize (gradient-boosted-tree search), and gp_minimize
    (Gaussian process regression search).
    """
    hyperparameters = {
        'batch_size': (40, 120),
        'num_hidden': (100, 500),
        'dropout_output': (0.3, 1.0),
        'dropout_input': (0.3, 1.0),
        'clip_norm': (0.5, 1.0),
    }
    data = load_data(args, args.data, saved=args.load_data)
    all_res = skopt_search(args, data, model, hyperparameters, gp_minimize, n_calls=n_calls)
    print(all_res)
Example 3

def hyperoptTUNE(args, model, n_calls):
    """
    Search the hyper-parameter space according to the tree of Parzen estimators,
    a Bayesian approach.
    """
    hyperparameters = {
        'batch_size': hp.choice('batch_size', range(40, 130, 20)),
        'num_hidden': hp.quniform('num_hidden', 100, 500, 1),
        # 'learning_rate': hp.choice('learning_rate', [0.0005]),
        'dropout_output': hp.quniform('dropout_output', 0.3, 1.0, 0.1),
        'dropout_input': hp.quniform('dropout_input', 0.3, 1.0, 0.1),
        'clip_norm': hp.quniform('clip_norm', 0.5, 1.0, 0.1),
    }
    data = load_data(args, args.data, saved=args.load_data)
    best_params, all_res = hyperopt_search(args, data, model, hyperparameters, max_evals=n_calls)
    print(best_params)
Example 4

def main(args):
    '''Module main method'''
    random.seed()
    problem = MathProblem()
    database = utils.initialize_database(args, 'MathDatabase')
    database.set_objective_names(['cos', 'sinc'])
    generation = database.properties['highest_population']
    population_size = database.properties['population_size']
    genetic_algorithm = evolution.NSGA(problem, population_size)
    if generation > 0:
        parents, children = utils.load_data(database)
        genetic_algorithm.set_population(parents)
        genetic_algorithm.set_children(children)
    for _ in range(args.iterations):
        generation += 1
        print('Starting generation ' + str(generation))
        genetic_algorithm.iterate()
        database.create_population()
        utils.save_data(genetic_algorithm, database)
        print('=' * (SCREEN_WIDTH - 1))
Example 5

def read_gram_from_file(i, n, l, comment=""):
    data = load_data(_get_gram_file_name(i, n, l, comment=comment))
    return data
Example 6

def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully ......')
    rf = RandomForestRegressor(
        n_estimators=2000,      # [1500, 2000]
        min_samples_split=2,
        max_depth=15,           # [10, 15]
        n_jobs=-1
    )
    rf.fit(train_x, train_y)
    ###### save model ##################
    joblib.dump(rf, 'weights/' + Model_Name + '.m')
    y_pred = rf.predict(test_x)
    ####### save results ###########################
    save_results(result_csv_path, y_pred)
    ###### generate report #######################
    feature_importances = rf.feature_importances_
    dic_feature_importances = dict(zip(fields, feature_importances))
    dic = sorted(dic_feature_importances.items(), key=lambda d: d[1], reverse=True)
    print('feature_importances:')
    for i in range(len(dic)):
        print(dic[i][0] + ":\t" + str(dic[i][1]))
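Example 6 (and several later examples) also calls a save_results() helper that is not shown. A plausible sketch follows; the single-column CSV layout is an assumption, since the real output format is not visible here.

# Hypothetical sketch of save_results(); the real output format is not shown.
import csv

def save_results(result_csv_path, y_pred):
    """Write one prediction per row to a CSV file."""
    with open(result_csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['prediction'])
        for value in y_pred:
            writer.writerow([value])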
Example 7

def step_data(FPATH, end_date=None):
    all_data = load_data(FPATH, EPS, end_date=end_date, use_sensors=[5])
    return all_data

Example 8

def step_data():
    EPS = 1e-6
    all_data = load_data(FPATH, EPS)
    return all_data

Example 9

def step_data(FPATH, end_date=None):
    all_data = load_data(FPATH, EPS, end_date=end_date)
    return all_data

Example 10

def step_data(FPATH, end_date=None, use_sensors=None, use_datetime=False):
    all_data = load_data(FPATH, EPS, use_sensors=use_sensors, use_datetime=use_datetime)
    return all_data
Example 11

def TUNE(args, model, mode, n_calls=5):
    hyperparameters_all = {
        'batch_size': range(40, 130, 20),
        'seq_len': [42],
        'num_hidden': np.random.randint(100, 501, 10),
        'learning_rate': [0.0005],
        'dropout_output': np.arange(0.3, 1.1, 0.1),
        'dropout_input': np.arange(0.3, 1.1, 0.1),
        'clip_norm': np.arange(0.5, 1.01, 0.1),
    }
    maxx = 0
    data = load_data(args, args.data, saved=args.load_data)
    if mode == 'rand':
        samp = random_search(hyperparameters_all, n_calls)  # random search
    else:
        samp = expand_grid(hyperparameters_all)  # grid search
    for hyperparameters in samp:
        print("Evaluating hyperparameters:", hyperparameters)
        for attr, value in hyperparameters.items():
            setattr(args, attr, value)
        scores = run_network(args, data, model, tuning=args.tune)
        test_score, eval_score = scores
        if eval_score[0] > maxx:
            maxx = eval_score[0]
            best_score = test_score
            hyperparameters_best = hyperparameters
        tf.reset_default_graph()
    print()
    print("Optimisation finished..")
    print("Optimised hyperparameters:")
    with open(os.path.dirname(args.checkpoint_file) + '/checkpoint', 'w') as fp:
        fp.write('%s:"%s"\n' % ('model', args.model))
        for attr, value in sorted(hyperparameters_best.items()):
            print("{}={}".format(attr.upper(), value))
            fp.write('%s:"%s"\n' % (attr, value))
    print()
    print("Final Test Data Accuracy = {:.5f}; 3-class F1 = {:.5f}; 2-class F1 = {:.5f}"
          .format(best_score[0], best_score[1], best_score[2]))
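Example 11 depends on two helpers that are not shown, random_search() and expand_grid(). The sketches below show what they plausibly do (draw n random combinations, or enumerate the full Cartesian product); only the names come from the example, the bodies are assumptions.

# Hypothetical sketches of the sampling helpers used in Example 11.
import itertools
import random

def random_search(param_space, n_calls):
    """Draw n_calls random hyper-parameter combinations from the search space."""
    return [{k: random.choice(list(v)) for k, v in param_space.items()}
            for _ in range(n_calls)]

def expand_grid(param_space):
    """Enumerate the full Cartesian product of the search space (grid search)."""
    keys = list(param_space.keys())
    return [dict(zip(keys, values))
            for values in itertools.product(*(list(param_space[k]) for k in keys))]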
Example 12

def TRAIN(args, model):
    t0 = time.time()
    print("\nParameters:")
    for attr, value in sorted(vars(args).items()):
        print("{}={}".format(attr.upper(), value))
    print()
    print("Graph initialized..")
    t1 = time.time()
    print("time taken:", t1 - t0)
    print()
    data = load_data(args, args.data, saved=args.load_data)
    run_network(args, data, model, tuning=args.tune)
Example 13

def main(args):
    if os.path.isfile(args.vocab_file):
        en_dict, cn_dict, en_total_words, cn_total_words = pickle.load(open(args.vocab_file, "rb"))
    else:
        print("vocab file does not exist!")
        exit(-1)
    args.en_total_words = en_total_words
    args.cn_total_words = cn_total_words
    inv_en_dict = {v: k for k, v in en_dict.items()}
    inv_cn_dict = {v: k for k, v in cn_dict.items()}
    if os.path.isfile(args.model_file):
        model = torch.load(args.model_file)
    else:
        print("model file does not exist!")
        exit(-1)
    if args.use_cuda:
        model = model.cuda()
    crit = utils.LanguageModelCriterion()
    test_en, test_cn = utils.load_data(args.test_file)
    args.num_test = len(test_en)
    test_en, test_cn = utils.encode(test_en, test_cn, en_dict, cn_dict)
    test_data = utils.gen_examples(test_en, test_cn, args.batch_size)
    translate(model, test_data, en_dict, inv_en_dict, cn_dict, inv_cn_dict)
    correct_count, loss, num_words = eval(model, test_data, args, crit)
    loss = loss / num_words
    acc = correct_count / num_words
    print("test loss %s" % (loss))
    print("test accuracy %f" % (acc))
    print("test total number of words %f" % (num_words))
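Example 13 relies on utils.encode() and utils.gen_examples(), which are not shown. The sketch below gives one plausible reading of gen_examples(): splitting two parallel lists of encoded sentences into mini-batches. This is an assumption; the real helper may additionally sort by length and pad.

# Hypothetical sketch of gen_examples(); the batching logic is an assumption.
def gen_examples(en_sentences, cn_sentences, batch_size):
    """Split two parallel lists of encoded sentences into mini-batches."""
    batches = []
    for start in range(0, len(en_sentences), batch_size):
        batches.append((en_sentences[start:start + batch_size],
                        cn_sentences[start:start + batch_size]))
    return batches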
Example 14

def sample(args):
    print('Loading data')
    x, y, vocabulary, vocabulary_inv = utils.load_data()
    text = [list(args.text)]
    sentences_padded = utils.pad_sentences(text, maxlen=x.shape[1])
    raw_x, dummy_y = utils.build_input_data(sentences_padded, [0], vocabulary)
    checkpoint_file = tf.train.latest_checkpoint(args.checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            # input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            predicted_result = sess.run(predictions, {input_x: raw_x, dropout_keep_prob: 1.0})
            if predicted_result[0] == 0:
                print(args.text + ": negative")
            else:
                print(args.text + ": positive")
Example 15

def train_model(opt, logger):
    logger.info('---START---')
    # initialize for reproducibility
    np.random.seed(opt.seed)
    # load data
    logger.info('---LOAD DATA---')
    opt, training, training_snli, validation, test_matched, test_mismatched = load_data(opt)
    if not opt.skip_train:
        logger.info('---TRAIN MODEL---')
        for train_counter in range(opt.max_epochs):
            if train_counter == 0:
                model = build_model(opt)
            else:
                model = load_model_local(opt)
            np.random.seed(train_counter)
            lens = len(training_snli[-1])
            perm = np.random.permutation(lens)
            idx = perm[:int(lens * 0.2)]
            train_data = [np.concatenate((training[0], training_snli[0][idx])),
                          np.concatenate((training[1], training_snli[1][idx])),
                          np.concatenate((training[2], training_snli[2][idx]))]
            csv_logger = CSVLogger('{}{}.csv'.format(opt.log_dir, opt.model_name), append=True)
            cp_filepath = opt.save_dir + "cp-" + opt.model_name + "-" + str(train_counter) + "-{val_acc:.2f}.h5"
            cp = ModelCheckpoint(cp_filepath, monitor='val_acc', save_best_only=True, save_weights_only=True)
            callbacks = [cp, csv_logger]
            model.fit(train_data[:-1], train_data[-1],
                      batch_size=opt.batch_size, epochs=1,
                      validation_data=(validation[:-1], validation[-1]),
                      callbacks=callbacks)
            save_model_local(opt, model)
    else:
        logger.info('---LOAD MODEL---')
        model = load_model_local(opt)
    # predict
    logger.info('---TEST MODEL---')
    preds_matched = model.predict(test_matched[:-1], batch_size=128, verbose=1)
    preds_mismatched = model.predict(test_mismatched[:-1], batch_size=128, verbose=1)
    save_preds_matched_to_csv(preds_matched, test_mismatched[-1], opt)
    save_preds_mismatched_to_csv(preds_mismatched, test_mismatched[-1], opt)
Example 16

def main(args):
    '''Module main function'''
    global database
    global genetic_algorithm
    global joint_positions
    global goal_positions
    pygame.init()
    random.seed()
    database = utils.initialize_database(args, 'RobotTrainingData')
    database.set_objective_names(['Tiempo', r'Error en $\theta_1$', r'Error en $\theta_2$',
                                  r'Error en $\theta_3$', 'Energía'])
    problem = EV3Problem()
    generation = database.properties['highest_population']
    population_size = database.properties['population_size']
    genetic_algorithm = evolution.NSGA(problem, population_size)
    x_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'x_train.txt'))
    y_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'y_train.txt'))
    batch_start = (generation % 10) * N_GOALS
    joint_positions = np.loadtxt(x_path)[batch_start:batch_start + N_GOALS, :]
    goal_positions = np.loadtxt(y_path)[batch_start:batch_start + N_GOALS, :]
    if generation > 0:
        parents, children = utils.load_data(database)
        genetic_algorithm.set_population(parents)
        genetic_algorithm.set_children(children)
    for _ in range(args.iterations):
        generation += 1
        print('Starting generation ' + str(generation))
        genetic_algorithm.iterate()
        database.create_population()
        utils.save_data(genetic_algorithm, database)
        print('=' * (SCREEN_WIDTH - 1))
Example 17

def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully.........')
    layer1_rf_paramters = {
        'max_depth': range(15, 21),
        'max_features': [0.5, 0.6, 0.8],
        'min_samples_leaf': [1, 3, 10]
    }
    print('layer 1 train..........')
    layer1_rf = RandomForestRegressor(
        n_estimators=2500,
        n_jobs=-1
    )
    layer1_gs_rf = GridSearchCV(layer1_rf, param_grid=layer1_rf_paramters)
    layer1_gs_rf.fit(train_x, train_y)
    ################# save model ##################
    joblib.dump(layer1_gs_rf, 'weights/layer1_' + Model_Name + '.m')
    # layer1_rf = joblib.load('weights/layer1_' + Model_Name + '.m')
    tr_pred = layer1_gs_rf.predict(train_x)
    train_x = feature_engineer(layer1_gs_rf, train_x, tr_pred)
    te_pred = layer1_gs_rf.predict(test_x)
    test_x = feature_engineer(layer1_gs_rf, test_x, te_pred)
    print('layer 2 train ............')
    layer2_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=1000,
        max_features='sqrt',
        max_depth=18,
        bootstrap=False
    )
    layer2_rf.fit(train_x, train_y)
    joblib.dump(layer2_rf, 'weights/layer2_' + Model_Name + '.m')
    y_pred = layer2_rf.predict(test_x)
    ############ save results ########################
    save_results(result_csv_path, y_pred)
Example 18

def run(result_csv_path):
    train_x, train_y = load_data(train_csv_path, True)
    test_x = load_data(test_csv_path, False)
    print('load data successfully.........')
    print('layer 1 train..........')
    layer1_rf = RandomForestRegressor(
        n_estimators=2500,
        max_features=0.8,
        bootstrap=False,
        max_depth=15,
        n_jobs=-1
    )
    layer1_rf.fit(train_x, train_y)
    ################# save model ##################
    joblib.dump(layer1_rf, 'weights/layer1_' + Model_Name + '.m')
    # layer1_rf = joblib.load('weights/layer1_' + Model_Name + '.m')
    tr_pred = layer1_rf.predict(train_x)
    train_x = feature_engineer(layer1_rf, train_x, tr_pred)
    te_pred = layer1_rf.predict(test_x)
    test_x = feature_engineer(layer1_rf, test_x, te_pred)
    print('layer 2 train ............')
    layer2_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=800,   # [600]
        max_features='sqrt',
        max_depth=20,
        bootstrap=False
    )
    layer2_rf.fit(train_x, train_y)
    joblib.dump(layer2_rf, 'weights/layer2_' + Model_Name + '.m')
    tr_pred = layer2_rf.predict(train_x)
    train_x = feature_engineer(layer2_rf, train_x, tr_pred)
    te_pred = layer2_rf.predict(test_x)
    test_x = feature_engineer(layer2_rf, test_x, te_pred)
    print('layer 3 train ..............')
    layer3_rf = RandomForestRegressor(
        n_jobs=-1,
        n_estimators=600,   # [500]
        max_features='sqrt',
        max_depth=20,
        bootstrap=False
    )
    layer3_rf.fit(train_x, train_y)
    joblib.dump(layer3_rf, 'weights/layer3_' + Model_Name + '.m')
    y_pred = layer3_rf.predict(test_x)
    ############ save results ########################
    save_results(result_csv_path, y_pred)
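Examples 17 and 18 stack random forests, feeding each layer's predictions back into the features through a feature_engineer() helper that is not shown. A minimal sketch of that pattern follows; appending the prediction as a single extra column is an assumption, and the model argument is kept only to match the call signature.

# Hypothetical sketch of feature_engineer(); the real implementation is not shown.
import numpy as np

def feature_engineer(model, x, pred):
    """Append the previous layer's predictions to the feature matrix."""
    return np.hstack([x, np.asarray(pred).reshape(-1, 1)])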
Example 19

def run():
    tr_x, tr_y = load_data(TRAIN, True)
    te_x = load_data(TEST, False)
    rf = RandomForestClassifier(
        n_estimators=500,
        max_depth=11,
        min_samples_split=2,
        bootstrap=True,
        warm_start=True,
        max_features='sqrt',
        criterion='entropy',
        class_weight='balanced',
        n_jobs=-1
    )
    #rf.fit(tr_x,tr_y)
    ##feature_importances = rf.feature_importances_
    ##dic_feature_importances = dict(zip(Features,feature_importances))
    ##dic = sorted(dic_feature_importances.iteritems(),key=lambda d:d[1],reverse=True)
    ##print('===========================\n')
    ##print('feature_importances:')
    ##for i in range(len(dic)):
    ##    print(dic[i][0]+":\t"+str(dic[i][1]))
    #te_pred = rf.predict(te_x)
    #save_results(result_csv_path,te_pred)
    #sum_acc = 0
    #cv = 10
    #kf = KFold(tr_x.shape[0],n_folds=cv,shuffle=True)
    #for train,val in kf:
    #    x_tr,x_val,y_tr,y_val = tr_x[train],tr_x[val],tr_y[train],tr_y[val]
    #    rf.fit(x_tr,y_tr)
    #    pred_val = rf.predict(x_val)
    #    true_count = 0
    #    for i in range(len(y_val)):
    #        if y_val[i] == pred_val[i]:
    #            true_count += 1
    #    acc = true_count*1.0/len(pred_val)
    #    sum_acc += acc
    #    print('acc :'+str(acc))
    #print('avg acc:'+str(sum_acc/cv))
    cv = 10
    scores = cross_val_score(rf, tr_x, tr_y, cv=cv, scoring='f1_weighted')
    avg_score = sum(scores) / cv
    print(str(scores))
    print('scores:\t' + str(avg_score))
    #while True:
    #    #rf.fit(tr_x,tr_y)
    #    scores = cross_val_score(rf,tr_x,tr_y,cv=cv,scoring='f1_weighted')
    #    avg_score = sum(scores)/cv
    #    print(str(scores))
    #    print('scores:\t'+str(avg_score))
    #    if avg_score > 0.6:
    #        te_pred = rf.predict(te_x)
    #        save_results(result_csv_path,te_pred)
    #        break
    #print(str(scores))
    #print(str(sum(scores)/cv))