我们从Python开源项目中,提取了以下7个代码示例,用于说明如何使用data.Dataset()。
def _get_next_minibatch(self): """For txt mode, read image in training process""" if params._use_prefetch: return self._blob_queue.get() db_inds = self._get_next_minibatch_inds() datum = [] #dataset = data.Dataset() data = [] def addIm(img): im = cv2.imread(img) im = dataset.pad(im) im = im - params._mean # change (h,w,c) to (c,h,w) im = np.rollaxis(im,2) data.append(im) for i in db_inds: for idx in xrange(3): try: addIm(self._db[i][idx]) except: print "error with read line: ",self._db[i] data = np.array(data) datum.append(data.copy()) blobs = {name: datum[i] for name,i in self._name_to_top_map.iteritems()} return blobs
def _get_next_minibatch(self): """For txt mode, read image in training process""" db_inds = self._get_next_minibatch_inds() datum = [] #dataset = data.Dataset() data = [] def addIm(img): im = cv2.imread(img) im = dataset.pad(im) im = im - params._mean # change (h,w,c) to (c,h,w) im = np.rollaxis(im,2) data.append(im) for i in db_inds: for idx in xrange(3): try: addIm(self._db[i][idx]) except Exception as e: print "error with read line: ",self._db[i] print "error type: ",e sys.exit(0) data = np.array(data) datum.append(data.copy()) blobs = {name: datum[i] for name,i in self._name_to_top_map.iteritems()} return blobs
def __init__(self, model):
    """Set up directories, dataset, model, and optimizer from FLAGS."""
    # Hyper-parameters and paths, all taken from command-line FLAGS.
    self.batch_size = FLAGS.batch_size
    self.iterations = FLAGS.iterations
    self.learning_rate = FLAGS.learning_rate
    self.model_dir = FLAGS.model_dir      # directory to write model summaries to
    self.dataset_dir = FLAGS.dataset_dir  # directory containing data
    self.samples_dir = FLAGS.samples_dir  # directory for sampled images
    self.device_id = FLAGS.device_id
    self.use_gpu = FLAGS.use_gpu

    # Create any output/input directories that don't exist yet.
    for directory in (self.model_dir, self.dataset_dir, self.samples_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Pin graph construction to the requested device.
    device_str = '/gpu:' + str(self.device_id) if self.use_gpu else '/cpu:0'
    with tf.device(device_str):
        self.global_step = tf.get_variable(
            "global_step", [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        # Parse data and create the model from its image tensors.
        self.dataset = Dataset(self.dataset_dir, self.iterations, self.batch_size)
        self.model = model(self.dataset.hr_images, self.dataset.lr_images)
        # Halve the learning rate every 500k steps (staircase schedule).
        decayed_lr = tf.train.exponential_decay(
            self.learning_rate, self.global_step, 500000, 0.5, staircase=True)
        rms_opt = tf.train.RMSPropOptimizer(
            decayed_lr, decay=0.95, momentum=0.9, epsilon=1e-8)
        self.train_optimizer = rms_opt.minimize(
            self.model.loss, global_step=self.global_step)
def _get_next_minibatch_mp(self): """For multiprocessing mode, read image in training process""" db_inds = self._get_next_minibatch_inds() datum = [] def addIm(img): im = cv2.imread(img) im = dataset.pad(im) im = im - params._mean # change (h,w,c) to (c,h,w) im = np.rollaxis(im,2) return im #dataset = data.Dataset() lock = threading.Lock() def process(i): for idx in xrange(3): try: im = addIm(self._db[i][idx]) except: print "error with read line: ",self._db[i] lock.acquire() data[str(i)+str(idx)] = im lock.release() data = {} arr = [] #p = ThreadPool(params._num_thread) p.map(process, [i for i in db_inds]) for i in db_inds: arr.append(data[str(i)+str(0)]) arr.append(data[str(i)+str(1)]) arr.append(data[str(i)+str(2)]) arr = np.array(arr) datum.append(arr.copy()) blobs = {name: datum[i] for name,i in self._name_to_top_map.iteritems()} return blobs
def train(net_type, generator_fn_str, dataset_file, build_net_fn, featurized=True):
    """Train a network on a PGN dataset.

    Args:
        net_type: 'to' or 'from' -- selects board orientation/channels.
        generator_fn_str: name of the generator method on Dataset that
            yields training batches.
        dataset_file: path prefix; 'train.pgn'/'test.pgn' are appended.
        build_net_fn: callable building the Keras model.
        featurized: whether to load the featurized board representation.
    """
    d = Dataset(dataset_file + 'train.pgn')
    generator_fn = getattr(d, generator_fn_str)
    # Held-out validation set, loaded eagerly.
    d_test = Dataset(dataset_file + 'test.pgn')
    X_val, y_val = d_test.load(generator_fn.__name__, featurized = featurized, refresh = False, board = net_type)
    # 'to' nets carry an extra leading input, so channels sit at axis 1.
    board_num_channels = X_val[0].shape[1] if net_type == 'to' else X_val[0].shape[0]
    model = build_net_fn(board_num_channels=board_num_channels, net_type=net_type)
    # Timestamp used to key saved checkpoints and plots.
    start_time = str(int(time.time()))
    try:
        plot_model(model, start_time, net_type)
    except Exception:
        # Plotting is best-effort only. Was a bare `except:`, which also
        # swallowed KeyboardInterrupt/SystemExit.
        print("Skipping plot")
    from keras.callbacks import ModelCheckpoint
    # Keep only the best model (by validation loss) on disk.
    checkpointer = ModelCheckpoint(
        filepath = get_filename_for_saving(start_time, net_type),
        verbose = 2,
        save_best_only = True)
    model.fit_generator(generator_fn(featurized=featurized, board=net_type),
                        samples_per_epoch = SAMPLES_PER_EPOCH,
                        nb_epoch = NUMBER_EPOCHS,
                        callbacks = [checkpointer],
                        validation_data = (X_val, y_val),
                        verbose = VERBOSE_LEVEL)
def main():
    """ Run training and export summaries to data_dir/logs for a single test
    setup and a single set of parameters. Summaries include a) TensorBoard
    summaries, b) the latest train/test accuracies and raw edit distances
    (status.txt), c) the latest test predictions along with test ground-truth
    labels (test_label_seqs.pkl, test_prediction_seqs.pkl), d) visualizations
    as training progresses (test_visualizations_######.png)."""
    args = define_and_process_args()
    print('\n', 'ARGUMENTS', '\n\n', args, '\n')

    log_dir = get_log_dir(args)
    print('\n', 'LOG DIRECTORY', '\n\n', log_dir, '\n')

    # Fail fast if the preprocessed data file is missing.
    standardized_data_path = os.path.join(args.data_dir, args.data_filename)
    if not os.path.exists(standardized_data_path):
        message = '%s does not exist.' % standardized_data_path
        raise ValueError(message)

    dataset = data.Dataset(standardized_data_path)
    train_raw_seqs, test_raw_seqs = dataset.get_splits(args.test_users)
    # Each raw sequence becomes an (input, reset, label) triplet.
    train_triplets = [data.prepare_raw_seq(seq) for seq in train_raw_seqs]
    test_triplets = [data.prepare_raw_seq(seq) for seq in test_raw_seqs]
    train_input_seqs, train_reset_seqs, train_label_seqs = zip(*train_triplets)
    test_input_seqs, test_reset_seqs, test_label_seqs = zip(*test_triplets)

    # Look the model class up by name. Was eval('models.' + ...), which
    # executes an arbitrary string built from a CLI argument; getattr is
    # behavior-identical for valid names and cannot run arbitrary code.
    Model = getattr(models, args.model_type + 'Model')
    input_size = dataset.input_size
    target_size = dataset.num_classes

    # This is just to satisfy a low-CPU requirement on our cluster
    # when using GPUs.
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        config = tf.ConfigProto(intra_op_parallelism_threads=2,
                                inter_op_parallelism_threads=2)
    else:
        config = None

    with tf.Session(config=config) as sess:
        model = Model(input_size, target_size, args.num_layers,
                      args.hidden_layer_size, args.init_scale,
                      args.dropout_keep_prob)
        optimizer = optimizers.Optimizer(
            model.loss, args.num_train_sweeps, args.initial_learning_rate,
            args.num_initial_sweeps, args.num_sweeps_per_decay,
            args.decay_factor, args.max_global_grad_norm)
        train(sess, model, optimizer, log_dir, args.batch_size,
              args.num_sweeps_per_summary, args.num_sweeps_per_save,
              train_input_seqs, train_reset_seqs, train_label_seqs,
              test_input_seqs, test_reset_seqs, test_label_seqs)
def validate(model_hdf5, net_type, generator_fn_str, dataset_file, featurized=True):
    """Evaluate saved from/to move-prediction models on the test set.

    model_hdf5: a single checkpoint filename for net_type 'from'/'to',
        or a pair [from_hdf5, to_hdf5] for 'from_to'.
    net_type: which evaluation branch to run: "from", "to", or "from_to".
    generator_fn_str: dataset-loading key passed to Dataset.load.
    dataset_file: path prefix; 'test.pgn' is appended.
    """
    from keras.models import load_model
    import data
    d_test = Dataset(dataset_file + 'test.pgn')
    # y_val[0] / y_val[1] are the from-square / to-square targets.
    X_val, y_val = d_test.load(generator_fn_str, featurized = featurized, refresh = False, board = "both")
    # Reconstruct board objects from state tensors for legality checks.
    boards = data.board_from_state(X_val)
    if net_type == "from":
        model_from = load_model("saved/" + model_hdf5)
        y_hat_from = model_from.predict(X_val)
        # Count predictions whose argmax square is a positive target.
        num_correct = 0
        for i in range(len(boards)):
            if y_val[0][i,np.argmax(y_hat_from[i])] > 0:
                num_correct += 1
        print(num_correct / len(boards))
    elif net_type == "to":
        model_to = load_model("saved/" + model_hdf5)
        # The 'to' net also receives the ground-truth from-square plane,
        # reshaped to (batch, 1, board_h, board_w).
        y_hat_to = model_to.predict([X_val, y_val[0].reshape(y_val[0].shape[0],1,X_val.shape[2],X_val.shape[3])])
        num_correct = 0
        for i in range(len(boards)):
            if y_val[1][i,np.argmax(y_hat_to[i])] > 0:
                num_correct += 1
        print(num_correct / len(boards))
    elif net_type == "from_to":
        # Chain the two nets: predicted from-square feeds the to-net.
        model_from = load_model("saved/" + model_hdf5[0])
        model_to = load_model("saved/" + model_hdf5[1])
        y_hat_from = model_from.predict(X_val)
        for i in range(len(boards)):
            from_square = np.argmax(y_hat_from[i])
            # One-hot plane marking the predicted from-square.
            y_max_from = np.zeros((1,1,X_val.shape[2],X_val.shape[3]))
            y_max_from.flat[from_square] = 1
            y_hat_to = model_to.predict([np.expand_dims(X_val[i], 0), y_max_from])
            to_square = np.argmax(y_hat_to)
            move_attempt = data.move_from_action(from_square, to_square)
            # Report whether the chained prediction is a legal move.
            if boards[i].is_legal(move_attempt):
                print("YAY")
            else:
                print("BOO")
            print(move_attempt)
            # NOTE(review): indentation was lost in extraction; these two
            # statements may belong outside the loop. As written they use
            # argmax over the WHOLE y_val arrays (not row i) -- confirm
            # against the original project.
            move = data.move_from_action(np.argmax(y_val[0]), np.argmax(y_val[1]))
            print(move)