Python data module: Dataset() example source code

We extracted the following 7 code examples from open-source Python projects to illustrate how to use data.Dataset().
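The snippets come from unrelated projects, so each Dataset class has its own interface; what they share is the pattern of constructing a Dataset around a data source and then pulling preprocessed arrays out of it. A minimal hypothetical sketch of that pattern (the class and its pad() helper are illustrative stand-ins, not any project's actual API):

import numpy as np

# Hypothetical sketch only: each project below defines its own Dataset.
# This just shows the construct-then-consume shape they have in common.
class Dataset(object):
    def __init__(self, source):
        self.source = source  # path or handle to the raw data

    def pad(self, im, size=256):
        """Zero-pad an image into a size x size canvas (illustrative)."""
        out = np.zeros((size, size) + im.shape[2:], dtype=im.dtype)
        h, w = min(im.shape[0], size), min(im.shape[1], size)
        out[:h, :w] = im[:h, :w]
        return out

dataset = Dataset('train.txt')
padded = dataset.pad(np.ones((100, 120, 3), dtype=np.uint8))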

Project: QScode    Author: PierreHao    | Project source | File source
def _get_next_minibatch(self):
        """For txt mode, read image in training process"""
        if params._use_prefetch:
            return self._blob_queue.get()
        db_inds = self._get_next_minibatch_inds()
        datum = []
        dataset = data.Dataset()  # provides the pad() helper used below
        data = []
        def addIm(img):
            im = cv2.imread(img)
            im = dataset.pad(im)
            im = im - params._mean
            # change (h,w,c) to (c,h,w)
            im = np.rollaxis(im,2) 
            data.append(im)
        for i in db_inds:
            for idx in xrange(3):
                try:
                    addIm(self._db[i][idx])
                except:
                    print "error reading line: ", self._db[i]
        data = np.array(data)
        datum.append(data.copy())
        blobs = {name: datum[i] for name,i in self._name_to_top_map.iteritems()}
        return blobs
Project: QScode    Author: PierreHao    | Project source | File source
def _get_next_minibatch(self):
        """For txt mode, read image in training process"""
        db_inds = self._get_next_minibatch_inds()
        datum = []
        dataset = data.Dataset()  # provides the pad() helper used below
        data = []
        def addIm(img):
            im = cv2.imread(img)
            im = dataset.pad(im)
            im = im - params._mean
            # change (h,w,c) to (c,h,w)
            im = np.rollaxis(im,2) 
            data.append(im)
        for i in db_inds:
            for idx in xrange(3):
                try:
                    addIm(self._db[i][idx])   
                except Exception as e:
                    print "error with read line: ",self._db[i]
                    print "error type: ",e
                    sys.exit(0)
        data = np.array(data)
        datum.append(data.copy())
        blobs = {name: datum[i] for name,i in self._name_to_top_map.iteritems()}
        return blobs
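The two QScode variants above differ only in error handling: the first silently skips a failed read (the batch just comes out short), while the second prints the exception and aborts. Aborting is the safer choice while debugging a new index file; skipping is preferable once occasional bad lines are a known, acceptable condition.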
Project: Pixel-Recursive-Super-Resolution    Author: hodgka    | Project source | File source
def __init__(self, model):
        '''
        Setup directories, dataset, model, and optimizer
        '''
        self.batch_size = FLAGS.batch_size
        self.iterations = FLAGS.iterations
        self.learning_rate = FLAGS.learning_rate

        self.model_dir = FLAGS.model_dir  # directory to write model summaries to
        self.dataset_dir = FLAGS.dataset_dir  # directory containing data
        self.samples_dir = FLAGS.samples_dir  # directory for sampled images
        self.device_id = FLAGS.device_id
        self.use_gpu = FLAGS.use_gpu

        # create directories if they don't exist yet
        if not os.path.exists(self.model_dir):
            os.makedirs(self.model_dir)
        if not os.path.exists(self.dataset_dir):
            os.makedirs(self.dataset_dir)
        if not os.path.exists(self.samples_dir):
            os.makedirs(self.samples_dir)

        if self.use_gpu:
            device_str = '/gpu:' + str(self.device_id)
        else:
            device_str = '/cpu:0'
        with tf.device(device_str):
            self.global_step = tf.get_variable("global_step", [],
                                               initializer=tf.constant_initializer(0), trainable=False)

            # parse data and create model
            self.dataset = Dataset(self.dataset_dir, self.iterations, self.batch_size)
            self.model = model(self.dataset.hr_images, self.dataset.lr_images)
            learning_rate = tf.train.exponential_decay(self.learning_rate, self.global_step,
                                                       500000, 0.5,  staircase=True)
            optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=0.95, momentum=0.9, epsilon=1e-8)
            self.train_optimizer = optimizer.minimize(self.model.loss, global_step=self.global_step)
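With staircase=True, tf.train.exponential_decay halves the learning rate every 500,000 global steps instead of decaying it continuously. A plain-Python rendering of the same schedule, for reference:

def decayed_lr(initial_lr, global_step, decay_steps=500000, decay_rate=0.5):
    # staircase=True makes the exponent an integer, so the rate drops in steps
    return initial_lr * decay_rate ** (global_step // decay_steps)

# e.g. decayed_lr(1e-3, 1200000) == 1e-3 * 0.5 ** 2 == 2.5e-4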
Project: QScode    Author: PierreHao    | Project source | File source
def _get_next_minibatch_mp(self):
        """For multiprocessing mode, read image in training process"""
        db_inds = self._get_next_minibatch_inds()
        datum = []
        def addIm(img):
            im = cv2.imread(img)
            im = dataset.pad(im)
            im = im - params._mean
            # change (h,w,c) to (c,h,w)
            im = np.rollaxis(im,2)
            return im 

        dataset = data.Dataset()  # provides the pad() helper used in addIm
        lock = threading.Lock()
        def process(i):
            for idx in xrange(3):
                try:
                    im = addIm(self._db[i][idx])
                except:
                    print "error reading line: ", self._db[i]
                    continue  # skip this item; im is undefined on failure
                lock.acquire()
                data[str(i)+str(idx)] = im
                lock.release()

        data = {}
        arr = []
        p = ThreadPool(params._num_thread)  # multiprocessing.pool.ThreadPool
        p.map(process, db_inds)
        for i in db_inds:
            arr.append(data[str(i)+str(0)])
            arr.append(data[str(i)+str(1)])
            arr.append(data[str(i)+str(2)])
        arr = np.array(arr)
        datum.append(arr.copy())
        blobs = {name: datum[i] for name,i in self._name_to_top_map.iteritems()}
        return blobs
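The method depends on multiprocessing.pool.ThreadPool (the commented-out construction is restored above). A self-contained sketch of the same fan-out-then-collect pattern, using only the standard library (the pool size and toy workload are placeholders):

from multiprocessing.pool import ThreadPool
import threading

results = {}
lock = threading.Lock()

def work(i):
    value = i * i  # stands in for the per-item image read
    with lock:     # serialize writes to the shared dict
        results[i] = value

pool = ThreadPool(4)
pool.map(work, range(8))
pool.close()
print(sorted(results.items()))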
Project: chess-deep-rl    Author: rajpurkar    | Project source | File source
def train(net_type, generator_fn_str, dataset_file, build_net_fn, featurized=True):
    d = Dataset(dataset_file + 'train.pgn')
    generator_fn = getattr(d, generator_fn_str)
    d_test = Dataset(dataset_file + 'test.pgn')

    X_val, y_val = d_test.load(generator_fn.__name__,
        featurized = featurized,
        refresh    = False,
        board = net_type)

    board_num_channels = X_val[0].shape[1] if net_type == 'to' else X_val[0].shape[0]
    model = build_net_fn(board_num_channels=board_num_channels, net_type=net_type)
    start_time = str(int(time.time()))
    try:
        plot_model(model, start_time, net_type)
    except:
        print("Skipping plot")
    from keras.callbacks import ModelCheckpoint
    checkpointer = ModelCheckpoint(
        filepath       = get_filename_for_saving(start_time, net_type),
        verbose        = 2,
        save_best_only = True)

    model.fit_generator(generator_fn(featurized=featurized, board=net_type),
        samples_per_epoch = SAMPLES_PER_EPOCH,
        nb_epoch          = NUMBER_EPOCHS,
        callbacks         = [checkpointer],
        validation_data   = (X_val, y_val),
        verbose           = VERBOSE_LEVEL)
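fit_generator here is the Keras 1 API (hence samples_per_epoch and nb_epoch); it expects generator_fn to yield (X_batch, y_batch) tuples indefinitely and draws samples_per_epoch samples from it each epoch. A minimal sketch of such a generator, with placeholder arrays rather than the project's real featurization:

import numpy as np

def batch_generator(X, y, batch_size=32):
    # X and y are numpy arrays; Keras training generators must loop forever
    n = len(X)
    while True:
        idx = np.random.randint(0, n, batch_size)
        yield X[idx], y[idx]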
Project: miccai-2016-surgical-activity-rec    Author: rdipietro    | Project source | File source
def main():
    """ Run training and export summaries to data_dir/logs for a single test
    setup and a single set of parameters. Summaries include a) TensorBoard
    summaries, b) the latest train/test accuracies and raw edit distances
    (status.txt), c) the latest test predictions along with test ground-truth
    labels (test_label_seqs.pkl, test_prediction_seqs.pkl), d) visualizations
    as training progresses (test_visualizations_######.png)."""

    args = define_and_process_args()
    print('\n', 'ARGUMENTS', '\n\n', args, '\n')

    log_dir = get_log_dir(args)
    print('\n', 'LOG DIRECTORY', '\n\n', log_dir, '\n')

    standardized_data_path = os.path.join(args.data_dir, args.data_filename)
    if not os.path.exists(standardized_data_path):
        message = '%s does not exist.' % standardized_data_path
        raise ValueError(message)

    dataset = data.Dataset(standardized_data_path)
    train_raw_seqs, test_raw_seqs = dataset.get_splits(args.test_users)
    train_triplets = [data.prepare_raw_seq(seq) for seq in train_raw_seqs]
    test_triplets = [data.prepare_raw_seq(seq) for seq in test_raw_seqs]

    train_input_seqs, train_reset_seqs, train_label_seqs = zip(*train_triplets)
    test_input_seqs, test_reset_seqs, test_label_seqs = zip(*test_triplets)

    Model = eval('models.' + args.model_type + 'Model')
    input_size = dataset.input_size
    target_size = dataset.num_classes

    # This is just to satisfy a low-CPU requirement on our cluster
    # when using GPUs.
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        config = tf.ConfigProto(intra_op_parallelism_threads=2,
                                inter_op_parallelism_threads=2)
    else:
        config = None

    with tf.Session(config=config) as sess:
        model = Model(input_size, target_size, args.num_layers,
                      args.hidden_layer_size, args.init_scale,
                      args.dropout_keep_prob)
        optimizer = optimizers.Optimizer(
            model.loss, args.num_train_sweeps, args.initial_learning_rate,
            args.num_initial_sweeps, args.num_sweeps_per_decay,
            args.decay_factor, args.max_global_grad_norm)
        train(sess, model, optimizer, log_dir, args.batch_size,
              args.num_sweeps_per_summary, args.num_sweeps_per_save,
              train_input_seqs, train_reset_seqs, train_label_seqs,
              test_input_seqs, test_reset_seqs, test_label_seqs)
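A side note on Model = eval('models.' + args.model_type + 'Model'): getattr performs the same attribute lookup without evaluating an arbitrary string, which is generally the safer spelling:

Model = getattr(models, args.model_type + 'Model')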
Project: chess-deep-rl    Author: rajpurkar    | Project source | File source
def validate(model_hdf5, net_type, generator_fn_str, dataset_file, featurized=True):
    from keras.models import load_model
    import data

    d_test = Dataset(dataset_file + 'test.pgn')
    X_val, y_val = d_test.load(generator_fn_str,
        featurized = featurized,
        refresh    = False,
        board      = "both")
    boards = data.board_from_state(X_val)

    if net_type == "from":
        model_from = load_model("saved/" + model_hdf5)
        y_hat_from = model_from.predict(X_val)
        num_correct = 0
        for i in range(len(boards)):
            if y_val[0][i,np.argmax(y_hat_from[i])] > 0:
                num_correct += 1
        print(num_correct / len(boards))

    elif net_type == "to":
        model_to = load_model("saved/" + model_hdf5)
        y_hat_to = model_to.predict([X_val, y_val[0].reshape(y_val[0].shape[0],1,X_val.shape[2],X_val.shape[3])])
        num_correct = 0
        for i in range(len(boards)):
            if y_val[1][i,np.argmax(y_hat_to[i])] > 0:
                num_correct += 1
        print(num_correct / len(boards))

    elif net_type == "from_to":
        model_from = load_model("saved/" + model_hdf5[0])
        model_to = load_model("saved/" + model_hdf5[1])
        y_hat_from = model_from.predict(X_val)

        for i in range(len(boards)):
            from_square = np.argmax(y_hat_from[i])
            y_max_from = np.zeros((1,1,X_val.shape[2],X_val.shape[3]))
            y_max_from.flat[from_square] = 1

            y_hat_to = model_to.predict([np.expand_dims(X_val[i], 0), y_max_from])
            to_square = np.argmax(y_hat_to)
            move_attempt = data.move_from_action(from_square, to_square)
            if boards[i].is_legal(move_attempt):
                print("YAY")
            else:
                print("BOO")
            print(move_attempt)
            move = data.move_from_action(np.argmax(y_val[0][i]), np.argmax(y_val[1][i]))  # ground truth for board i
            print(move)
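In the from_to branch, y_max_from.flat[from_square] = 1 builds a one-hot plane over the board by assigning through the array's flattened index. The equivalent index arithmetic, shown for an 8x8 board (the snippet itself takes the sizes from X_val.shape):

import numpy as np

plane = np.zeros((1, 1, 8, 8))
plane.flat[12] = 1  # flat index 12 lands on row 12 // 8, column 12 % 8
assert plane[0, 0, 12 // 8, 12 % 8] == 1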