我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用torch.utils.data.DataLoader()。
def convert2tensor(self, dataset, batch_size, limit):
    """Build a shuffling DataLoader from a dict-style dataset.

    Keeps at most `limit` samples, normalizes the images, and wraps the
    (float image, label) pairs in a TensorDataset.
    """
    images = dataset['data'][:limit]
    print("normalizing images...")
    images = common.normalize(images)
    print("done")
    labels = np.asarray(dataset['labels'][:limit])
    image_tensor = torch.from_numpy(images).float()
    label_tensor = torch.from_numpy(labels)
    pairs = data_utils.TensorDataset(image_tensor, label_tensor)
    return data_utils.DataLoader(pairs, batch_size=batch_size, shuffle=True)
def convert2tensor(self, dataset, batch_size, limit):
    """Build a shuffling DataLoader from an SVHN-style dataset dict.

    SVHN stores images as (H, W, C, N); this keeps at most `limit`
    samples, normalizes them, shifts labels from 1..10 to 0..9, and
    returns a DataLoader over (float image, label) pairs.
    """
    b_data = dataset['X']
    print("normalizing images...")
    b_data = common.normalize(b_data)
    print("done")
    target = dataset['y'].reshape(-1)[:limit]
    # SVHN labels run 1..10: shift to 0..9 to fit the network's class indices.
    target = target - 1
    # (H, W, C, N) -> (N, H, W, C), then limit on the SAMPLE axis.  The
    # previous version sliced `b_data[:limit]`, which cuts image rows
    # (axis 0) instead of limiting samples, and copied one image at a
    # time in a Python loop.
    data = np.ascontiguousarray(np.transpose(b_data, (3, 0, 1, 2))[:len(target)])
    tensor_data = torch.from_numpy(data).float()
    tensor_target = torch.from_numpy(np.asarray(target))
    pairs = data_utils.TensorDataset(tensor_data, tensor_target)
    return data_utils.DataLoader(pairs, batch_size=batch_size, shuffle=True)
def get_loader(df, transformations):
    """Return a validation DataLoader over the Kaggle Amazon JPG dataset."""
    dataset = KaggleAmazonJPGDataset(df, paths.train_jpg, transformations, divide=False)
    return DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=12,
        pin_memory=True,
    )
def get_data_loader(dataset_name, batch_size=1, dataset_transforms=None, is_training_set=True, shuffle=True):
    """Look up a torchvision dataset class by name and wrap it in a DataLoader.

    `dataset_transforms` are appended after ToTensor(); the dataset is
    downloaded to DATA_DIR if missing.
    """
    extra = dataset_transforms or []
    pipeline = transforms.Compose([transforms.ToTensor()] + extra)
    dataset_cls = getattr(datasets, dataset_name)
    dataset = dataset_cls(root=DATA_DIR, train=is_training_set,
                          transform=pipeline, download=True)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
def load_data(self):
    """Load synthetic and real image folders into shuffling DataLoaders.

    Populates self.syn_train_loader and self.real_loader from the
    directories configured in cfg.
    """
    print('=' * 50)
    print('Loading data...')
    # NOTE(review): `transforms.ImageOps.grayscale` is not a torchvision
    # API in any recent version; this presumably targets a very old
    # torchvision, or should be PIL.ImageOps.grayscale /
    # transforms.Grayscale() -- confirm before reuse.  Normalize below
    # expects 3 channels, which also conflicts with grayscale output.
    transform = transforms.Compose([
        transforms.ImageOps.grayscale,
        transforms.Scale((cfg.img_width, cfg.img_height)),
        transforms.ToTensor(),
        # Per-channel mean/std 0.5 maps [0, 1] to [-1, 1].
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    syn_train_folder = torchvision.datasets.ImageFolder(root=cfg.syn_path, transform=transform)
    # print(syn_train_folder)
    self.syn_train_loader = Data.DataLoader(syn_train_folder, batch_size=cfg.batch_size,
                                            shuffle=True, pin_memory=True)
    print('syn_train_batch %d' % len(self.syn_train_loader))
    real_folder = torchvision.datasets.ImageFolder(root=cfg.real_path, transform=transform)
    # real_folder.imgs = real_folder.imgs[:2000]
    self.real_loader = Data.DataLoader(real_folder, batch_size=cfg.batch_size,
                                       shuffle=True, pin_memory=True)
    print('real_batch %d' % len(self.real_loader))
def train(self): training_set = spatial_dataset(dic=self.dic_training, root_dir=self.data_path, mode='train', transform = transforms.Compose([ transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) ])) print '==> Training data :',len(training_set),'frames' print training_set[1][0]['img1'].size() train_loader = DataLoader( dataset=training_set, batch_size=self.BATCH_SIZE, shuffle=True, num_workers=self.num_workers) return train_loader
def validate(self): validation_set = spatial_dataset(dic=self.dic_testing, root_dir=self.data_path, mode='val', transform = transforms.Compose([ transforms.Scale([224,224]), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]) ])) print '==> Validation data :',len(validation_set),'frames' print validation_set[1][1].size() val_loader = DataLoader( dataset=validation_set, batch_size=self.BATCH_SIZE, shuffle=False, num_workers=self.num_workers) return val_loader
def train(self): training_set = motion_dataset(dic=self.dic_video_train, in_channel=self.in_channel, root_dir=self.data_path, mode='train', transform = transforms.Compose([ transforms.Scale([224,224]), transforms.ToTensor(), ])) print '==> Training data :',len(training_set),' videos',training_set[1][0].size() train_loader = DataLoader( dataset=training_set, batch_size=self.BATCH_SIZE, shuffle=True, num_workers=self.num_workers, pin_memory=True ) return train_loader
def val(self): validation_set = motion_dataset(dic= self.dic_test_idx, in_channel=self.in_channel, root_dir=self.data_path , mode ='val', transform = transforms.Compose([ transforms.Scale([224,224]), transforms.ToTensor(), ])) print '==> Validation data :',len(validation_set),' frames',validation_set[1][1].size() #print validation_set[1] val_loader = DataLoader( dataset=validation_set, batch_size=self.BATCH_SIZE, shuffle=False, num_workers=self.num_workers) return val_loader
def test_sequential_batch(self):
    """Iterate without shuffling at batch_size=2 and check batch structure
    and the per-sample values inside each batch."""
    loader = DataLoader(self.dataset, batch_size=2, shuffle=False)
    bsz = loader.batch_size
    for batch_idx, sample in enumerate(loader):
        base = batch_idx * bsz
        self.assertEqual(set(sample.keys()), {'a_tensor', 'another_dict'})
        self.assertEqual(set(sample['another_dict'].keys()), {'a_number'})
        tensor = sample['a_tensor']
        self.assertEqual(tensor.size(), torch.Size([bsz, 4, 2]))
        self.assertTrue((tensor[0] == base).all())
        self.assertTrue((tensor[1] == base + 1).all())
        number = sample['another_dict']['a_number']
        self.assertEqual(number.size(), torch.Size([bsz]))
        self.assertEqual(number[0], base)
        self.assertEqual(number[1], base + 1)
def _test_batch_sampler(self, **kwargs):
    """Feed an explicit batch_sampler of alternating 2- and 3-element
    batches and verify batch sizes and contents."""
    # Batches look like [(0, 1), (2, 3, 4), (5, 6), (7, 8, 9), ...]
    batches = []
    for start in range(0, 100, 5):
        batches.append(tuple(range(start, start + 2)))
        batches.append(tuple(range(start + 2, start + 5)))
    dl = DataLoader(self.dataset, batch_sampler=batches, **kwargs)
    self.assertEqual(len(dl), 40)
    for i, (input, _target) in enumerate(dl):
        offset = i * 5 // 2
        # Even-indexed batches carry 2 samples, odd-indexed carry 3.
        expected = 2 if i % 2 == 0 else 3
        self.assertEqual(len(input), expected)
        self.assertEqual(input, self.data[offset:offset + expected])
def get_loaders(loader_batchsize, **kwargs):
    """Return (train_loader, test_loader) for the dataset named in
    kwargs['arguments'].data; currently only 'mnist' is handled."""
    arguments = kwargs['arguments']
    if arguments.data == 'mnist':
        # Pin memory and use a worker only when training on GPU.
        loader_kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
        to_tensor = transforms.Compose([transforms.ToTensor()])
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data', train=True, download=True, transform=to_tensor),
            batch_size=loader_batchsize, shuffle=True, **loader_kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data', train=False, transform=to_tensor),
            batch_size=loader_batchsize, shuffle=True, **loader_kwargs)
    return train_loader, test_loader
def form_torch_mixture_dataset(MSabs, MSphase, SPCS1abs, SPCS2abs,
                               wavfls1, wavfls2, lens1, lens2, arguments):
    """Wrap mixture spectrogram/waveform arrays in a non-shuffling DataLoader."""
    def to_tensor(arr):
        # Accepts lists or ndarrays; copies into a torch tensor.
        return torch.from_numpy(np.array(arr))

    dataset = MixtureDataset(to_tensor(MSabs), to_tensor(MSphase),
                             to_tensor(SPCS1abs), to_tensor(SPCS2abs),
                             to_tensor(wavfls1), to_tensor(wavfls2),
                             lens1, lens2)
    loader_kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
    return data_utils.DataLoader(dataset, batch_size=arguments.batch_size,
                                 shuffle=False, **loader_kwargs)
def check_data(self, _, y):
    """Validate targets before fitting.

    :raises ValueError: when y is None but the stock DataLoader is in use
        (the user must then supply custom iterators), or when y is
        1-dimensional.
    """
    if y is None and self.iterator_train is DataLoader:
        raise ValueError("No y-values are given (y=None). You must "
                         "implement your own DataLoader for training "
                         "(and your validation) and supply it using the "
                         "``iterator_train`` and ``iterator_valid`` "
                         "parameters respectively.")
    elif y is None:
        # The user implements its own mechanism for generating y.
        return

    # The problem with 1-dim float y is that the pytorch DataLoader will
    # somehow upcast it to DoubleTensor
    if get_dim(y) == 1:
        # Fixed message: the reshape example previously lacked its
        # closing parenthesis.
        raise ValueError("The target data shouldn't be 1-dimensional; "
                         "please reshape (e.g. y.reshape(-1, 1)).")
# pylint: disable=signature-differs
def check_aug():
    """Visualize the 13 channels of the fourth fold-0 training sample."""
    nfold = 0
    dataset = CSVDataset_tst(f'../../_data/fold{nfold}/train.csv')
    loader = data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=8)
    for idx, batch in enumerate(loader, 0):
        if idx != 3:
            continue
        inputs, labels = batch
        inputs, labels = inputs.numpy()[0], labels.numpy()[0]
        print(inputs.shape, labels.shape, np.amax(inputs), np.amin(inputs), np.mean(inputs))
        for channel in range(13):
            plt.subplot(3, 5, 1 + channel)
            plt.imshow(np.transpose(inputs[channel], (1, 2, 0)))
        break
    plt.show()
def validate(args):
    """Evaluate a trained segmentation checkpoint over the chosen split and
    print aggregate scores plus per-class IoU.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size, num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    # The architecture name is encoded in the checkpoint filename up to the
    # first underscore.
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()

    for i, (images, labels) in tqdm(enumerate(valloader)):
        model.cuda()
        # NOTE(review): Variable/volatile is pre-0.4 PyTorch; modern code
        # would wrap this loop in torch.no_grad().
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)
        outputs = model(images)
        # Argmax over the class channel gives the predicted label map.
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, class_iou[i])
def train(args):
    """Train a segmentation model with SGD + 2D cross-entropy, optionally
    plotting the running loss to visdom, saving the full model each epoch.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    trainloader = data.DataLoader(loader, batch_size=args.batch_size,
                                  num_workers=4, shuffle=True)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()
        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss',
                                         title='Training Loss',
                                         legend=['Loss']))

    # Setup Model
    model = get_model(args.arch, n_classes)
    # Data-parallel across every visible GPU.
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate,
                                momentum=0.99, weight_decay=5e-4)

    for epoch in range(args.n_epoch):
        for i, (images, labels) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            optimizer.zero_grad()
            outputs = model(images)
            loss = cross_entropy2d(outputs, labels)
            loss.backward()
            optimizer.step()
            if args.visdom:
                # NOTE(review): loss.data[0] is pre-0.4 PyTorch; newer
                # versions use loss.item().
                vis.line(
                    X=torch.ones((1, 1)).cpu() * i,
                    Y=torch.Tensor([loss.data[0]]).unsqueeze(0).cpu(),
                    win=loss_window,
                    update='append')
            if (i+1) % 20 == 0:
                print("Epoch [%d/%d] Loss: %.4f" % (epoch+1, args.n_epoch, loss.data[0]))
        # Saves the whole model object (not just state_dict) once per epoch.
        torch.save(model, "{}_{}_{}_{}.pkl".format(args.arch, args.dataset,
                                                   args.feature_scale, epoch))
def CreateDataLoader(opt):
    """Build a shuffling, last-batch-dropping DataLoader over the paired
    color/sketch ImageFolder dataset (V branch cropped to imageSize/4)."""
    random.seed(opt.manualSeed)

    # Per-channel 0.5/0.5 normalization maps [0, 1] to [-1, 1];
    # Normalize is stateless, so one instance is shared.
    norm_half = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])
    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize // 4, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])
    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans)
    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers),
                           drop_last=True)
def CreateDataLoader(opt):
    """Build the paired color/sketch DataLoader; the V branch is cropped to
    224 and normalized with ImageNet statistics."""
    random.seed(opt.manualSeed)

    # Shared [-1, 1] normalization for the C and S branches.
    norm_half = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])
    VTrans = transforms.Compose([
        RandomSizedCrop(224, Image.BICUBIC),
        transforms.ToTensor(),
        # ImageNet channel statistics.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans)
    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers),
                           drop_last=True)
def CreateDataLoader(opt):
    """Build the paired color/sketch DataLoader; the V branch is cropped to
    the full imageSize."""
    random.seed(opt.manualSeed)

    # Per-channel 0.5/0.5 normalization maps [0, 1] to [-1, 1].
    norm_half = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])
    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])
    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans)
    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers),
                           drop_last=True)
def CreateDataLoader(opt):
    """Build the paired color/sketch DataLoader; the S branch gets a random
    brightness jitter before normalization."""
    random.seed(opt.manualSeed)

    # Per-channel 0.5/0.5 normalization maps [0, 1] to [-1, 1].
    norm_half = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])
    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize // 4, Image.BICUBIC),
        transforms.ToTensor(),
        norm_half,
    ])

    def jitter(x):
        # Blend toward white by a random factor drawn from [0.7, 1].
        ran = random.uniform(0.7, 1)
        return x * ran + 1 - ran

    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Lambda(jitter),
        norm_half,
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans)
    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers),
                           drop_last=True)
def get_loader(chunk_list):
    """Concatenate HDF5 chunk files into one in-memory TensorDataset and
    return a shuffling DataLoader over it."""
    data_parts, label_parts = [], []
    for path in chunk_list:
        print ('Loading data from %s' % path)
        with h5py.File(path, 'r') as hf:
            data_parts.append(np.asarray(hf['data']))
            label_parts.append(np.asarray(hf['label']))
    data = torch.FloatTensor(np.concatenate(data_parts, axis=0))
    label = torch.FloatTensor(np.concatenate(label_parts, axis=0))
    print ('Total %d frames loaded' % data.size(0))
    dset_train = TensorDataset(data, label)
    return DataLoader(dset_train, batch_size=256, shuffle=True,
                      num_workers=10, pin_memory=False)
def train(model, db, args, bsz=32, eph=1, use_cuda=False):
    """Train `model` on dataset `db` with SGD and cross-entropy loss.

    Saves a checkpoint whenever the latest batch loss beats the best seen
    so far, and a final checkpoint after training.

    :param model: network to train (updated in place)
    :param db: torch Dataset of (input, one-hot target) pairs
    :param args: unused here — presumably kept for a shared caller
        signature; confirm before removing
    :param bsz: mini-batch size
    :param eph: number of epochs
    :param use_cuda: move tensors to GPU when True (and the global
        cuda_ava flag is set)
    """
    print("Training...")
    trainloader = data_utils.DataLoader(dataset=db, batch_size=bsz, shuffle=True)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
    best_loss = 100000  # sentinel "infinity" for best-batch-loss tracking
    for epoch in range(eph):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            # Add a channel dimension for the conv net.
            inputs = inputs.unsqueeze(1)
            # CrossEntropyLoss expects class indices, not one-hot vectors.
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # NOTE(review): loss.data[0] is pre-0.4 PyTorch (now loss.item()).
            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0
            if last_loss < best_loss:
                # Checkpoint on every new best single-batch loss; evaluate()
                # runs on the (shuffled) training loader, so acc here is
                # training accuracy.
                best_loss = last_loss
                acc = evaluate(model, trainloader, use_cuda)
                torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    acc = evaluate(model, trainloader, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_all_acc_{}.t7'.format(acc)))
    print("Finished Training!")
def test_sequential(self):
    """Default DataLoader (no batching, no shuffling) yields samples in order."""
    loader = DataLoader(self.dataset)
    self._test_sequential(loader)
def test_sequential_batch(self):
    """Sequential order is preserved when batch_size=2."""
    loader = DataLoader(self.dataset, batch_size=2)
    self._test_sequential(loader)
def test_shuffle(self):
    """shuffle=True still yields every sample exactly once."""
    loader = DataLoader(self.dataset, shuffle=True)
    self._test_shuffle(loader)
def test_shuffle_batch(self):
    """Shuffling combined with batch_size=2."""
    loader = DataLoader(self.dataset, batch_size=2, shuffle=True)
    self._test_shuffle(loader)
def test_sequential_workers(self):
    """Sequential order holds with 4 worker processes."""
    loader = DataLoader(self.dataset, num_workers=4)
    self._test_sequential(loader)
def test_shuffle_workers(self):
    """Shuffling holds with 4 worker processes."""
    loader = DataLoader(self.dataset, shuffle=True, num_workers=4)
    self._test_shuffle(loader)
def test_shuffle_batch_workers(self):
    """Shuffling with batching and 4 worker processes."""
    loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4)
    self._test_shuffle(loader)
def test_error(self):
    """Dataset errors propagate out of single-process iteration."""
    loader = DataLoader(ErrorDataset(100), batch_size=2, shuffle=True)
    self._test_error(loader)
def test_error_workers(self):
    """Dataset errors propagate out of multi-worker iteration."""
    loader = DataLoader(ErrorDataset(41), batch_size=2, shuffle=True, num_workers=4)
    self._test_error(loader)
def test_partial_workers(self):
    "check that workers exit even if the iterator is not exhausted"
    # Grab the iterator directly so it can be abandoned mid-iteration.
    loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4))
    # NOTE(review): reaches into the iterator's internal `workers`
    # attribute, which only old torch versions expose — confirm the
    # target version before reuse.
    workers = loader.workers
    for i, sample in enumerate(loader):
        if i == 3:
            break
    # Dropping the iterator should shut down its worker processes.
    del loader
    for w in workers:
        w.join(1.0)  # timeout of one second
        self.assertFalse(w.is_alive(), 'subprocess not terminated')
        self.assertEqual(w.exitcode, 0)
def parallel(self, *args, **kwargs):
    """Wrap this dataset in a DataLoader, forwarding all loader options."""
    loader = DataLoader(self, *args, **kwargs)
    return loader
def main():
    """Evaluate a trained ResNet-152 multi-label classifier: fit per-class
    thresholds on the training split, then report validation loss/metrics
    and dump raw outputs to val_output.mat.
    """
    training_batch_size = 352
    validation_batch_size = 352
    net = get_res152(num_classes=num_classes, snapshot_path=os.path.join(
        ckpt_path, 'epoch_15_validation_loss_0.0772_iter_1000.pth')).cuda()
    net.eval()

    transform = transforms.Compose([
        transforms.ToTensor(),
        # Dataset-specific channel statistics.
        transforms.Normalize([0.311, 0.340, 0.299], [0.167, 0.144, 0.138])
    ])
    criterion = nn.MultiLabelSoftMarginLoss().cuda()

    # Pass 1: training split, used only to pick decision thresholds.
    train_set = MultipleClassImageFolder(split_train_dir, transform)
    train_loader = DataLoader(train_set, batch_size=training_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, train_loader)
    loss = criterion(batch_outputs, batch_labels)
    print 'training loss %.4f' % loss.cpu().data.numpy()[0]
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    # Per-class decision thresholds fit on training outputs.
    thretholds = find_best_threthold(batch_outputs, batch_labels)

    # Pass 2: validation split, scored with the thresholds from pass 1.
    val_set = MultipleClassImageFolder(split_val_dir, transform)
    val_loader = DataLoader(val_set, batch_size=validation_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, val_loader)
    loss = criterion(batch_outputs, batch_labels)
    print 'validation loss %.4f' % loss.cpu().data.numpy()[0]
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    sio.savemat('./val_output.mat', {'outputs': batch_outputs, 'labels': batch_labels})
    prediction = get_one_hot_prediction(batch_outputs, thretholds)
    evaluation = evaluate(prediction, batch_labels)
    print 'validation evaluation: accuracy %.4f, precision %.4f, recall %.4f, f2 %.4f' % (
        evaluation[0], evaluation[1], evaluation[2], evaluation[3])
def test_sequence_wise_torch_data_loader():
    """Nose generator test: utterance-wise (sequence) iteration through a
    torch DataLoader works at batch_size=1 on variable-length data, fails
    at batch_size=2 (uncollatable lengths), and works at any batch size
    on padded data.
    """
    import torch
    from torch.utils import data as data_utils
    X, Y = _get_small_datasets(padded=False)

    class TorchDataset(data_utils.Dataset):
        # Minimal map-style dataset over paired (X, Y) sequences.
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            # Each batch is (batch, time, feature).
            assert len(x.shape) == len(y.shape)
            assert len(x.shape) == 3
            print(idx, x.shape, y.shape)

    # Test with batch_size = 1
    yield __test, X, Y, 1
    # Since we have variable length frames, batch size larger than 1 causes
    # runtime error.
    yield raises(RuntimeError)(__test), X, Y, 2

    # For padded dataset, which can be represented by (N, T^max, D), batchsize
    # can be any number.
    X, Y = _get_small_datasets(padded=True)
    yield __test, X, Y, 1
    yield __test, X, Y, 2
def test_frame_wise_torch_data_loader():
    """Nose generator test: frame-wise iteration through a torch DataLoader
    over memory-cached frame datasets yields 2-D (batch, feature) tensors.
    """
    import torch
    from torch.utils import data as data_utils
    X, Y = _get_small_datasets(padded=False)

    # Since torch's Dataset (and Chainer, and maybe others) assumes dataset has
    # fixed size length, i.e., implements `__len__` method, we need to know
    # number of frames for each utterance.
    # Sum of the number of frames is the dataset size for frame-wise iteration.
    # Fixed: `np.int` was removed from NumPy (1.20 deprecation, removed in
    # 1.24); the builtin `int` is the documented replacement and yields the
    # same default integer dtype.
    lengths = np.array([len(x) for x in X], dtype=int)

    # For the above reason, we need to explicitly give the number of frames.
    X = MemoryCacheFramewiseDataset(X, lengths, cache_size=len(X))
    Y = MemoryCacheFramewiseDataset(Y, lengths, cache_size=len(Y))

    class TorchDataset(data_utils.Dataset):
        # Minimal map-style dataset over paired (X, Y) frames.
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            # Frame-wise batches are (batch, feature).
            assert len(x.shape) == 2
            assert len(y.shape) == 2

    yield __test, X, Y, 128
    yield __test, X, Y, 256
def __init__(self, trainer, dataset, start_epoch=0, momentum=0, batch_size=96):
    """Set up the updater with a fixed-order, pinned-memory loader over
    `dataset` (10 worker processes, no shuffling)."""
    super(SemiSupervisedUpdater, self).__init__()
    self.trainer = trainer
    self.dataset = dataset
    self.start_epoch = start_epoch
    self.momentum = momentum
    self.loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                             num_workers=10, pin_memory=True)
def get_test_loader(test_images, transformations):
    """Return a DataLoader over the Kaggle Amazon test JPGs."""
    dataset = KaggleAmazonTestDataset(test_images, paths.test_jpg, '.jpg',
                                      transformations, divide=False)
    return DataLoader(dataset, batch_size=batch_size,
                      num_workers=12, pin_memory=True)
def __init__(self, opt, shared=None):
    """Initialize the streaming teacher: build (or reuse from `shared`) a
    sequential, non-shuffling DataLoader over a StreamDataset."""
    opt['batch_sort'] = False
    super().__init__(opt, shared)
    self.use_batch_act = self.bsz > 1
    self.num_workers = opt['numworkers']
    # One can specify a collate function to use for preparing a batch
    collate_fn = opt.get('collate_fn', default_collate)
    if shared:
        # Another copy of this teacher already built the loader; reuse it.
        self.dataset = shared['dataset']
        self.pytorch_dataloader = shared['pytorch_dataloader']
        self.lastYs = shared['lastYs']
    else:
        self.dataset = StreamDataset(opt)
        self.pytorch_dataloader = DataLoader(
            self.dataset,
            batch_size=self.bsz,
            shuffle=False,
            sampler=sampler.SequentialSampler(self.dataset),
            num_workers=self.num_workers,
            collate_fn=collate_fn,
            pin_memory=False,
            drop_last=False,
        )
        self.lastYs = [None] * self.bsz
    self.num_batches = math.ceil(self.dataset.num_examples() / self.bsz)
    self.reset()
def setup_data_loaders(dataset, use_cuda, batch_size, sup_num=None, root='./data',
                       download=True, **kwargs):
    """
    helper function for setting up pytorch data loaders for a semi-supervised dataset

    :param dataset: the data to use
    :param use_cuda: use GPU(s) for training
    :param batch_size: size of a batch of data to output when iterating over the data loaders
    :param sup_num: number of supervised data examples
    :param root: where on the filesystem should the dataset be
    :param download: download the dataset (if it doesn't exist already)
    :param kwargs: other params for the pytorch data loader
    :return: when sup_num is None, the pair (unsup train loader, test loader);
        otherwise a dict of loaders keyed by "unsup"/"test"/"sup"/"valid"
    """
    # Default the worker/pinning settings without discarding any other
    # loader kwargs the caller supplied.  (The previous version replaced
    # the whole kwargs dict whenever 'num_workers' was absent, silently
    # dropping every other option.)
    kwargs.setdefault('num_workers', 0)
    kwargs.setdefault('pin_memory', False)

    cached_data = {}
    loaders = {}
    for mode in ["unsup", "test", "sup", "valid"]:
        if sup_num is None and mode == "sup":
            # in this special case, we do not want "sup" and "valid" data loaders
            return loaders["unsup"], loaders["test"]
        cached_data[mode] = dataset(root=root, mode=mode, download=download,
                                    sup_num=sup_num, use_cuda=use_cuda)
        loaders[mode] = DataLoader(cached_data[mode], batch_size=batch_size,
                                   shuffle=True, **kwargs)

    return loaders
def train_loader(self, value):
    """Register `value` as the training DataLoader.

    :raises TypeError: if value is not a torch DataLoader.  (Previously
        an ``assert``, which is stripped under ``python -O`` and would
        have let arbitrary objects through.)
    """
    if not isinstance(value, DataLoader):
        raise TypeError("train_loader must be a torch.utils.data.DataLoader, "
                        "got %r" % type(value).__name__)
    self._loaders.update({'train': value})
def validate_loader(self, value):
    """Register `value` as the validation DataLoader.

    :raises TypeError: if value is not a torch DataLoader.  (Previously
        an ``assert``, which is stripped under ``python -O`` and would
        have let arbitrary objects through.)
    """
    if not isinstance(value, DataLoader):
        raise TypeError("validate_loader must be a torch.utils.data.DataLoader, "
                        "got %r" % type(value).__name__)
    self._loaders.update({'validate': value})
def get_loader(image_path, image_size, batch_size, num_workers=2):
    """Builds and returns Dataloader."""
    preprocessing = transforms.Compose([
        transforms.Scale(image_size),
        transforms.ToTensor(),
        # Map [0, 1] to [-1, 1] per channel.
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    dataset = ImageFolder(image_path, preprocessing)
    return data.DataLoader(dataset=dataset,
                           batch_size=batch_size,
                           shuffle=True,
                           num_workers=num_workers)
def get_loader(image_path, image_size, batch_size, transform, num_workers=2):
    """Build a shuffling DataLoader over an ImageFolder.

    :param image_path: root directory of the image folder
    :param image_size: unused in this body (the transform is supplied
        pre-built); kept for caller compatibility
    :param transform: preprocessing pipeline applied to each image
    :return: torch DataLoader over the folder
    """
    dataset = ImageFolder(image_path, transform)
    # Fixed: the loader was bound to a misspelled name (`data_laoder`)
    # while the undefined `data_loader` was returned, so every call
    # raised NameError.
    data_loader = data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
    return data_loader