The following 39 code examples, extracted from open-source Python projects, illustrate how to use torch.utils.data.TensorDataset().
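Most of the examples below follow the same basic pattern: wrap tensors that share a first (sample) dimension in a TensorDataset, then hand it to a DataLoader for batching and shuffling. Here is a minimal, self-contained sketch of that pattern with synthetic data (all names are illustrative, not taken from the projects below):

import torch
from torch.utils.data import TensorDataset, DataLoader

# synthetic data: 100 samples with 8 features each, plus integer class labels
features = torch.randn(100, 8)
labels = torch.randint(0, 3, (100,))

# TensorDataset indexes all of its tensors along the first dimension,
# so every tensor passed in must have the same number of samples
dataset = TensorDataset(features, labels)

loader = DataLoader(dataset, batch_size=16, shuffle=True)
for x_batch, y_batch in loader:
    print(x_batch.shape, y_batch.shape)  # torch.Size([16, 8]) torch.Size([16])
    break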
def convert2tensor(self, dataset, batch_size, limit):
    data = dataset['data']
    data = data[:limit]
    print("normalizing images...")
    data = common.normalize(data)
    print("done")
    target = dataset['labels']
    target = target[:limit]
    target = np.asarray(target)
    tensor_data = torch.from_numpy(data)
    tensor_data = tensor_data.float()
    tensor_target = torch.from_numpy(target)
    loader = data_utils.TensorDataset(tensor_data, tensor_target)
    loader_dataset = data_utils.DataLoader(loader, batch_size=batch_size, shuffle=True)
    return loader_dataset
def convert2tensor(self, dataset, batch_size, limit):
    b_data = dataset['X']
    b_data = b_data[:limit]
    print("normalizing images...")
    b_data = common.normalize(b_data)
    print("done")
    target = dataset['y']
    target = target.reshape((len(target)))
    target = target[:limit]
    # SVHN labels run from 1 to 10: shift them to 0 to 9 to fit the network
    target = target - 1
    data = []
    for i in range(len(target)):
        data.append(b_data[:, :, :, i])
    data = np.asarray(data)
    tensor_data = torch.from_numpy(data)
    tensor_data = tensor_data.float()
    tensor_target = torch.from_numpy(target)
    loader = data_utils.TensorDataset(tensor_data, tensor_target)
    loader_dataset = data_utils.DataLoader(loader, batch_size=batch_size, shuffle=True)
    return loader_dataset
def get_loader(chunk_list):
    data = []
    label = []
    for f in chunk_list:
        print('Loading data from %s' % f)
        with h5py.File(f, 'r') as hf:
            data.append(np.asarray(hf['data']))
            label.append(np.asarray(hf['label']))
    data = torch.FloatTensor(np.concatenate(data, axis=0))
    label = torch.FloatTensor(np.concatenate(label, axis=0))
    print('Total %d frames loaded' % data.size(0))
    dset_train = TensorDataset(data, label)
    loader_train = DataLoader(dset_train,
                              batch_size=256,
                              shuffle=True,
                              num_workers=10,
                              pin_memory=False)
    return loader_train
def test_len(self):
    source = TensorDataset(torch.randn(15, 10, 2, 3, 4, 5), torch.randperm(15))
    self.assertEqual(len(source), 15)
def test_getitem(self):
    t = torch.randn(15, 10, 2, 3, 4, 5)
    l = torch.randn(15, 10)
    source = TensorDataset(t, l)
    for i in range(15):
        self.assertEqual(t[i], source[i][0])
        self.assertEqual(l[i], source[i][1])
def test_getitem_1d(self):
    t = torch.randn(15)
    l = torch.randn(15)
    source = TensorDataset(t, l)
    for i in range(15):
        self.assertEqual(t[i:i + 1], source[i][0])
        self.assertEqual(l[i:i + 1], source[i][1])
def setUp(self):
    self.data = torch.randn(100, 2, 3, 5)
    self.labels = torch.randperm(50).repeat(2)
    self.dataset = TensorDataset(self.data, self.labels)
def test_getitem_1d(self):
    t = torch.randn(15)
    l = torch.randn(15)
    source = TensorDataset(t, l)
    for i in range(15):
        self.assertEqual(t[i], source[i][0])
        self.assertEqual(l[i], source[i][1])
def efficient_batch_iterator(X: Iterable[T1], y: Opt[Iterable[T2]]=None,
                             X_encoder: Opt[Callable[[T1], TensorType]]=None,
                             y_encoder: Opt[Callable[[T2], TensorType]]=None,
                             batch_size: int=32, shuffle: bool=False,
                             num_workers: int=0, classifier: bool=False,
                             collate_fn=default_collate) -> Iterable[Tuple[TensorType, TensorType]]:
    num_workers = get_torch_num_workers(num_workers)
    if y is None:
        # for, e.g., autoencoders
        y = X
    if isinstance(X, ArrayTypes):
        if isinstance(y, ArrayTypes):
            # encoders should take batch tensors in this instance
            dataset = TensorDataset(to_tensor(X), to_tensor(y))
            return MappedDataLoader(X_encoder=X_encoder, y_encoder=y_encoder,
                                    dataset=dataset, batch_size=batch_size,
                                    shuffle=shuffle, num_workers=num_workers,
                                    collate_fn=collate_fn)
    elif isinstance(X, Seq) or (hasattr(X, '__len__') and hasattr(X, '__getitem__')):
        if isinstance(y, Seq) or (hasattr(y, '__len__') and hasattr(y, '__getitem__')):
            # Seq has __len__ and __getitem__ so it can serve as a dataset
            dataset = SeqDataset(X, y, X_encoder=X_encoder, y_encoder=y_encoder)
            return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                              collate_fn=collate_fn, num_workers=num_workers)
    elif isinstance(X, Iterable):
        if isinstance(y, Iterable):
            return BatchedIterableDataloader(X, y, batch_size=batch_size,
                                             X_encoder=X_encoder, y_encoder=y_encoder,
                                             classifier=classifier, num_workers=num_workers)
    else:
        raise TypeError("`X` and `y` must both be array types, numeric sequences, or iterables.")
def load_static_mnist(args):
    def lines_to_np_array(lines):
        return np.array([[int(i) for i in line.split()] for line in lines])

    with open(os.path.join('datasets', 'MNIST_static', 'binarized_mnist_train.amat')) as f:
        lines = f.readlines()
    x_train = lines_to_np_array(lines).astype('float32')
    with open(os.path.join('datasets', 'MNIST_static', 'binarized_mnist_valid.amat')) as f:
        lines = f.readlines()
    x_val = lines_to_np_array(lines).astype('float32')
    with open(os.path.join('datasets', 'MNIST_static', 'binarized_mnist_test.amat')) as f:
        lines = f.readlines()
    x_test = lines_to_np_array(lines).astype('float32')

    # shuffle train data
    np.random.shuffle(x_train)

    # idle y's
    y_train = np.zeros((x_train.shape[0], 1))
    y_val = np.zeros((x_val.shape[0], 1))
    y_test = np.zeros((x_test.shape[0], 1))

    # pytorch data loader
    train = data_utils.TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
    train_loader = data_utils.DataLoader(train, batch_size=args.batch_size, shuffle=True)

    validation = data_utils.TensorDataset(torch.from_numpy(x_val).float(), torch.from_numpy(y_val))
    val_loader = data_utils.DataLoader(validation, batch_size=args.test_batch_size, shuffle=False)

    test = data_utils.TensorDataset(torch.from_numpy(x_test).float(), torch.from_numpy(y_test))
    test_loader = data_utils.DataLoader(test, batch_size=args.test_batch_size, shuffle=True)

    return train_loader, val_loader, test_loader
def load_dynamic_mnist(args):
    from keras.datasets import mnist

    # loading data from Keras
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # preparing data
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.
    x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
    x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
    y_test = np.array(y_test, dtype=int)

    x_val = x_train[50000:60000]
    y_val = np.array(y_train[50000:60000], dtype=int)
    x_train = x_train[0:50000]
    y_train = np.array(y_train[0:50000], dtype=int)

    # binarize
    np.random.seed(777)
    x_val = np.random.binomial(1, x_val)
    x_test = np.random.binomial(1, x_test)

    # pytorch data loader
    train = data_utils.TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
    train_loader = data_utils.DataLoader(train, batch_size=args.batch_size, shuffle=True)

    validation = data_utils.TensorDataset(torch.from_numpy(x_val).float(), torch.from_numpy(y_val))
    val_loader = data_utils.DataLoader(validation, batch_size=args.test_batch_size, shuffle=False)

    test = data_utils.TensorDataset(torch.from_numpy(x_test).float(), torch.from_numpy(y_test))
    test_loader = data_utils.DataLoader(test, batch_size=args.test_batch_size, shuffle=False)

    return train_loader, val_loader, test_loader
def load_omniglot(args, n_validation=1345):
    def reshape_data(data):
        # note: the original used order='fortran', which only old NumPy
        # accepted; 'F' is the documented spelling
        return data.reshape((-1, 28, 28)).reshape((-1, 28 * 28), order='F')

    omni_raw = loadmat(os.path.join('datasets', 'OMNIGLOT', 'chardata.mat'))

    # train and test data
    train_data = reshape_data(omni_raw['data'].T.astype('float32'))
    test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

    # shuffle train data
    np.random.shuffle(train_data)

    # set train and validation data
    x_train = train_data[:-n_validation]
    validation_data = train_data[-n_validation:]

    # fixed binarization
    np.random.seed(777)
    x_val = np.random.binomial(1, validation_data)
    x_test = np.random.binomial(1, test_data)

    # idle y's
    y_train = np.zeros((x_train.shape[0], 1))
    y_val = np.zeros((x_val.shape[0], 1))
    y_test = np.zeros((x_test.shape[0], 1))

    # pytorch data loader
    train = data_utils.TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
    train_loader = data_utils.DataLoader(train, batch_size=args.batch_size, shuffle=True)

    validation = data_utils.TensorDataset(torch.from_numpy(x_val).float(), torch.from_numpy(y_val))
    val_loader = data_utils.DataLoader(validation, batch_size=args.test_batch_size, shuffle=False)

    test = data_utils.TensorDataset(torch.from_numpy(x_test).float(), torch.from_numpy(y_test))
    test_loader = data_utils.DataLoader(test, batch_size=args.test_batch_size, shuffle=True)

    return train_loader, val_loader, test_loader
def load_caltech101silhouettes(args):
    def reshape_data(data):
        # note: the original used order='fortran'; 'F' is the documented spelling
        return data.reshape((-1, 28, 28)).reshape((-1, 28 * 28), order='F')

    caltech_raw = loadmat(os.path.join('datasets', 'Caltech101Silhouettes',
                                       'caltech101_silhouettes_28_split1.mat'))

    # train, validation and test data
    x_train = reshape_data(caltech_raw['train_data'].astype('float32'))
    np.random.shuffle(x_train)
    x_val = reshape_data(caltech_raw['val_data'].astype('float32'))
    np.random.shuffle(x_val)
    x_test = reshape_data(caltech_raw['test_data'].astype('float32'))

    y_train = caltech_raw['train_labels']
    y_val = caltech_raw['val_labels']
    y_test = caltech_raw['test_labels']

    # pytorch data loader
    train = data_utils.TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
    train_loader = data_utils.DataLoader(train, batch_size=args.batch_size, shuffle=True)

    validation = data_utils.TensorDataset(torch.from_numpy(x_val).float(), torch.from_numpy(y_val))
    val_loader = data_utils.DataLoader(validation, batch_size=args.test_batch_size, shuffle=False)

    test = data_utils.TensorDataset(torch.from_numpy(x_test).float(), torch.from_numpy(y_test))
    test_loader = data_utils.DataLoader(test, batch_size=args.test_batch_size, shuffle=True)

    return train_loader, val_loader, test_loader
def prepare_mixture_gm_data(arguments):
    dataset = []

    arguments.L2 = 2
    arguments.L1 = 2
    arguments.K = 200

    sig0 = 5
    sig = 0.1
    num_means = arguments.num_means
    means = 5 * torch.randn(num_means, arguments.L2)
    arguments.means = means.numpy()

    N = 2000
    mixinds = torch.multinomial(torch.ones(num_means), N, replacement=True)
    obsnoise = torch.randn(N, arguments.L2)

    data = means[mixinds] + obsnoise
    inp = torch.randn(N, arguments.L1)

    # note: the third (list) argument relies on a project-local TensorDataset
    # variant; the stock torch.utils.data.TensorDataset accepts tensors only
    dataset1 = TensorDataset(inp, data, [1] * N)
    datasetmix = dataset1

    kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
    loader1 = data_utils.DataLoader(dataset1, batch_size=arguments.batch_size, shuffle=False, **kwargs)
    loader_mix = data_utils.DataLoader(datasetmix, batch_size=arguments.batch_size, shuffle=False, **kwargs)

    return loader1, loader_mix
def form_mixtures(digit1, digit2, loader, arguments):
    dataset1, dataset2 = [], []
    for i, (ft, tar) in enumerate(loader):
        # digit 1
        mask = torch.eq(tar, digit1)
        inds = torch.nonzero(mask).squeeze()
        ft1 = torch.index_select(ft, dim=0, index=inds)
        dataset1.append(ft1)

        # digit 2
        mask = torch.eq(tar, digit2)
        inds = torch.nonzero(mask).squeeze()
        ft2 = torch.index_select(ft, dim=0, index=inds)
        dataset2.append(ft2)
        print(i)

    dataset1 = torch.cat(dataset1, dim=0)
    dataset2 = torch.cat(dataset2, dim=0)

    if arguments.input_type == 'noise':
        inp1 = torch.randn(dataset1.size(0), arguments.L1)
        inp2 = torch.randn(dataset2.size(0), arguments.L1)
    elif arguments.input_type == 'autoenc':
        inp1 = dataset1
        inp2 = dataset2
    else:
        raise ValueError('Whaaaaaat input_type?')

    N1, N2 = dataset1.size(0), dataset2.size(0)
    Nmix = min([N1, N2])

    dataset_mix = dataset1[:Nmix] + dataset2[:Nmix]

    # data_tensor=/target_tensor= is the pre-0.4 PyTorch TensorDataset
    # signature; the lens= keyword comes from a project-local subclass
    dataset1 = TensorDataset(data_tensor=inp1, target_tensor=dataset1, lens=[1] * Nmix)
    dataset2 = data_utils.TensorDataset(data_tensor=inp2, target_tensor=dataset2)
    dataset_mix = data_utils.TensorDataset(data_tensor=dataset_mix, target_tensor=torch.ones(Nmix))

    kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
    loader1 = data_utils.DataLoader(dataset1, batch_size=arguments.batch_size, shuffle=False, **kwargs)
    loader2 = data_utils.DataLoader(dataset2, batch_size=arguments.batch_size, shuffle=False, **kwargs)
    loader_mix = data_utils.DataLoader(dataset_mix, batch_size=arguments.batch_size, shuffle=False, **kwargs)

    return loader1, loader2, loader_mix
def make_dataloader(xs, ys):
    xs = torch.Tensor(xs).cuda()
    ys = torch.Tensor(ys).cuda()
    # data_tensor=/target_tensor= keywords are the pre-0.4 PyTorch API
    torch_dataset = Data.TensorDataset(data_tensor=xs, target_tensor=ys)
    loader = Data.DataLoader(dataset=torch_dataset, batch_size=1, shuffle=True)
    return loader
def make_dataloader(xs, ys):
    xs = torch.Tensor(xs)
    ys = torch.Tensor(ys)
    torch_dataset = Data.TensorDataset(data_tensor=xs, target_tensor=ys)
    loader = Data.DataLoader(dataset=torch_dataset, batch_size=1, shuffle=True)
    return loader
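Note that the two make_dataloader examples above use the data_tensor=/target_tensor= keyword arguments of the TensorDataset constructor from PyTorch 0.3 and earlier; since PyTorch 0.4 the constructor is variadic (TensorDataset(*tensors)) and these keywords raise a TypeError. On a current PyTorch, the equivalent would be:

import torch
import torch.utils.data as Data

xs = torch.randn(10, 4)  # synthetic inputs
ys = torch.randn(10)     # synthetic targets

# PyTorch >= 0.4: the constructor is variadic, so pass tensors positionally
torch_dataset = Data.TensorDataset(xs, ys)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=1, shuffle=True)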
def test_add_dataset(self):
    d1 = TensorDataset(torch.rand(7, 3, 28, 28), torch.rand(7))
    d2 = TensorDataset(torch.rand(7, 3, 28, 28), torch.rand(7))
    d3 = TensorDataset(torch.rand(7, 3, 28, 28), torch.rand(7))
    result = d1 + d2 + d3
    self.assertEqual(21, len(result))
    self.assertEqual(0, (d1[0][0] - result[0][0]).abs().sum())
    self.assertEqual(0, (d2[0][0] - result[7][0]).abs().sum())
    self.assertEqual(0, (d3[0][0] - result[14][0]).abs().sum())
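The + operator in this test works because torch.utils.data.Dataset implements __add__ by wrapping its operands in a ConcatDataset, so the expression above could equivalently be written as:

from torch.utils.data import ConcatDataset

result = ConcatDataset([d1, d2, d3])  # same as d1 + d2 + d3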
def convert2tensor(self, args):
    data = np.asarray([e[0] for e in self.binary_train_dataset])
    target = np.asarray([e[1] for e in self.binary_train_dataset])
    tensor_data = torch.from_numpy(data)
    tensor_data = tensor_data.float()
    tensor_target = torch.from_numpy(target)
    train = data_utils.TensorDataset(tensor_data, tensor_target)
    train_loader = data_utils.DataLoader(train, batch_size=args.batch_size, shuffle=True)
    return train_loader
def process(self, classlabel):
    dataset_zip = np.load(join(self.root, "dsprites.npz"), encoding='bytes')
    imgs = torch.from_numpy(dataset_zip['imgs']).float()
    latents_values = torch.from_numpy(dataset_zip['latents_values'])
    latents_classes = torch.from_numpy(dataset_zip['latents_classes'])
    print("Dataset shape: %s" % str(tuple(imgs.size())))
    if classlabel:
        label = latents_classes
    else:
        label = latents_values
    dataset = tdata.TensorDataset(imgs, label)
    print(imgs[0])
    print(label[0])
    return dataset
def create_train_test_loaders(Q, x_train, x_test, y_train, y_test, batch_size):
    num_kernels = Q.shape[2]
    max_document_length = x_train.shape[1]
    dim = Q.shape[1]

    my_x = []
    for i in range(x_train.shape[0]):
        temp = np.zeros((1, num_kernels, max_document_length, dim))
        for j in range(num_kernels):
            for k in range(x_train.shape[1]):
                temp[0, j, k, :] = Q[x_train[i, k], :, j].squeeze()
        my_x.append(temp)

    if torch.cuda.is_available():
        tensor_x = torch.stack([torch.cuda.FloatTensor(i) for i in my_x])  # transform to torch tensors
        tensor_y = torch.cuda.LongTensor(y_train.tolist())
    else:
        tensor_x = torch.stack([torch.Tensor(i) for i in my_x])  # transform to torch tensors
        tensor_y = torch.from_numpy(np.asarray(y_train, dtype=np.int64))

    train_dataset = utils.TensorDataset(tensor_x, tensor_y)
    train_loader = utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    my_x = []
    for i in range(x_test.shape[0]):
        temp = np.zeros((1, num_kernels, max_document_length, dim))
        for j in range(num_kernels):
            for k in range(x_test.shape[1]):
                temp[0, j, k, :] = Q[x_test[i, k], :, j].squeeze()
        my_x.append(temp)

    if torch.cuda.is_available():
        tensor_x = torch.stack([torch.cuda.FloatTensor(i) for i in my_x])  # transform to torch tensors
        tensor_y = torch.cuda.LongTensor(y_test.tolist())
    else:
        tensor_x = torch.stack([torch.Tensor(i) for i in my_x])  # transform to torch tensors
        tensor_y = torch.from_numpy(np.asarray(y_test, dtype=np.int64))

    test_dataset = utils.TensorDataset(tensor_x, tensor_y)
    test_loader = utils.DataLoader(test_dataset, batch_size=1, shuffle=False)

    return train_loader, test_loader
def make_dataloader_custom_tensordataset(dir_data, data_transforms, ext_img,
                                         n_img_per_batch, n_worker):
    li_class = prepare_cifar10_dataset(dir_data, ext_img)
    n_class = len(li_class)
    li_set = ['train', 'test']
    features = {}
    targets = {}
    for set in li_set:  # note: 'set' shadows the builtin; kept from the original
        ts_img_total, ts_label_total = torch.Tensor(), torch.LongTensor()
        print('building input vectors for [%s]' % (set))
        for i_l, label in enumerate(li_class):
            print('dumping images of [%s] into memory' % (label))
            dir_label = join(join(dir_data, set), label)
            li_ts_img = [data_transforms[set](Image.open(join(dir_label, fn_img)).convert('RGB'))
                         for fn_img in listdir(dir_label) if fn_img.endswith(ext_img)]
            n_img_4_this_label = len(li_ts_img)
            ts_img = torch.stack(li_ts_img)
            ts_img_total = torch.cat((ts_img_total, ts_img))
            li_label = [i_l for i in range(n_img_4_this_label)]
            ts_label = torch.LongTensor(li_label)
            ts_label_total = torch.cat((ts_label_total, ts_label))
        features[set] = ts_img_total
        targets[set] = ts_label_total

    dsets = {x: utils_data.TensorDataset(features[x], targets[x]) for x in li_set}
    dset_loaders = {x: utils_data.DataLoader(dsets[x],
                                             batch_size=n_img_per_batch,
                                             shuffle=True,
                                             num_workers=n_worker)
                    for x in li_set}
    trainloader, testloader = dset_loaders[li_set[0]], dset_loaders[li_set[1]]
    return trainloader, testloader, li_class
def form_torch_audio_dataset(SPCSabs, SPCSphase, lens, arguments, loadertype):
    SPCSabs = torch.from_numpy(np.array(SPCSabs))
    if loadertype == 'mixture':
        SPCSphase = torch.from_numpy(np.array(SPCSphase))
        # data_tensor=/target_tensor= and lens= follow a project-local
        # TensorDataset variant, not the stock torch.utils.data class
        dataset = TensorDataset(data_tensor=SPCSabs,
                                target_tensor=SPCSphase,
                                lens=lens)
    elif loadertype == 'source':
        if arguments.input_type == 'noise':
            if arguments.noise_type == 'gamma':
                a, b = 1, 10
                b = 1 / float(b)
                sz = (SPCSabs.size(0), SPCSabs.size(1), arguments.L1)
                inp_np = np.random.gamma(a, b, sz)
                plt.matshow(inp_np.squeeze().transpose()[:, :50])
                inp = torch.from_numpy(inp_np).float()
            elif arguments.noise_type == 'bernoulli':
                sz = (SPCSabs.size(0), SPCSabs.size(1), arguments.L1)
                mat = (1 / float(8)) * torch.ones(sz)
                inp = torch.bernoulli(mat)
            elif arguments.noise_type == 'gaussian':
                inp = torch.randn(SPCSabs.size(0), SPCSabs.size(1), arguments.L1)
            else:
                raise ValueError('Whaaaat?')
        elif arguments.input_type == 'autoenc':
            inp = SPCSabs
            arguments.L1 = arguments.L2
        else:
            raise ValueError('Whaaaaaat input_type?')
        dataset = TensorDataset(data_tensor=inp, target_tensor=SPCSabs, lens=lens)
    else:
        raise ValueError('Whaaaat?')

    kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
    loader = data_utils.DataLoader(dataset, batch_size=arguments.batch_size, shuffle=True, **kwargs)
    return loader
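The lens= keyword and the three-argument call seen in several examples above (prepare_mixture_gm_data, form_mixtures, form_torch_audio_dataset) are not part of the stock torch.utils.data.TensorDataset; those projects evidently define their own variant. A hypothetical reconstruction of such a class, for orientation only:

import torch.utils.data as data_utils

class TensorDataset(data_utils.Dataset):
    """Hypothetical stand-in for the project-local TensorDataset used above;
    the real implementation may differ."""

    def __init__(self, data_tensor, target_tensor, lens=None):
        assert data_tensor.size(0) == target_tensor.size(0)
        self.data_tensor = data_tensor
        self.target_tensor = target_tensor
        self.lens = lens  # optional per-example metadata (e.g. sequence lengths)

    def __getitem__(self, index):
        if self.lens is None:
            return self.data_tensor[index], self.target_tensor[index]
        return self.data_tensor[index], self.target_tensor[index], self.lens[index]

    def __len__(self):
        return self.data_tensor.size(0)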