The following 5 code examples, extracted from open-source Python projects, illustrate how to use torch.utils.
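Before the project examples, here is a minimal, self-contained sketch of the core torch.utils.data pattern they all build on: subclass Dataset, then wrap it in a DataLoader for shuffling and batching. The toy shapes below are illustrative, not taken from any of the projects.

import torch
from torch.utils import data as data_utils

class ToyDataset(data_utils.Dataset):
    """Minimal map-style Dataset: indexed access plus a fixed length."""
    def __init__(self, n=100, dim=8):
        self.x = torch.randn(n, dim)          # toy features
        self.y = torch.randint(0, 2, (n,))    # toy binary labels

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)

loader = data_utils.DataLoader(ToyDataset(), batch_size=16, shuffle=True)
for x, y in loader:
    print(x.shape, y.shape)   # (16, 8) and (16,); the last batch may be smaller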
def test_sequence_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    # `_get_small_datasets` and `raises` come from the original test module's scope.
    X, Y = _get_small_datasets(padded=False)

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == len(y.shape)
            assert len(x.shape) == 3
            print(idx, x.shape, y.shape)

    # Test with batch_size = 1
    yield __test, X, Y, 1
    # Since we have variable-length frames, a batch size larger than 1 causes
    # a runtime error.
    yield raises(RuntimeError)(__test), X, Y, 2

    # For the padded dataset, which can be represented as (N, T^max, D), the
    # batch size can be any number.
    X, Y = _get_small_datasets(padded=True)
    yield __test, X, Y, 1
    yield __test, X, Y, 2
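The RuntimeError exercised above comes from the default collate function trying to stack tensors whose time dimensions differ. A common workaround, sketched here under the assumption that each dataset item is a (T, D) float tensor, is a custom collate_fn that pads each batch with torch.nn.utils.rnn.pad_sequence:

import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils import data as data_utils

def pad_collate(batch):
    # batch is a list of (x, y) pairs whose time dimension T varies per item
    xs, ys = zip(*batch)
    lengths = torch.tensor([x.shape[0] for x in xs])
    # Pad to the longest sequence in the batch: (B, T_max, D)
    x_padded = pad_sequence(xs, batch_first=True)
    y_padded = pad_sequence(ys, batch_first=True)
    return x_padded, y_padded, lengths

# Hypothetical usage with any Dataset returning variable-length (T, D) tensors:
# loader = data_utils.DataLoader(dataset, batch_size=2, collate_fn=pad_collate)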
def test_frame_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    # `np` (numpy), `_get_small_datasets`, and `MemoryCacheFramewiseDataset`
    # come from the original test module's scope.
    X, Y = _get_small_datasets(padded=False)

    # Since torch's Dataset (and Chainer's, and maybe others) assumes the
    # dataset has a fixed length, i.e., implements the `__len__` method, we
    # need to know the number of frames for each utterance.
    # The sum of the frame counts is the dataset size for frame-wise iteration.
    lengths = np.array([len(x) for x in X], dtype=int)

    # For the above reason, we need to explicitly give the number of frames.
    X = MemoryCacheFramewiseDataset(X, lengths, cache_size=len(X))
    Y = MemoryCacheFramewiseDataset(Y, lengths, cache_size=len(Y))

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == 2
            assert len(y.shape) == 2

    yield __test, X, Y, 128
    yield __test, X, Y, 256
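MemoryCacheFramewiseDataset is a helper from the library under test; conceptually it flattens a list of (T_i, D) utterances into a single frame-level dataset. A minimal sketch of that index mapping, assuming (not reading) the helper's internals, keeps cumulative lengths and resolves a global frame index with searchsorted:

import numpy as np

class FramewiseView:
    """Hypothetical stand-in for the frame-wise dataset used above:
    maps a global frame index to (utterance, frame-within-utterance)."""
    def __init__(self, utterances, lengths):
        self.utterances = utterances
        # offsets[i] is the global index of utterance i's first frame
        self.offsets = np.cumsum(np.concatenate([[0], lengths]))

    def __getitem__(self, idx):
        # Find the utterance whose index range contains idx
        utt = int(np.searchsorted(self.offsets, idx, side="right")) - 1
        return self.utterances[utt][idx - self.offsets[utt]]

    def __len__(self):
        return int(self.offsets[-1])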
def forward(self, sequence, lengths, h, c):
    # Pack the zero-padded batch so the LSTM skips padding timesteps
    sequence = nn.utils.rnn.pack_padded_sequence(sequence, lengths,
                                                 batch_first=True)
    output, (h, c) = self.lstm(sequence, (h, c))
    # Unpack back to a padded (B, T_max, H) tensor
    output, output_lengths = nn.utils.rnn.pad_packed_sequence(
        output, batch_first=True)
    output = self.hidden2out(output)
    return output
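For context, here is a hedged usage sketch around a forward like the one above; the module definition, layer sizes, and shapes are assumptions, not taken from the source project. Note that pack_padded_sequence expects lengths sorted in descending order unless enforce_sorted=False is passed:

import torch
from torch import nn

class SeqTagger(nn.Module):
    # Hypothetical module matching the forward() shown above.
    def __init__(self, in_dim=10, hidden=32, out_dim=5):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, hidden, batch_first=True)
        self.hidden2out = nn.Linear(hidden, out_dim)

    def forward(self, sequence, lengths, h, c):
        sequence = nn.utils.rnn.pack_padded_sequence(
            sequence, lengths, batch_first=True, enforce_sorted=False)
        output, (h, c) = self.lstm(sequence, (h, c))
        output, _ = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
        return self.hidden2out(output)

model = SeqTagger()
x = torch.randn(2, 7, 10)        # (batch, T_max, features), zero-padded
lengths = torch.tensor([7, 4])   # true length of each sequence
h0 = torch.zeros(1, 2, 32)       # (num_layers, batch, hidden)
c0 = torch.zeros(1, 2, 32)
out = model(x, lengths, h0, c0)  # (2, 7, 5)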
def load_state_dict(model, model_urls, model_root):
    from torch.utils import model_zoo
    from torch import nn
    import re
    from collections import OrderedDict

    own_state_old = model.state_dict()
    own_state = OrderedDict()
    # Remove all 'group' segments from the model's parameter names
    for k, v in own_state_old.items():
        k = re.sub(r'group\d+\.', '', k)
        own_state[k] = v

    state_dict = model_zoo.load_url(model_urls, model_root)

    for name, param in state_dict.items():
        if name not in own_state:
            print(own_state.keys())
            raise KeyError('unexpected key "{}" in state_dict'
                           .format(name))
        if isinstance(param, nn.Parameter):
            # backwards compatibility for serialized parameters
            param = param.data
        own_state[name].copy_(param)

    missing = set(own_state.keys()) - set(state_dict.keys())
    if len(missing) > 0:
        raise KeyError('missing keys in state_dict: "{}"'.format(missing))
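The re.sub step exists to strip 'groupN.' segments so the model's grouped parameter names line up with the flat names in the downloaded checkpoint. A quick illustration (the parameter name here is made up):

import re

print(re.sub(r'group\d+\.', '', 'features.group1.conv1.weight'))
# -> 'features.conv1.weight'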
def main():
    global args
    args = parser.parse_args()
    print('\nCUDA status: {}'.format(args.cuda))

    print('\nLoad pretrained model on Imagenet')
    model = pretrainedmodels.__dict__[args.arch](num_classes=1000,
                                                 pretrained='imagenet')
    model.eval()
    if args.cuda:
        model.cuda()

    features_size = model.last_linear.in_features
    # Trick to get the inputs (features) of last_linear
    model.last_linear = pretrainedmodels.utils.Identity()

    print('\nLoad datasets')
    tf_img = pretrainedmodels.utils.TransformImage(model)
    train_set = pretrainedmodels.datasets.Voc2007Classification(
        args.dir_datasets, 'train', transform=tf_img)
    val_set = pretrainedmodels.datasets.Voc2007Classification(
        args.dir_datasets, 'val', transform=tf_img)
    test_set = pretrainedmodels.datasets.Voc2007Classification(
        args.dir_datasets, 'test', transform=tf_img)

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, shuffle=False, num_workers=2)
    val_loader = torch.utils.data.DataLoader(
        val_set, batch_size=args.batch_size, shuffle=False, num_workers=2)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.batch_size, shuffle=False, num_workers=2)

    print('\nLoad features')
    dir_features = os.path.join(args.dir_outputs, 'data/{}'.format(args.arch))
    path_train_data = '{}/{}set.pth'.format(dir_features, 'train')
    path_val_data = '{}/{}set.pth'.format(dir_features, 'val')
    path_test_data = '{}/{}set.pth'.format(dir_features, 'test')

    features = {}
    targets = {}
    features['train'], targets['train'] = extract_features_targets(
        model, features_size, train_loader, path_train_data, args.cuda)
    features['val'], targets['val'] = extract_features_targets(
        model, features_size, val_loader, path_val_data, args.cuda)
    features['test'], targets['test'] = extract_features_targets(
        model, features_size, test_loader, path_test_data, args.cuda)
    features['trainval'] = torch.cat([features['train'], features['val']], 0)
    targets['trainval'] = torch.cat([targets['train'], targets['val']], 0)

    print('\nTrain Support Vector Machines')
    if args.train_split == 'train' and args.test_split == 'val':
        print('\nHyperparameters search: train multilabel classifiers '
              '(one-versus-all) on train/val')
    elif args.train_split == 'trainval' and args.test_split == 'test':
        print('\nEvaluation: train a multilabel classifier on trainval/test')
    else:
        raise ValueError('Trying to train on {} and eval on {}'.format(
            args.train_split, args.test_split))

    train_multilabel(features, targets, train_set.classes,
                     args.train_split, args.test_split, C=args.C)
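extract_features_targets is defined elsewhere in that project. As a rough sketch of what such a helper could look like, the version below runs the headless model (last_linear replaced by Identity) over a loader once and caches the result to disk; the signature mirrors the call sites above, but this is not the project's actual implementation, and it assumes the loader yields plain (input, target) pairs:

import os
import torch

def extract_features_targets(model, features_size, loader, path_data, cuda=False):
    """Hypothetical sketch: extract penultimate-layer features for a whole
    split, caching (features, targets) at path_data for later runs."""
    if os.path.isfile(path_data):
        return torch.load(path_data)

    features, targets = [], []
    with torch.no_grad():
        for input, target in loader:
            if cuda:
                input = input.cuda()
            output = model(input)        # (B, features_size) after the Identity head
            features.append(output.cpu())
            targets.append(target)

    features = torch.cat(features, 0)
    targets = torch.cat(targets, 0)
    os.makedirs(os.path.dirname(path_data), exist_ok=True)
    torch.save((features, targets), path_data)
    return features, targets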