The following 38 code examples, extracted from open-source Python projects, illustrate how to use torchvision.transforms.CenterCrop().
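Before the project snippets, a minimal sketch of what CenterCrop does on its own (the blank 400x300 image is a stand-in for a real photo):

from PIL import Image
from torchvision import transforms

img = Image.new('RGB', (400, 300))   # stand-in for a real photo

# CenterCrop keeps the central region of the requested size.
crop = transforms.CenterCrop(224)
print(crop(img).size)                # (224, 224)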
def __call__(self, img):
    for attempt in range(10):
        area = img.size[0] * img.size[1]
        target_area = random.uniform(0.9, 1.) * area
        aspect_ratio = random.uniform(7. / 8, 8. / 7)

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        if random.random() < 0.5:
            w, h = h, w

        if w <= img.size[0] and h <= img.size[1]:
            x1 = random.randint(0, img.size[0] - w)
            y1 = random.randint(0, img.size[1] - h)

            img = img.crop((x1, y1, x1 + w, y1 + h))
            assert (img.size == (w, h))

            return img.resize((self.size, self.size), self.interpolation)

    # Fallback
    scale = Scale(self.size, interpolation=self.interpolation)
    crop = CenterCrop(self.size)
    return crop(scale(img))
def __call__(self, img):
    for attempt in range(10):
        area = img.size[0] * img.size[1]
        target_area = random.uniform(0.70, 0.98) * area
        aspect_ratio = random.uniform(5. / 8, 8. / 5)

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        if random.random() < 0.5:
            w, h = h, w

        if w <= img.size[0] and h <= img.size[1]:
            x1 = random.randint(0, img.size[0] - w)
            y1 = random.randint(0, img.size[1] - h)

            img = img.crop((x1, y1, x1 + w, y1 + h))
            assert (img.size == (w, h))

            return img.resize((self.size, self.size), self.interpolation)

    # Fallback
    scale = Scale(self.size, interpolation=self.interpolation)
    crop = CenterCrop(self.size)
    return crop(scale(img))
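The two __call__ implementations above differ only in their sampling ranges; both fall back to the standard evaluation-style resize-then-center-crop after ten failed attempts. That fallback in isolation (sizes are illustrative; Scale was later renamed Resize in torchvision):

from PIL import Image
from torchvision.transforms import Scale, CenterCrop

img = Image.new('RGB', (500, 375))   # stand-in input
scale = Scale(224)                   # shorter side -> 224, aspect ratio kept
crop = CenterCrop(224)               # then take the 224x224 center
print(crop(scale(img)).size)         # (224, 224)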
def test_getitem(self):
    import torchvision.transforms as t
    from reid.datasets.viper import VIPeR
    from reid.utils.data.preprocessor import Preprocessor

    root, split_id, num_val = '/tmp/open-reid/viper', 0, 100
    dataset = VIPeR(root, split_id=split_id, num_val=num_val, download=True)

    preproc = Preprocessor(dataset.train, root=dataset.images_dir,
                           transform=t.Compose([
                               t.Scale(256),
                               t.CenterCrop(224),
                               t.ToTensor(),
                               t.Normalize(mean=[0.485, 0.456, 0.406],
                                           std=[0.229, 0.224, 0.225])
                           ]))
    self.assertEquals(len(preproc), len(dataset.train))
    img, pid, camid = preproc[0]
    self.assertEquals(img.size(), (3, 224, 224))
def imagenet():
    channel_stats = dict(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
    train_transformation = data.TransformTwice(transforms.Compose([
        transforms.RandomRotation(10),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4,
                               saturation=0.4, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(**channel_stats)
    ]))
    eval_transformation = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(**channel_stats)
    ])

    return {
        'train_transformation': train_transformation,
        'eval_transformation': eval_transformation,
        'datadir': 'data-local/images/ilsvrc2012/',
        'num_classes': 1000
    }
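A sketch of how the returned dictionary might be consumed (the ImageFolder layout under datadir is an assumption; TransformTwice presumably yields two augmented views per image):

from torchvision import datasets

cfg = imagenet()
train_set = datasets.ImageFolder(cfg['datadir'] + 'train',
                                 cfg['train_transformation'])
eval_set = datasets.ImageFolder(cfg['datadir'] + 'val',
                                cfg['eval_transformation'])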
def imagenet_transform(scale_size=256, input_size=224, train=True, allow_var_size=False):
    normalize = {'mean': [0.485, 0.456, 0.406],
                 'std': [0.229, 0.224, 0.225]}

    if train:
        return transforms.Compose([
            transforms.Scale(scale_size),
            transforms.RandomCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(**normalize)
        ])
    elif allow_var_size:
        return transforms.Compose([
            transforms.Scale(scale_size),
            transforms.ToTensor(),
            transforms.Normalize(**normalize)
        ])
    else:
        return transforms.Compose([
            transforms.Scale(scale_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(**normalize)
        ])
def LSUN_loader(root, image_size, classes=['bedroom'], normalize=True):
    """
    Function to load torchvision dataset object based on just image size
    Args:
        root = If your dataset is downloaded and ready to use, mention the
               location of this folder. Else, the dataset will be downloaded
               to this location
        image_size = Size of every image
        classes = Default class is 'bedroom'. Other available classes are:
                  'bridge', 'church_outdoor', 'classroom', 'conference_room',
                  'dining_room', 'kitchen', 'living_room', 'restaurant', 'tower'
        normalize = Requirement to normalize the image. Default is true
    """
    transformations = [transforms.Scale(image_size),
                       transforms.CenterCrop(image_size),
                       transforms.ToTensor()]
    if normalize:
        transformations.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)))
    # torchvision's LSUN expects class names with a '_train' suffix.
    classes = [c + '_train' for c in classes]
    lsun_data = dset.LSUN(db_path=root, classes=classes,
                          transform=transforms.Compose(transformations))
    return lsun_data
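A hypothetical call to the loader above (./lsun must already hold the downloaded LSUN database):

import torch

lsun_data = LSUN_loader(root='./lsun', image_size=64,
                        classes=['bedroom'], normalize=True)
loader = torch.utils.data.DataLoader(lsun_data, batch_size=64, shuffle=True)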
def transform(is_train=True, normalize=True):
    """
    Returns a transform object
    """
    filters = []
    filters.append(Scale(256))

    if is_train:
        filters.append(RandomCrop(224))
    else:
        filters.append(CenterCrop(224))

    if is_train:
        filters.append(RandomHorizontalFlip())

    filters.append(ToTensor())
    if normalize:
        filters.append(Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]))
    return Compose(filters)
def load_data(resize):
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomSizedCrop(max(resize)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            # Higher scale-up for inception
            transforms.Scale(int(max(resize) / 224 * 256)),
            transforms.CenterCrop(max(resize)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    data_dir = 'PlantVillage'
    dsets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
             for x in ['train', 'val']}
    # batch_size is a module-level global in the source project.
    dset_loaders = {x: torch.utils.data.DataLoader(dsets[x], batch_size=batch_size,
                                                   shuffle=True)
                    for x in ['train', 'val']}
    dset_sizes = {x: len(dsets[x]) for x in ['train', 'val']}
    dset_classes = dsets['train'].classes

    return dset_loaders['train'], dset_loaders['val']
def scale_crop(input_size, scale_size=None, normalize=__imagenet_stats):
    t_list = [
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(**normalize),
    ]
    if scale_size != input_size:
        t_list = [transforms.Scale(scale_size)] + t_list

    return transforms.Compose(t_list)
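With scale_size=256 and input_size=224 the helper above builds the canonical Scale, CenterCrop, ToTensor, Normalize evaluation pipeline; when the two sizes match, the Scale step is skipped entirely. A usage sketch (assuming the module-level __imagenet_stats dict from the source):

eval_tf = scale_crop(input_size=224, scale_size=256)   # Scale(256) then CenterCrop(224)
tight_tf = scale_crop(input_size=224, scale_size=224)  # CenterCrop(224) only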
def __call__(self, img):
    img_flip = img.transpose(Image.FLIP_LEFT_RIGHT)
    center_crop = transforms.CenterCrop(self.size)
    img_list = []
    w, h = img.size
    for image in [img, img_flip]:
        img_list.append(center_crop(image))
        img_list.append(image.crop((0, 0, self.size, self.size)))
        img_list.append(image.crop((w - self.size, 0, w, self.size)))
        img_list.append(image.crop((0, h - self.size, self.size, h)))
        img_list.append(image.crop((w - self.size, h - self.size, w, h)))

    imgs = None
    to_tensor = transforms.ToTensor()
    for image in img_list:
        if imgs is None:
            temp_img = to_tensor(image)
            imgs = self.normalize(temp_img)
        else:
            temp_img = to_tensor(image)
            temp_img = self.normalize(temp_img)
            imgs = torch.cat((imgs, temp_img))
    return imgs
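One detail worth noting in the ten-crop __call__ above: torch.cat joins the ten 3-channel crops along dimension 0, so the result has shape (30, size, size) rather than (10, 3, size, size). A consumer that wants a per-crop axis can recover it (a sketch, assuming imgs is the returned tensor):

# Recover the crop axis from the (30, size, size) concatenation.
crops = imgs.view(10, 3, imgs.size(-2), imgs.size(-1))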
# ---------------------------------------------------------------------------------------------
# from: https://github.com/eladhoffer/convNet.pytorch/blob/master/preprocess.py
def default_inception_transform(img_size):
    tf = transforms.Compose([
        transforms.Scale(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        LeNormalize(),
    ])
    return tf
def default_transform(size):
    transform = transforms.Compose([
        transforms.Scale(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ResNet ImageNet stats
                             std=[0.229, 0.224, 0.225])
    ])
    return transform
def img_transform(crop_size, upscale_factor=1):
    return transforms.Compose([
        transforms.Scale(crop_size // upscale_factor),
        transforms.CenterCrop(crop_size // upscale_factor),
        transforms.ToTensor()])
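The helper above pairs a Scale and a CenterCrop at the same reduced size, a common way to derive low-resolution inputs for super-resolution models. For example (sizes illustrative):

lr_tf = img_transform(crop_size=256, upscale_factor=4)  # 64x64 tensors
hr_tf = img_transform(crop_size=256)                    # 256x256 tensors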
def image_loader(image_name, max_sz=256):
    """ forked from pytorch tutorials """
    r_image = Image.open(image_name)
    mindim = np.min((np.max(r_image.size[:2]), max_sz))
    loader = transforms.Compose([transforms.CenterCrop(mindim),
                                 transforms.ToTensor()])
    image = Variable(loader(r_image))
    return image.unsqueeze(0)
def __init__(self, args):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.4, 0.4, 0.4),
        transforms.ToTensor(),
        Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']),
        normalize,
    ])
    transform_test = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    trainset = MINCDataloder(root=os.path.expanduser('~/data/minc-2500/'),
                             train=True, transform=transform_train)
    testset = MINCDataloder(root=os.path.expanduser('~/data/minc-2500/'),
                            train=False, transform=transform_test)

    kwargs = {'num_workers': 8, 'pin_memory': True} if args.cuda else {}
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                              shuffle=True, **kwargs)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size,
                                             shuffle=False, **kwargs)
    self.trainloader = trainloader
    self.testloader = testloader
def __init__(self, opt):
    transform_list = []
    if (opt.crop_height > 0) and (opt.crop_width > 0):
        # CenterCrop takes a single size argument, here an (h, w) pair.
        transform_list.append(transforms.CenterCrop((opt.crop_height, opt.crop_width)))
    elif opt.crop_size > 0:
        transform_list.append(transforms.CenterCrop(opt.crop_size))
    transform_list.append(transforms.Scale(opt.image_size))
    transform_list.append(transforms.CenterCrop(opt.image_size))
    transform_list.append(transforms.ToTensor())

    if opt.dataset == 'cifar10':
        dataset1 = datasets.CIFAR10(root=opt.dataroot, download=True,
                                    transform=transforms.Compose(transform_list))
        dataset2 = datasets.CIFAR10(root=opt.dataroot, train=False,
                                    transform=transforms.Compose(transform_list))

        def get_data(k):
            if k < len(dataset1):
                return dataset1[k][0]
            else:
                return dataset2[k - len(dataset1)][0]
    else:
        if opt.dataset in ['imagenet', 'folder', 'lfw']:
            dataset = datasets.ImageFolder(root=opt.dataroot,
                                           transform=transforms.Compose(transform_list))
        elif opt.dataset == 'lsun':
            dataset = datasets.LSUN(db_path=opt.dataroot,
                                    classes=[opt.lsun_class + '_train'],
                                    transform=transforms.Compose(transform_list))

        def get_data(k):
            return dataset[k][0]

    data_index = torch.load(os.path.join(opt.dataroot, 'data_index.pt'))
    train_index = data_index['train']

    self.opt = opt
    self.get_data = get_data
    self.train_index = data_index['train']
    self.counter = 0
def get_dataloader(opt):
    if opt.dataset in ['imagenet', 'folder', 'lfw']:
        # folder dataset
        dataset = dset.ImageFolder(root=opt.dataroot,
                                   transform=transforms.Compose([
                                       transforms.Scale(opt.imageScaleSize),
                                       transforms.CenterCrop(opt.imageSize),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5, 0.5, 0.5),
                                                            (0.5, 0.5, 0.5)),
                                   ]))
    elif opt.dataset == 'lsun':
        dataset = dset.LSUN(db_path=opt.dataroot, classes=['bedroom_train'],
                            transform=transforms.Compose([
                                transforms.Scale(opt.imageScaleSize),
                                transforms.CenterCrop(opt.imageSize),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5),
                                                     (0.5, 0.5, 0.5)),
                            ]))
    elif opt.dataset == 'cifar10':
        dataset = dset.CIFAR10(root=opt.dataroot, download=True,
                               transform=transforms.Compose([
                                   transforms.Scale(opt.imageSize),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5),
                                                        (0.5, 0.5, 0.5)),
                               ]))
    assert dataset
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size,
                                             shuffle=True, num_workers=int(opt.workers))
    return dataloader
def Imagenet_LMDB_generate(imagenet_dir, output_dir, make_val=False, make_train=False):
    # imagenet_dir should have a directory named 'train' or 'val',
    # with 1000 folders of raw jpeg photos
    train_name = 'imagenet_train_lmdb'
    val_name = 'imagenet_val_lmdb'

    def target_trans(target):
        return target

    if make_val:
        val_lmdb = lmdb_datasets.LMDB_generator(osp.join(output_dir, val_name))

        def trans_val_data(img):
            tensor = transforms.Compose([
                transforms.Scale(256),
                transforms.CenterCrop(224),
                transforms.ToTensor()
            ])(img)
            tensor = (tensor.numpy() * 255).astype(np.uint8)
            return tensor

        val = datasets.ImageFolder(osp.join(imagenet_dir, 'val'),
                                   trans_val_data, target_trans)
        val_lmdb.write_classification_lmdb(val, num_per_dataset=DATASET_SIZE)

    if make_train:
        train_lmdb = lmdb_datasets.LMDB_generator(osp.join(output_dir, train_name))

        def trans_train_data(img):
            tensor = transforms.Compose([
                transforms.Scale(256),
                transforms.ToTensor()
            ])(img)
            tensor = (tensor.numpy() * 255).astype(np.uint8)
            return tensor

        train = datasets.ImageFolder(osp.join(imagenet_dir, 'train'),
                                     trans_train_data, target_trans)
        train.imgs = np.random.permutation(train.imgs)
        train_lmdb.write_classification_lmdb(train, num_per_dataset=DATASET_SIZE,
                                             write_shape=True)
def __init_functions(self, w, h):
    funcs = []
    transp = lambda x: x.transpose(Image.FLIP_LEFT_RIGHT)

    # Crops of the original image: center, then the four corners.
    funcs.append(lambda _img: CenterCrop(self.size)(_img))
    funcs.append(lambda _img: _img.crop((0, 0, self.size, self.size)))
    funcs.append(lambda _img: _img.crop((w - self.size, 0, w, self.size)))
    funcs.append(lambda _img: _img.crop((0, h - self.size, self.size, h)))
    funcs.append(lambda _img: _img.crop((w - self.size, h - self.size, w, h)))

    # The same five crops on the horizontally flipped image.
    funcs.append(lambda _img: CenterCrop(self.size)(transp(_img)))
    funcs.append(lambda _img: transp(_img).crop((0, 0, self.size, self.size)))
    funcs.append(lambda _img: transp(_img).crop((w - self.size, 0, w, self.size)))
    funcs.append(lambda _img: transp(_img).crop((0, h - self.size, self.size, h)))
    funcs.append(lambda _img: transp(_img).crop((w - self.size, h - self.size, w, h)))
    return funcs
def imagenet_like():
    crop_size = 299  # 224
    train_transformations = transforms.Compose([
        transforms.RandomSizedCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        lambda img: img if random.random() < 0.5 else img.transpose(Image.FLIP_TOP_BOTTOM),
        transforms.ToTensor(),
        ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        normalize,
    ])
    val_transformations = transforms.Compose([
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
        normalize,
    ])
    test_transformation = transforms.Compose([
        # TenCropPick(224),
        SpatialPick(),
        # transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
        normalize,
    ])
    return {'train': train_transformations,
            'val': val_transformations,
            'test': test_transformation}
def get_transform(target_size, central_fraction=1.0):
    return transforms.Compose([
        transforms.Scale(int(target_size / central_fraction)),
        transforms.CenterCrop(target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
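The arithmetic here mirrors the usual ImageNet convention: central_fraction=0.875 with target_size=224 gives Scale(int(224 / 0.875)) == Scale(256) ahead of the 224 center crop. For instance:

tf = get_transform(224, central_fraction=0.875)  # Scale(256) -> CenterCrop(224)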
def getLoader(datasetName, dataroot, originalSize, imageSize, batchSize=64,
              workers=4, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
              split='train', shuffle=True, seed=None):
    #import pdb; pdb.set_trace()
    from datasets.folder import ImageFolder as commonDataset
    import torchvision.transforms as transforms

    if split == 'train':
        dataset = commonDataset(root=dataroot,
                                transform=transforms.Compose([
                                    transforms.Scale(originalSize),
                                    transforms.RandomCrop(imageSize),
                                    transforms.RandomHorizontalFlip(),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean, std),
                                ]),
                                seed=seed)
    else:
        dataset = commonDataset(root=dataroot,
                                transform=transforms.Compose([
                                    transforms.Scale(originalSize),
                                    transforms.CenterCrop(imageSize),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean, std),
                                ]),
                                seed=seed)
    assert dataset
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batchSize,
                                             shuffle=shuffle, num_workers=int(workers))
    return dataloader
def __init__(self, model, scale=1.050):
    self.input_size = model.input_size
    self.input_space = model.input_space
    self.input_range = model.input_range
    self.mean = model.mean
    self.std = model.std
    self.scale = scale

    self.tf = transforms.Compose([
        transforms.Scale(int(round(max(self.input_size) * self.scale))),
        transforms.CenterCrop(max(self.input_size)),
        transforms.ToTensor(),
        ToSpaceBGR(self.input_space == 'BGR'),
        ToRange255(max(self.input_range) == 255),
        transforms.Normalize(mean=self.mean, std=self.std)
    ])
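This constructor reads preprocessing metadata off a pretrained-model wrapper; with input_size [3, 224, 224] and the default scale of 1.050 it builds Scale(round(224 * 1.05)) == Scale(235) followed by CenterCrop(224). A sketch with a stand-in metadata object (the enclosing class name is not shown above, so TransformImage is an assumption):

from types import SimpleNamespace

# Hypothetical stand-in for a pretrained model's preprocessing attributes.
model = SimpleNamespace(input_size=[3, 224, 224], input_space='RGB',
                        input_range=[0, 1],
                        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
tf = TransformImage(model)  # Scale(235) -> CenterCrop(224) -> ToTensor -> ...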
def get_augmented_test_set(data_root, idx_file, scale_size, crop_size,
                           aug_type='ten_crop', seg_root=None, mixture=False):
    dsets = []
    if aug_type == 'ten_crop':
        crop_types = [0, 1, 2, 3, 4]
        # 0: center crop,
        # 1: top left crop, 2: top right crop
        # 3: bottom right crop, 4: bottom left crop
        flips = [0, 1]  # 0: no flip, 1: horizontal flip
        for i in crop_types:
            for j in flips:
                data_transform = transforms.Compose([
                    transforms.Scale(scale_size),
                    # transforms.CenterCrop(crop_size),
                    transforms.ToTensor(),
                    RandomFlip(flips[j]),
                    SpecialCrop((crop_size, crop_size), crop_type=crop_types[i]),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                if mixture:
                    seg_transform = transforms.Compose([
                        transforms.Scale(crop_size),
                        # transforms.CenterCrop(crop_size),
                        transforms.ToTensor(),
                        RandomFlip(flips[j]),
                        # SpecialCrop(crop_size=(crop_size, crop_size), crop_type=crop_types[i]),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                    ])
                    dsets.append(MyImageFolder(root=data_root, idx_file=idx_file,
                                               transform=data_transform,
                                               seg_transform=seg_transform,
                                               seg_root=seg_root))
                else:
                    dsets.append(MyImageFolder(root=data_root, idx_file=idx_file,
                                               transform=data_transform))
    return dsets
def get_transform(data_name, split_name, opt):
    normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
    t_list = []
    if split_name == 'train':
        t_list = [transforms.RandomSizedCrop(opt.crop_size),
                  transforms.RandomHorizontalFlip()]
    elif split_name == 'val':
        t_list = [transforms.Scale(256), transforms.CenterCrop(224)]
    elif split_name == 'test':
        t_list = [transforms.Scale(256), transforms.CenterCrop(224)]

    t_end = [transforms.ToTensor(), normalizer]
    transform = transforms.Compose(t_list + t_end)
    return transform
def get_transforms_eval(model_name, img_size=224, crop_pct=None):
    if 'dpn' in model_name:
        if crop_pct is None:
            # Use default 87.5% crop for model's native img_size
            # but use 100% crop for larger than native as it
            # improves test time results across all models.
            if img_size == 224:
                scale_size = int(math.floor(img_size / DEFAULT_CROP_PCT))
            else:
                scale_size = img_size
        else:
            scale_size = int(math.floor(img_size / crop_pct))
        normalize = transforms.Normalize(
            mean=[124 / 255, 117 / 255, 104 / 255],
            std=[1 / (.0167 * 255)] * 3)
    elif 'inception' in model_name:
        scale_size = int(math.floor(img_size / (crop_pct or DEFAULT_CROP_PCT)))
        normalize = LeNormalize()
    else:
        scale_size = int(math.floor(img_size / (crop_pct or DEFAULT_CROP_PCT)))
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])

    return transforms.Compose([
        transforms.Scale(scale_size, Image.BICUBIC),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        normalize])
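The crop-percentage arithmetic reproduces the standard 224/256 relationship: with DEFAULT_CROP_PCT = 0.875, int(math.floor(224 / 0.875)) == 256. For example:

eval_tf = get_transforms_eval('resnet50', img_size=224)
# -> Scale(256, Image.BICUBIC), CenterCrop(224), ToTensor, ImageNet Normalize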
def __init__(self, num_classes=1000):
    super(AlexNetOWT_BN, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(192),
        nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(384),
        nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(256),
        nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(256)
    )
    self.classifier = nn.Sequential(
        nn.Linear(256 * 6 * 6, 4096, bias=False),
        nn.BatchNorm1d(4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096, bias=False),
        nn.BatchNorm1d(4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, num_classes)
    )
    self.regime = [
        {'epoch': 0, 'optimizer': 'SGD', 'lr': 1e-2,
         'weight_decay': 5e-4, 'momentum': 0.9},
        {'epoch': 10, 'lr': 5e-3},
        {'epoch': 15, 'lr': 1e-3, 'weight_decay': 0},
        {'epoch': 20, 'lr': 5e-4},
        {'epoch': 25, 'lr': 1e-4}
    ]
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    self.input_transform = {
        'train': transforms.Compose([
            transforms.Scale(256),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]),
        'eval': transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize
        ])
    }
def init_cnn(self, opt):
    """Lazy initialization of preprocessor model in case we don't need any
    image preprocessing.
    """
    try:
        import torch
        self.use_cuda = (not opt.get('no_cuda', False)
                         and torch.cuda.is_available())
        self.torch = torch
    except ModuleNotFoundError:
        raise ModuleNotFoundError('Need to install Pytorch: go to pytorch.org')
    from torch.autograd import Variable
    import torchvision
    import torchvision.transforms as transforms
    import torch.nn as nn

    try:
        import h5py
        self.h5py = h5py
    except ModuleNotFoundError:
        raise ModuleNotFoundError('Need to install h5py')

    if 'image_mode' not in opt or 'image_size' not in opt:
        raise RuntimeError(
            'Need to add image arguments to opt. See '
            'parlai.core.params.ParlaiParser.add_image_args')
    self.image_mode = opt['image_mode']
    self.image_size = opt['image_size']
    self.crop_size = opt['image_cropsize']

    if self.use_cuda:
        print('[ Using CUDA ]')
        torch.cuda.set_device(opt.get('gpu', -1))

    cnn_type, layer_num = self.image_mode_switcher()

    # initialize the pretrained CNN using pytorch.
    CNN = getattr(torchvision.models, cnn_type)

    # cut off the additional layer.
    self.netCNN = nn.Sequential(
        *list(CNN(pretrained=True).children())[:layer_num])

    # initialize the transform function using torch vision.
    self.transform = transforms.Compose([
        transforms.Scale(self.image_size),
        transforms.CenterCrop(self.crop_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # container for single image
    self.xs = torch.zeros(1, 3, self.crop_size, self.crop_size)

    if self.use_cuda:
        self.netCNN.cuda()
        self.xs = self.xs.cuda()

    # make self.xs variable.
    self.xs = Variable(self.xs)
def get_data(args, train_flag=True):
    transform = transforms.Compose([
        transforms.Scale(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    if args.dataset in ['imagenet', 'folder', 'lfw']:
        dataset = dset.ImageFolder(root=args.dataroot, transform=transform)
    elif args.dataset == 'lsun':
        dataset = dset.LSUN(db_path=args.dataroot, classes=['bedroom_train'],
                            transform=transform)
    elif args.dataset == 'cifar10':
        dataset = dset.CIFAR10(root=args.dataroot, download=True,
                               train=train_flag, transform=transform)
    elif args.dataset == 'cifar100':
        dataset = dset.CIFAR100(root=args.dataroot, download=True,
                                train=train_flag, transform=transform)
    elif args.dataset == 'mnist':
        dataset = dset.MNIST(root=args.dataroot, download=True,
                             train=train_flag, transform=transform)
    elif args.dataset == 'celeba':
        imdir = 'train' if train_flag else 'val'
        dataroot = os.path.join(args.dataroot, imdir)
        if args.image_size != 64:
            raise ValueError('the image size for CelebA dataset need to be 64!')
        dataset = FolderWithImages(root=dataroot,
                                   input_transform=transforms.Compose([
                                       ALICropAndScale(),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.5, 0.5, 0.5),
                                                            (0.5, 0.5, 0.5)),
                                   ]),
                                   target_transform=transforms.ToTensor())
    else:
        raise ValueError("Unknown dataset %s" % (args.dataset))

    return dataset
def __init__(self, num_classes=1000):
    super(AlexNetOWT_BN, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2, bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 192, kernel_size=5, padding=2, bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(192),
        nn.Conv2d(192, 384, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(384),
        nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(256),
        nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.ReLU(inplace=True),
        nn.BatchNorm2d(256)
    )
    self.classifier = nn.Sequential(
        nn.Linear(256 * 6 * 6, 4096, bias=False),
        nn.BatchNorm1d(4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096, bias=False),
        nn.BatchNorm1d(4096),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(4096, num_classes)
    )
    self.regime = {
        0: {'optimizer': 'SGD', 'lr': 1e-2,
            'weight_decay': 5e-4, 'momentum': 0.9},
        10: {'lr': 5e-3},
        15: {'lr': 1e-3, 'weight_decay': 0},
        20: {'lr': 5e-4},
        25: {'lr': 1e-4}
    }
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    self.input_transform = {
        'train': transforms.Compose([
            transforms.Scale(256),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]),
        'eval': transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize
        ])
    }
def main(args):
    # Image preprocessing
    transform = transforms.Compose([
        transforms.Scale(args.crop_size),
        # transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build models
    encoder = EncoderCNN(args.embed_size)
    encoder.eval()  # evaluation mode (BN uses moving mean/variance)
    decoder = DecoderRNN(args.embed_size, args.hidden_size,
                         len(vocab), args.num_layers)

    # Load the trained model parameters
    encoder.load_state_dict(torch.load(args.encoder_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    # Prepare image
    image = load_image(args.image, transform)
    image_tensor = to_var(image, volatile=True)

    # If GPU is available, use it
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
        image_tensor = image_tensor.cuda()

    # Generate caption from image
    feature = encoder(image_tensor)
    sampled_ids = decoder.sample(feature, args.length)
    sampled_ids = sampled_ids.cpu().data.numpy()

    # Decode word_ids to words
    sampled_caption = []
    for word_id in sampled_ids:
        word = vocab.idx2word[word_id]
        if word != '<start>' and word != '<end>':
            sampled_caption.append(word)
        if word == '<end>':
            break
    sentence = ' '.join(sampled_caption)

    # Print out the generated caption.
    print(sentence)
    # TODO: only call plt if we know we are in xwindows
    # plt.imshow(np.asarray(image))
def main():
    global args
    args = parser.parse_args()
    print("=> using pre-trained model '{}'".format(args.arch))
    model = convnets.factory({'arch': args.arch}, cuda=True, data_parallel=True)

    extract_name = 'arch,{}_size,{}'.format(args.arch, args.size)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if args.dataset == 'coco':
        if 'coco' not in args.dir_data:
            raise ValueError('"coco" string not in dir_data')
        dataset = datasets.COCOImages(args.data_split, dict(dir=args.dir_data),
                                      transform=transforms.Compose([
                                          transforms.Scale(args.size),
                                          transforms.CenterCrop(args.size),
                                          transforms.ToTensor(),
                                          normalize,
                                      ]))
    elif args.dataset == 'vgenome':
        if args.data_split != 'train':
            raise ValueError('train split is required for vgenome')
        if 'vgenome' not in args.dir_data:
            raise ValueError('"vgenome" string not in dir_data')
        dataset = datasets.VisualGenomeImages(args.data_split, dict(dir=args.dir_data),
                                              transform=transforms.Compose([
                                                  transforms.Scale(args.size),
                                                  transforms.CenterCrop(args.size),
                                                  transforms.ToTensor(),
                                                  normalize,
                                              ]))

    data_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.workers,
                             pin_memory=True)

    dir_extract = os.path.join(args.dir_data, 'extract', extract_name)
    path_file = os.path.join(dir_extract, args.data_split + 'set')
    os.system('mkdir -p ' + dir_extract)

    extract(data_loader, model, path_file, args.mode)
def getLoader(datasetName, dataroot, originalSize, imageSize, batchSize=64,
              workers=4, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
              split='train', shuffle=True, seed=None):
    #import pdb; pdb.set_trace()
    if datasetName == 'trans':
        from datasets.trans import trans as commonDataset
        import transforms.pix2pix as transforms
    elif datasetName == 'folder':
        from torchvision.datasets.folder import ImageFolder as commonDataset
        import torchvision.transforms as transforms
    elif datasetName == 'pix2pix':
        from datasets.pix2pix import pix2pix as commonDataset
        import transforms.pix2pix as transforms

    if datasetName != 'folder':
        if split == 'train':
            dataset = commonDataset(root=dataroot,
                                    transform=transforms.Compose([
                                        transforms.Scale(originalSize),
                                        transforms.RandomCrop(imageSize),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean, std),
                                    ]),
                                    seed=seed)
        else:
            dataset = commonDataset(root=dataroot,
                                    transform=transforms.Compose([
                                        transforms.Scale(originalSize),
                                        transforms.CenterCrop(imageSize),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean, std),
                                    ]),
                                    seed=seed)
    else:
        if split == 'train':
            dataset = commonDataset(root=dataroot,
                                    transform=transforms.Compose([
                                        transforms.Scale(originalSize),
                                        transforms.RandomCrop(imageSize),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean, std),
                                    ]))
        else:
            dataset = commonDataset(root=dataroot,
                                    transform=transforms.Compose([
                                        transforms.Scale(originalSize),
                                        transforms.CenterCrop(imageSize),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean, std),
                                    ]))
    assert dataset
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batchSize,
                                             shuffle=shuffle, num_workers=int(workers))
    return dataloader
def get_multi_scale_crop_test_set(data_root, idx_file, scale_sizes, crop_size,
                                  aug_type='forty_crop', seg_root=None, mixture=False):
    dsets = []
    if aug_type == 'forty_crop':
        for scale_size in scale_sizes:
            crop_types = [0, 1, 2, 3, 4]
            # 0: center crop,
            # 1: top left crop, 2: top right crop
            # 3: bottom right crop, 4: bottom left crop
            flips = [0, 1]  # 0: no flip, 1: horizontal flip
            for i in crop_types:
                for j in flips:
                    data_transform = transforms.Compose([
                        transforms.Scale(scale_size),
                        # transforms.CenterCrop(crop_size),
                        transforms.ToTensor(),
                        RandomFlip(flips[j]),
                        SpecialCrop((crop_size, crop_size), crop_type=crop_types[i]),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                    ])
                    if mixture:
                        seg_transform = transforms.Compose([
                            transforms.Scale(crop_size),
                            # transforms.CenterCrop(crop_size),
                            transforms.ToTensor(),
                            RandomFlip(flips[j]),
                            # SpecialCrop(crop_size=(crop_size, crop_size), crop_type=crop_types[i]),
                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                        ])
                        dsets.append(MyImageFolder(root=data_root, idx_file=idx_file,
                                                   transform=data_transform,
                                                   seg_transform=seg_transform,
                                                   seg_root=seg_root))
                    else:
                        dsets.append(MyImageFolder(root=data_root, idx_file=idx_file,
                                                   transform=data_transform))
    return dsets