The following 50 code examples, extracted from open-source Python projects, illustrate how to use keras.datasets.cifar10.load_data().
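Before the project snippets, a minimal self-contained sketch of the call itself may help. It is not taken from any of the projects below; the printed shapes assume a recent Keras build with the default channels_last image format (older Theano-ordered setups return (N, 3, 32, 32) images):

from keras.datasets import cifar10

# Downloads CIFAR-10 on first use, then loads it from the local Keras cache.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

print(x_train.shape)  # (50000, 32, 32, 3), uint8 pixels in [0, 255]
print(y_train.shape)  # (50000, 1), integer class labels in [0, 9]
print(x_test.shape)   # (10000, 32, 32, 3)
print(y_test.shape)   # (10000, 1)

# The preprocessing step most of the examples below share:
# cast to float32 and scale pixel values into [0, 1].
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.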
def cifar10_extract(label='cat'):
    # acceptable labels
    labels = ['airplane', 'automobile', 'bird', 'cat', 'deer',
              'dog', 'frog', 'horse', 'ship', 'truck']
    target_label = labels.index(label)

    (x_train, t_train), (x_test, t_test) = cifar10.load_data()

    t_target = t_train == target_label
    t_target = t_target.reshape(t_target.size)
    x_target = x_train[t_target]

    print('extract {} labeled images, shape(5000, 32, 32, 3)'.format(label))
    return x_target

# shape(generated_images) : (sample_num, w, h, 3)
def get_data(self):
    (X_train, y_train), (X_test, y_test) = self.load_data()
    idx_perm = np.random.RandomState(101).permutation(X_train.shape[0])
    X_train, y_train = X_train[idx_perm], y_train[idx_perm]
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')
    return X_train, X_test, y_train, y_test

# custom losses for the CNN
def load_cifar10():
    print('load cifar10 data ...')
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    y_train = to_categorical(y_train, num_classes=10)
    y_test = to_categorical(y_test, num_classes=10)
    return (x_train/255., y_train), (x_test/255., y_test)
def test_cifar():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
        (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
def load_dataset():
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    return X_train, Y_train, X_test, Y_test

# creating the network architecture
def load_data(self):
    # Create the data using magic numbers to approximate the figure in
    # canevet_icml2016
    x = np.linspace(0, 1, self.N).astype(np.float32)
    ones = np.ones_like(x).astype(int)
    boundary = np.sin(4*(x + 0.5)**5)/3 + 0.5

    data = np.empty(shape=[self.N, self.N, 3], dtype=np.float32)
    data[:, :, 0] = 1 - x
    for i in range(self.N):
        data[i, :, 1] = 1 - x[i]
        data[i, :, 2] = 1 / (1 + np.exp(self.smooth*(x - boundary[i])))
        data[i, :, 2] = np.random.binomial(ones, data[i, :, 2])
    data = data.reshape(-1, 3)
    np.random.shuffle(data)

    # Create train and test arrays
    split = int(len(data)*self.test_split)
    X_train = data[:-split, :2]
    y_train = data[:-split, 2]
    X_test = data[-split:, :2]
    y_test = data[-split:, 2]

    return (X_train, y_train), (X_test, y_test)
def load_data(self, limit_data, type='cifar10'):
    if MyConfig.cache_data is None:
        if type == 'cifar10':
            (train_x, train_y), (test_x, test_y) = cifar10.load_data()
        elif type == 'mnist':
            (train_x, train_y), (test_x, test_y) = mnist.load_data()
        elif type == 'cifar100':
            (train_x, train_y), (test_x, test_y) = cifar100.load_data(label_mode='fine')
        elif type == 'svhn':
            (train_x, train_y), (test_x, test_y) = load_data_svhn()

        train_x, mean_img = self._preprocess_input(train_x, None)
        test_x, _ = self._preprocess_input(test_x, mean_img)
        train_y, test_y = map(self._preprocess_output, [train_y, test_y])

        res = {'train_x': train_x, 'train_y': train_y,
               'test_x': test_x, 'test_y': test_y}
        for key, val in res.items():
            res[key] = MyConfig._limit_data(val, limit_data)
        MyConfig.cache_data = res
    self.dataset = MyConfig.cache_data
def load_retures_keras():
    from keras.preprocessing.text import Tokenizer
    from keras.datasets import reuters
    max_words = 1000

    print('Loading data...')
    (x, y), (_, _) = reuters.load_data(num_words=max_words, test_split=0.)
    print(len(x), 'train sequences')

    num_classes = np.max(y) + 1
    print(num_classes, 'classes')

    print('Vectorizing sequence data...')
    tokenizer = Tokenizer(num_words=max_words)
    x = tokenizer.sequences_to_matrix(x, mode='binary')
    print('x_train shape:', x.shape)

    return x.astype(float), y
def load_imdb():
    from keras.preprocessing.text import Tokenizer
    from keras.datasets import imdb
    max_words = 1000

    print('Loading data...')
    (x1, y1), (x2, y2) = imdb.load_data(num_words=max_words)
    x = np.concatenate((x1, x2))
    y = np.concatenate((y1, y2))
    print(len(x), 'train sequences')

    num_classes = np.max(y) + 1
    print(num_classes, 'classes')

    print('Vectorizing sequence data...')
    tokenizer = Tokenizer(num_words=max_words)
    x = tokenizer.sequences_to_matrix(x, mode='binary')
    print('x_train shape:', x.shape)

    return x.astype(float), y
def load_mnist(flatten=True):
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.
    if flatten:
        x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
        x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
    else:
        # adapt this if using `channels_first` image data format
        x_train = np.reshape(x_train, (len(x_train), 28, 28, 1))
        x_test = np.reshape(x_test, (len(x_test), 28, 28, 1))
    print(x_train.shape)
    print(x_test.shape)
    return (x_train, y_train), (x_test, y_test)
def load_cifar(flatten=True):
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.
    x_test /= 255.
    if flatten:
        x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
        x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
    else:
        # adapt this if using `channels_first` image data format
        x_train = np.reshape(x_train, (len(x_train), 32, 32, 3))
        x_test = np.reshape(x_test, (len(x_test), 32, 32, 3))
    print('bounds:', np.min(x_train), np.max(x_train))
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')
    return (x_train, y_train), (x_test, y_test)
def load_cifar10_data(img_rows, img_cols):
    # Load cifar10 training and validation sets
    (X_train, Y_train), (X_valid, Y_valid) = cifar10.load_data()

    # Resize training images
    if K.image_dim_ordering() == 'th':
        X_train = np.array([cv2.resize(img.transpose(1, 2, 0),
                                       (img_rows, img_cols)).transpose(2, 0, 1)
                            for img in X_train[:nb_train_samples, :, :, :]])
        X_valid = np.array([cv2.resize(img.transpose(1, 2, 0),
                                       (img_rows, img_cols)).transpose(2, 0, 1)
                            for img in X_valid[:nb_valid_samples, :, :, :]])
    else:
        X_train = np.array([cv2.resize(img, (img_rows, img_cols))
                            for img in X_train[:nb_train_samples, :, :, :]])
        X_valid = np.array([cv2.resize(img, (img_rows, img_cols))
                            for img in X_valid[:nb_valid_samples, :, :, :]])

    # Transform targets to keras compatible format
    Y_train = np_utils.to_categorical(Y_train[:nb_train_samples], num_classes)
    Y_valid = np_utils.to_categorical(Y_valid[:nb_valid_samples], num_classes)

    return X_train, Y_train, X_valid, Y_valid
def get_cifar10():
    """Retrieve the CIFAR dataset and process the data."""
    # Set defaults.
    nb_classes = 10
    batch_size = 64
    input_shape = (3072,)

    # Get the data.
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.reshape(50000, 3072)
    x_test = x_test.reshape(10000, 3072)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # convert class vectors to binary class matrices
    y_train = to_categorical(y_train, nb_classes)
    y_test = to_categorical(y_test, nb_classes)

    return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)
def get_mnist():
    """Retrieve the MNIST dataset and process the data."""
    # Set defaults.
    nb_classes = 10
    batch_size = 128
    input_shape = (784,)

    # Get the data.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(60000, 784)
    x_test = x_test.reshape(10000, 784)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # convert class vectors to binary class matrices
    y_train = to_categorical(y_train, nb_classes)
    y_test = to_categorical(y_test, nb_classes)

    return (nb_classes, batch_size, input_shape, x_train, x_test, y_train, y_test)
def load_mnist():
    ''' returns mnist_data '''
    # input image dimensions
    img_rows, img_cols = 28, 28

    # the data, shuffled and split between train and test sets
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    if k.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
        input_shape = (1, img_rows, img_cols)
    else:
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

    x_train = x_train.astype(k.floatx())
    x_train *= 0.96/255
    x_train += 0.02
    return input_shape, x_train
def get_data(n_train, n_test, nb_classes):
    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    img_rows, img_cols = (28, 28)

    # make some that are the same
    X_digits = {ind: X_train[np.where(y_train == ind)] for ind in range(10)}

    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    X_train = X_train[:n_train]
    X_test = X_test[:n_test]
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train[:n_train], nb_classes)
    Y_test = np_utils.to_categorical(y_test[:n_test], nb_classes)
    return X_train, Y_train, X_test, Y_test
def get_cifar(nb_classes=10):
    # input image dimensions
    # img_rows, img_cols = 32, 32
    # # The CIFAR10 images are RGB.
    # img_channels = 3

    # The data, shuffled and split between train and test sets:
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # Convert class vectors to binary class matrices.
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, Y_train, X_test, Y_test
def test_cifar(self):
    print('cifar10')
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)

    print('cifar100 fine')
    (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine')
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)

    print('cifar100 coarse')
    (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse')
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
def load_cifar100():
    print('load cifar100 data ...')
    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    y_train = to_categorical(y_train, num_classes=100)
    y_test = to_categorical(y_test, num_classes=100)
    return (x_train/255., y_train), (x_test/255., y_test)
def data_cifar10():
    """
    Preprocess CIFAR10 dataset
    :return:
    """
    # These values are specific to CIFAR10
    img_rows = 32
    img_cols = 32
    nb_classes = 10

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    if keras.backend.image_dim_ordering() == 'th':
        X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols)
        X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols)
    else:
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, Y_train, X_test, Y_test
def test_reuters():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (X_train, y_train), (X_test, y_test) = reuters.load_data()
        (X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10)
def test_mnist():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
def test_imdb():
    # only run data download tests 20% of the time
    # to speed up frequent testing
    random.seed(time.time())
    if random.random() > 0.8:
        (X_train, y_train), (X_test, y_test) = imdb.load_data()
        (X_train, y_train), (X_test, y_test) = imdb.load_data(maxlen=40)
def generate_training_data(data='mnist'):
    if data == 'mnist':
        (X_train, _), (_, _) = mnist.load_data()
        X_train = np.expand_dims(X_train, -1) / 255.
    elif data == 'cifar':
        (X_train, _), (_, _) = cifar10.load_data()
        X_train = X_train / 255.
    else:
        raise ValueError('data should be "mnist" or "cifar", got '
                         '"%s".' % data)

    # Downsamples by averaging adjacent pixels.
    X_low_dim = mean_bins(X_train)

    return X_low_dim, X_train
def get_dataset():
    sys.stdout.write('Loading Dataset\n\n')
    sys.stdout.flush()

    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    # normalize the images and one-hot encode the labels here
    X_train = X_train.astype('float32') / dtype_mult
    X_test = X_test.astype('float32') / dtype_mult
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return X_train, y_train, X_test, y_test
def get_dataset():
    sys.stdout.write('Loading Dataset\n')
    sys.stdout.flush()

    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    return X_train, y_train, X_test, y_test
def data():
    nb_classes = 10

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    # this will do preprocessing and realtime data augmentation
    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=0,                     # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,                 # randomly flip images
        vertical_flip=False)                  # randomly flip images

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    return datagen, X_train, Y_train, X_test, Y_test
def load_data(self):
    # Load the original data
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    # Get only the classes given
    idxs_train = np.arange(len(y_train))[
        np.logical_or(*[y_train == c for c in self.classes]).ravel()
    ]
    idxs_test = np.arange(len(y_test))[
        np.logical_or(*[y_test == c for c in self.classes]).ravel()
    ]
    X_train = X_train[idxs_train]
    y_train = y_train[idxs_train]
    X_test = X_test[idxs_test]
    y_test = y_test[idxs_test]
    for i, c in enumerate(self.classes):
        y_train[y_train == c] = i
        y_test[y_test == c] = i

    # Replicate one document in the training set
    x, y = X_train[self.replicate_idx], y_train[self.replicate_idx]
    x = np.tile(x, (self.replicate, 1, 1, 1))
    y = np.tile(y, (self.replicate, 1))

    return (
        (np.vstack([X_train, x]), np.vstack([y_train, y])),
        (X_test, y_test)
    )
def from_loadable(cls, dataset):
    (a, b), (c, d) = dataset.load_data()
    return cls(a, b, c, d)
def get_data(self):
    from keras.datasets import cifar10
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    if self.verbose:
        print('X_train shape:', X_train.shape)
        print(X_train.shape[0], 'train samples')
        print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    from keras.utils import np_utils
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    img_mean = X_train.mean(axis=0)[np.newaxis, :, :, :]

    N = X_train.shape[0]
    perms = np.random.permutation(N)
    self.X_train = X_train[perms, :]
    self.Y_train = Y_train[perms, :]
    self.X_test = X_test
    self.Y_test = Y_test
    self.rawdata = [self.X_train, self.Y_train,
                    self.X_test, self.Y_test, img_mean]
def load_data(self):
    self.cifar_data = cifar10.load_data()
def __init__(self, **kwargs):
    super(CIFAR10, self).__init__(**kwargs)
    self.cls_names = cls_names

    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    y_train = y_train.reshape((y_train.shape[0]))
    y_test = y_test.reshape((y_test.shape[0]))

    if self.data_set == 'train':
        X = X_train
        y = y_train
    elif self.data_set == 'train-small':
        X = X_train[:1000]
        y = y_train[:1000]
    elif self.data_set == 'test':
        X = X_test
        y = y_test
    elif self.data_set == 'test-small':
        X = X_test[:1000]
        y = y_test[:1000]
    elif self.data_set == 'all':
        X = np.vstack((X_train, X_test))
        # labels are 1-D after the reshape above, so concatenate them
        y = np.concatenate((y_train, y_test))
    else:
        raise ValueError('CIFAR10 Unsupported data_set: ', self.data_set)

    if X.shape[-1] == 3:
        X = X.transpose((0, 3, 1, 2))

    # normalization
    if self.norm:
        X = X.astype(np.float32) / 255

    X = self.init_layout_X(X)
    y = self.init_layout_y(y)
    self.X = X
    self.y = y
def data_mix(self):
    # randomly choose dataset
    dataset = random.choice(['mnist', 'cifar10', 'cifar100'])

    n_labels = 10
    if dataset == "mnist":
        data = mnist.load_data()
    if dataset == "cifar10":
        data = cifar10.load_data()
    if dataset == "cifar100":
        data = cifar100.load_data()
        n_labels = 100

    # Choose dataset size. This affects the regularization needed
    r = np.random.rand()

    # not using the full dataset, to make regularization more important and
    # speed up testing a little bit
    data_size = int(2000 * (1 - r) + 40000 * r)

    # I do not use test data for validation, but the last 10000 instances in the
    # dataset, so that trained models can be compared to results in the literature
    (CX, CY), (CXt, CYt) = data

    if dataset == "mnist":
        CX = np.expand_dims(CX, axis=1)

    data = CX[:data_size], CY[:data_size], CX[-10000:], CY[-10000:]

    return data, n_labels
def load_samples():
    from keras.datasets import cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    return (list(zip(x_train, map(int, y_train))),
            list(zip(x_test, map(int, y_test))))
def __init__(self, epochs=100, verbose=1, limit_data=False, name='default_name',
             evoluation_time=1, clean=True, dataset_type='cifar10',
             max_pooling_cnt=0, debug=False):
    # for all models:
    self.dataset_type = dataset_type
    self.limit_data = limit_data
    if dataset_type == 'cifar10' or dataset_type == 'svhn' or dataset_type == 'cifar100':
        self.input_shape = (32, 32, 3)
    else:
        self.input_shape = (28, 28, 1)
    if dataset_type == 'cifar100':
        self.nb_class = 100
    else:
        self.nb_class = 10
    self.dataset = None
    if limit_data:
        self.load_data(9999, type=self.dataset_type)
    else:
        self.load_data(1, type=self.dataset_type)

    # for GA:
    self.evoluation_time = evoluation_time

    # for a single model
    self.set_name(name, clean=clean)
    self.batch_size = 256
    self.epochs = epochs
    self.verbose = verbose
    self.lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1),
                                        cooldown=0, patience=10, min_lr=0.5e-7)
    self.early_stopper = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=10)
    self.csv_logger = None
    self.set_logger_path(self.name + '.csv')
    self.debug = debug
    self.max_pooling_limit = int(log(min(self.input_shape[0], self.input_shape[1]), 2)) - 2
    self.max_pooling_cnt = max_pooling_cnt
    self.model_max_conv_width = 1024
    self.model_min_conv_width = 128
    self.model_max_depth = 20
    self.kernel_regularizer_l2 = 0.01