The following 50 code examples, extracted from open-source Python projects, illustrate how to use numpy.load().
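Before turning to the project snippets, here is a minimal, self-contained sketch of the three numpy.load() patterns that recur below: plain .npy arrays, .npz archives, and pickled Python objects read back with .item(). The file names are made up for illustration, and the allow_pickle flag reflects NumPy's default behaviour since 1.16.3.

import numpy as np

# Plain .npy file: one array saved, one array loaded.
np.save('weights.npy', np.arange(6).reshape(2, 3))
weights = np.load('weights.npy')

# .npz archive: np.load() returns an NpzFile that behaves like a dict of arrays
# and can be used as a context manager so the underlying file gets closed.
np.savez_compressed('dataset.npz', images=np.zeros((4, 3)), labels=np.arange(4))
with np.load('dataset.npz') as data:
    images, labels = data['images'], data['labels']

# Object arrays (e.g. a saved dict): loading them requires allow_pickle=True
# on recent NumPy, and .item() unwraps the original Python object.
np.save('split.npy', {'train': [0, 1], 'val': [2]})
split = np.load('split.npy', allow_pickle=True).item()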
def get_named_set(lang_codes, feature_set):
    if feature_set == 'id':
        return get_id_set(lang_codes)

    if feature_set not in FEATURE_SETS:
        print("ERROR: Invalid feature set " + feature_set, file=sys.stderr)
        sys.exit()

    filename, source, prefix = FEATURE_SETS[feature_set]
    feature_database = np.load(filename)
    lang_codes = [ get_language_code(l, feature_database) for l in lang_codes ]
    lang_indices = [ get_language_index(l, feature_database) for l in lang_codes ]
    feature_names = get_feature_names(prefix, feature_database)
    feature_indices = [ get_feature_index(f, feature_database) for f in feature_names ]
    source_index = get_source_index(source, feature_database)
    feature_values = feature_database["data"][lang_indices,:,:][:,feature_indices,:][:,:,source_index]
    feature_values = feature_values.squeeze(axis=2)
    return feature_names, feature_values
def gen_pruned_features(name):
    print name
    feature_dir = 'data/feature_' + args.domain + \
        '_' + str(args.n_boxes) + 'boxes/' + name + '/'
    n_clips = len(glob.glob(feature_dir + BOX_FEATURE + '*.npy'))
    for clip in xrange(1, n_clips+1):
        pruned_boxes = np.load(feature_dir + BOX_FEATURE + '{:04d}.npy'.format(clip))  # (50, args.n_boxes, 4)
        roisavg = np.load(feature_dir + 'roisavg{:04d}.npy'.format(clip))  # (50, args.n_boxes, 512)

        pruned_roisavg = np.zeros((50, args.n_boxes, 512))
        for frame in xrange(50):
            for box_id in xrange(args.n_boxes):
                if not np.array_equal(pruned_boxes[frame][box_id], np.zeros((4))):
                    pruned_roisavg[frame][box_id] = roisavg[frame][box_id]

        np.save('{}pruned_roisavg{:04d}'.format(feature_dir, clip), pruned_roisavg)
def __init__(self):
    if not self.code_table:
        with open(CATEGORY_CODES) as codes:
            self.code_table = {int(k): v for k, v in json.loads(codes.read()).items()}

    caffe_models = os.path.expanduser(CAFFE_MODELS)
    model = 'squeezenet', 'init_net.pb', 'predict_net.pb', 'ilsvrc_2012_mean.npy', 227
    self.model = model

    mean_file = os.path.join(caffe_models, model[0], model[3])
    if not os.path.exists(mean_file):
        self.mean = 128
    else:
        mean = np.load(mean_file).mean(1).mean(1)
        self.mean = mean[:, np.newaxis, np.newaxis]

    init_net = os.path.join(caffe_models, model[0], model[1])
    predict_net = os.path.join(caffe_models, model[0], model[2])

    with open(init_net) as f:
        self.init_net = f.read()
    with open(predict_net) as f:
        self.predict_net = f.read()
def test_xyz2lab(self):
    assert_array_almost_equal(xyz2lab(self.xyz_array),
                              self.lab_array, decimal=3)

    # Test the conversion with the rest of the illuminants.
    for I in ["d50", "d55", "d65", "d75"]:
        for obs in ["2", "10"]:
            fname = "lab_array_{0}_{1}.npy".format(I, obs)
            lab_array_I_obs = np.load(
                os.path.join(os.path.dirname(__file__), 'data', fname))
            assert_array_almost_equal(lab_array_I_obs,
                                      xyz2lab(self.xyz_array, I, obs),
                                      decimal=2)
    for I in ["a", "e"]:
        fname = "lab_array_{0}_2.npy".format(I)
        lab_array_I_obs = np.load(
            os.path.join(os.path.dirname(__file__), 'data', fname))
        assert_array_almost_equal(lab_array_I_obs,
                                  xyz2lab(self.xyz_array, I, "2"),
                                  decimal=2)
def test_xyz2luv(self):
    assert_array_almost_equal(xyz2luv(self.xyz_array),
                              self.luv_array, decimal=3)

    # Test the conversion with the rest of the illuminants.
    for I in ["d50", "d55", "d65", "d75"]:
        for obs in ["2", "10"]:
            fname = "luv_array_{0}_{1}.npy".format(I, obs)
            luv_array_I_obs = np.load(
                os.path.join(os.path.dirname(__file__), 'data', fname))
            assert_array_almost_equal(luv_array_I_obs,
                                      xyz2luv(self.xyz_array, I, obs),
                                      decimal=2)
    for I in ["a", "e"]:
        fname = "luv_array_{0}_2.npy".format(I)
        luv_array_I_obs = np.load(
            os.path.join(os.path.dirname(__file__), 'data', fname))
        assert_array_almost_equal(luv_array_I_obs,
                                  xyz2luv(self.xyz_array, I, "2"),
                                  decimal=2)
def test_luv2xyz(self):
    assert_array_almost_equal(luv2xyz(self.luv_array),
                              self.xyz_array, decimal=3)

    # Test the conversion with the rest of the illuminants.
    for I in ["d50", "d55", "d65", "d75"]:
        for obs in ["2", "10"]:
            fname = "luv_array_{0}_{1}.npy".format(I, obs)
            luv_array_I_obs = np.load(
                os.path.join(os.path.dirname(__file__), 'data', fname))
            assert_array_almost_equal(luv2xyz(luv_array_I_obs, I, obs),
                                      self.xyz_array, decimal=3)
    for I in ["a", "e"]:
        fname = "luv_array_{0}_2.npy".format(I)
        luv_array_I_obs = np.load(
            os.path.join(os.path.dirname(__file__), 'data', fname))
        assert_array_almost_equal(luv2xyz(luv_array_I_obs, I, "2"),
                                  self.xyz_array, decimal=3)
def dataset_from_file(filename):
    """Load a dataset from file.

    Args:
        filename (string): the name of the file from which to extract the dataset

    Returns:
        tuple: the dataset (np.ndarray) and the ngrams (list of strings)
    """
    loader = np.load(filename)
    num_entries = loader['num_entries'][0]
    sp_dataset = sparse.csr_matrix((loader['data'], loader['indices'], loader['indptr']),
                                   shape=loader['shape'])
    dataset = sp_dataset.toarray()
    samp_entries, num_features = dataset.shape
    return dataset.reshape(int(samp_entries / num_entries), num_entries, num_features), loader['ngrams']
def _load_accumulators(self, main_loop): """Nasty method, use carefully""" for cg_name, model in main_loop.models.iteritems(): source = numpy.load(self.path_to_accumulators.format(cg_name)) accums_dict = {name.replace("-", "/"): value for name, value in source.items()} source.close() algo = main_loop.algorithm.algorithms[cg_name] model_params = model.get_params() steps = algo.steps.items() for pidx in xrange(len(steps)): # Get parameter name and its accumulators p = steps[pidx][0] name = [k for k, v in model_params.iteritems() if v == p][0] accums = accums_dict[name] # This is num_accums_per_param col = len(accums) for aidx in xrange(col): algo.step_rule_updates[pidx*col+aidx][0].set_value( accums[aidx])
def _load_accumulators(self, main_loop): """Load accumulators with some checks.""" for cg_name, model in main_loop.models.iteritems(): # Load accumulators accum_filename = self.path_to_accumulators.format(cg_name) if not os.path.isfile(accum_filename): logger.error(" Accumulators file does not exist [{}]" .format(accum_filename)) continue source = numpy.load(accum_filename) accums_to_load = {k: v for k, v in source.items()} source.close() algo = main_loop.algorithm.algorithms[cg_name] accums = algo.step_rule_updates # Set accumulators for acc in accums: try: acc.set_value(accums_to_load[acc.name]) except: logger.error(" Could not load {}".format(acc.name))
def load_params(self, saveto):
    try:
        logger.info(" ...loading model parameters")
        params_all = numpy.load(saveto)
        params_this = self.get_params()
        missing = set(params_this) - set(params_all)
        for pname in params_this.keys():
            if pname in params_all:
                val = params_all[pname]
                self._set_param_value(params_this[pname], val, pname)
            elif self.num_decs > 1 and self.decoder.share_att and \
                    pname in self.decoder.shared_params_map:
                val = params_all[self.decoder.shared_params_map[pname]]
                self._set_param_value(params_this[pname], val, pname)
            else:
                logger.warning(
                    " Parameter does not exist: {}".format(pname))
        logger.info(
            " Number of params loaded: {}"
            .format(len(params_this) - len(missing)))
    except Exception as e:
        logger.error(" Error {0}".format(str(e)))
def load_data():
    """Draw the Mott lobes."""
    res = np.load(r'data_%d.npy' % GRID_SIZE)
    x = res[:, 0]
    y = res[:, 1]
    z = []
    for i, entry in enumerate(res):
        z.append(kinetic_energy(entry[2:], -1.))
    plt.pcolor(
        np.reshape(x, (GRID_SIZE, GRID_SIZE)),
        np.reshape(y, (GRID_SIZE, GRID_SIZE)),
        np.reshape(z, (GRID_SIZE, GRID_SIZE))
    )
    plt.xlabel('$dt/U$')
    plt.ylabel('$\mu/U$')
    plt.show()
def _get_batch_normalization_weights(self, layer_name):
    beta = '%s/batch_normalization/beta:0' % (layer_name)
    gamma = '%s/batch_normalization/gamma:0' % (layer_name)
    mean = '%s/batch_normalization/moving_mean:0' % (layer_name)
    variance = '%s/batch_normalization/moving_variance:0' % (layer_name)
    if self.weights is None or beta not in self.weights:
        print('{:>23} {:>23}'.format(beta, 'using default initializer'))
        return None, None, None, None
    else:
        betax = self.weights[beta]
        gammax = self.weights[gamma]
        meanx = self.weights[mean]
        variancex = self.weights[variance]
        self.loaded_weights[beta] = 1
        self.loaded_weights[gamma] = 1
        self.loaded_weights[mean] = 1
        self.loaded_weights[variance] = 1
        #print('{:>23} {:>23}'.format(beta, 'load from %s' % self.flags.load_path))
        return betax, gammax, meanx, variancex
def post_sub_one(inx):
    w, h = 1918, 1280
    path, out, threshold = inx
    data = np.load(path).item()
    imgs, pred = data['name'], data['pred']
    #print(pred.shape)
    fo = open(out, 'w')
    #masks = pred>threshold
    for name, mask in zip(imgs, np.squeeze(pred)):
        mask = imresize(mask, [h, w])
        mask = mask > threshold
        code = rle_encode(mask)
        code = [str(i) for i in code]
        code = " ".join(code)
        fo.write("%s,%s\n" % (name, code))
    fo.close()
    return 0
def show_one_img_mask(data):
    w, h = 1918, 1280
    a = randint(0, 31)
    path = "../input/test"
    data = np.load(data).item()
    name, masks = data['name'][a], data['pred']
    img = Image.open("%s/%s" % (path, name))
    #img.show()
    plt.imshow(img)
    plt.show()
    mask = np.squeeze(masks[a])
    mask = imresize(mask, [h, w]).astype(np.float32)
    print(mask.shape, mask[0])
    img = Image.fromarray(mask*256)#.resize([w,h])
    plt.imshow(img)
    plt.show()
def split(flags):
    if os.path.exists(flags.split_path):
        return np.load(flags.split_path).item()
    folds = flags.folds
    path = flags.input_path
    random.seed(6)
    img_list = ["%s/%s" % (path, img) for img in os.listdir(path)]
    random.shuffle(img_list)
    dic = {}
    n = len(img_list)
    num = (n + folds - 1) // folds
    for i in range(folds):
        s, e = i*num, min(i*num + num, n)
        dic[i] = img_list[s:e]
    np.save(flags.split_path, dic)
    return dic
def make_benchmark_figure():
    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(1, 1, 1, xscale='linear', yscale='log')
    d1 = np.load('./data/random_data_benchmark.npy')
    d2 = np.load('./data/real_data_benchmark.npy')
    d3 = np.load('./data/real_data_orange3_benchmark.npy')
    ax.scatter(d1[:24, 0], d1[:24, 2], c='r', edgecolor='none',
               label='Random Data (Polo)')
    ax.scatter(d2[:24, 0], d2[:24, 2], c='green', edgecolor='none',
               label='Gene expression data (Polo)')
    ax.scatter(d3[:24, 0], d3[:24, 2], c='blue', edgecolor='none',
               label='Gene expression data (Orange3)')
    ax.legend(loc=2)
    ax.grid('on')
    ax.set_xlabel('log2(Number of leaves)')
    ax.set_ylabel('Run time, seconds')
    fig.tight_layout()
    fig.savefig('data/bench.png', dpi=75)
def read_data():
    with open(PICKLE_FILENAME, 'rb') as f:
        save = pickle.load(f)
        train_dataset = save['train_dataset']
        train_labels = save['train_labels']
        valid_dataset = save['valid_dataset']
        valid_labels = save['valid_labels']
        test_dataset = save['test_dataset']
        test_labels = save['test_labels']
        del save
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Valid set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)
    return [train_dataset, valid_dataset, test_dataset], \
           [train_labels, valid_labels, test_labels]
def lrelu(x, leak=0.2, name="lrelu"): """Leaky rectifier. """ with tf.variable_scope(name): f1 = 0.5 * (1 + leak) f2 = 0.5 * (1 - leak) return f1 * x + f2 * abs(x) # load CIFAR-10 # trainx, trainy = cifar10_data.load(args.data_dir, subset='train') # trainx = trainx.transpose(0, 2, 3, 1) # trainx_unl = trainx.copy() # trainx_unl2 = trainx.copy() # testx, testy = cifar10_data.load(args.data_dir, subset='test') # testx = testx.transpose(0, 2, 3, 1) # nr_batches_train = int(trainx.shape[0]/args.batch_size) # nr_batches_test = int(testx.shape[0]/args.batch_size) # load MNIST data
def open(filename, frame='unspecified'): """Create a Point from data saved in a file. Parameters ---------- filename : :obj:`str` The file to load data from. frame : :obj:`str` The frame to apply to the created point. Returns ------- :obj:`Point` A point created from the data in the file. """ data = BagOfPoints.load_data(filename) return Point(data, frame)
def open(filename, frame='unspecified'): """Create a Direction from data saved in a file. Parameters ---------- filename : :obj:`str` The file to load data from. frame : :obj:`str` The frame to apply to the created Direction. Returns ------- :obj:`Direction` A Direction created from the data in the file. """ data = BagOfPoints.load_data(filename) return Direction(data, frame)
def open(filename, frame='unspecified'): """Create a PointCloud from data saved in a file. Parameters ---------- filename : :obj:`str` The file to load data from. frame : :obj:`str` The frame to apply to the created PointCloud. Returns ------- :obj:`PointCloud` A PointCloud created from the data in the file. """ data = BagOfPoints.load_data(filename) return PointCloud(data, frame)
def open(filename, frame='unspecified'): """Create a NormalCloud from data saved in a file. Parameters ---------- filename : :obj:`str` The file to load data from. frame : :obj:`str` The frame to apply to the created NormalCloud. Returns ------- :obj:`NormalCloud` A NormalCloud created from the data in the file. """ data = BagOfPoints.load_data(filename) return NormalCloud(data, frame)
def open(filename, frame='unspecified'): """Create a RgbCloud from data saved in a file. Parameters ---------- filename : :obj:`str` The file to load data from. frame : :obj:`str` The frame to apply to the created RgbCloud. Returns ------- :obj:`RgbCloud` A RgdCloud created from the data in the file. """ data = BagOfPoints.load_data(filename) return RgbCloud(data, frame)
def __init__(self,
             audio_file: Path,
             id: Optional[str] = None,
             sample_rate_to_convert_to: int = 16000,
             label: Optional[str] = "nolabel",
             fourier_window_length: int = 512,
             hop_length: int = 128,
             mel_frequency_count: int = 128,
             label_with_tags: str = None,
             positional_label: Optional[PositionalLabel] = None):
    # The default values for hop_length and fourier_window_length are powers of 2
    # near the values specified in the wave2letter paper.
    if id is None:
        id = name_without_extension(audio_file)
    self.audio_file = audio_file

    super().__init__(
        id=id,
        get_raw_audio=lambda: librosa.load(str(self.audio_file), sr=self.sample_rate)[0],
        label=label,
        sample_rate=sample_rate_to_convert_to,
        fourier_window_length=fourier_window_length,
        hop_length=hop_length,
        mel_frequency_count=mel_frequency_count,
        label_with_tags=label_with_tags,
        positional_label=positional_label)
def load_word2vec_matrix(vec_file, word_index, config):
    if os.path.isfile(DirConfig.W2V_CACHE):
        print('---- Load word vectors from cache.')
        embedding_matrix = np.load(open(DirConfig.W2V_CACHE, 'rb'))
        return embedding_matrix

    print('---- loading word2vec ...')
    word2vec = KeyedVectors.load_word2vec_format(
        vec_file, binary=True)
    print('Found %s word vectors of word2vec' % len(word2vec.vocab))

    nb_words = min(config.MAX_NB_WORDS, len(word_index)) + 1
    embedding_matrix = np.zeros((nb_words, config.WORD_EMBEDDING_DIM))
    for word, i in word_index.items():
        if word in word2vec.vocab:
            embedding_matrix[i] = word2vec.word_vec(word)
    print('Null word embeddings: %d' % \
          np.sum(np.sum(embedding_matrix, axis=1) == 0))

    # check the words which not in embedding vectors
    not_found_words = []
    for word, i in word_index.items():
        if word not in word2vec.vocab:
            not_found_words.append(word)

    np.save(open(DirConfig.W2V_CACHE, 'wb'), embedding_matrix)
    return embedding_matrix
def get_sample_item_file(wav_file_names_sample, item_file, output):
    """
    From a sampled dataset, get an item file for running an ABX task

    Parameters
    ----------
    item file : text file containing at least as columns : #filename, onset, offset,
        #phoneme and context and side information such as image ID
    item_file : string, path to the item file of the whole dataset
    output : string, path where the sample item file will be stored
    """
    wav_names = []
    temp = np.load(wav_file_names_sample)
    for s in temp:
        wav_names.append(s.split(".")[0])

    df = pd.read_csv(item_file, sep="\t", index_col="#filename")
    df_sample = df.loc[wav_names]
    df_sample.to_csv(output, sep="\t", header=True, index=False)
    return(df_sample)
def __init__(self, batchsize=64, max_length=15, mode='train'):
    self.batchsize = batchsize
    self.d_vocabulary = None
    self.batch_index = None
    self.batch_len = None
    self.rev_adict = None
    self.max_length = max_length
    self.mode = mode
    self.qdic, self.adic = VQADataProvider.load_data(mode)

    with open('./result/vdict.json', 'r') as f:
        self.vdict = json.load(f)
    with open('./result/adict.json', 'r') as f:
        self.adict = json.load(f)

    self.n_ans_vocabulary = len(self.adict)
    self.nlp = spacy.load('en', vectors='en_glove_cc_300_1m_vectors')
    self.glove_dict = {}  # word -> glove vector
def load_vqa_json(data_split):
    """
    Parses the question and answer json files for the given data split.
    Returns the question dictionary and the answer dictionary.
    """
    qdic, adic = {}, {}

    with open(config.DATA_PATHS[data_split]['ques_file'], 'r') as f:
        qdata = json.load(f)['questions']
        for q in qdata:
            qdic[data_split + QID_KEY_SEPARATOR + str(q['question_id'])] = \
                {'qstr': q['question'], 'iid': q['image_id']}

    if 'test' not in data_split:
        with open(config.DATA_PATHS[data_split]['ans_file'], 'r') as f:
            adata = json.load(f)['annotations']
            for a in adata:
                adic[data_split + QID_KEY_SEPARATOR + str(a['question_id'])] = \
                    a['answers']

    print 'parsed', len(qdic), 'questions for', data_split
    return qdic, adic
def load_genome_json():
    """
    Parses the genome json file. Returns the question dictionary and the
    answer dictionary.
    """
    qdic, adic = {}, {}

    with open(config.DATA_PATHS['genome']['genome_file'], 'r') as f:
        qdata = json.load(f)
        for q in qdata:
            key = 'genome' + QID_KEY_SEPARATOR + str(q['id'])
            qdic[key] = {'qstr': q['question'], 'iid': q['image']}
            adic[key] = [{'answer': q['answer']}]

    print 'parsed', len(qdic), 'questions for genome'
    return qdic, adic
def __init__(self, batchsize=64, max_length=15, mode='train'):
    self.batchsize = batchsize
    self.d_vocabulary = None
    self.batch_index = None
    self.batch_len = None
    self.rev_adict = None
    self.max_length = max_length
    self.mode = mode
    self.qdic, self.adic = VQADataProvider.load_data(mode)

    with open('./result/vdict.json', 'r') as f:
        self.vdict = json.load(f)
    with open('./result/adict.json', 'r') as f:
        self.adict = json.load(f)

    self.n_ans_vocabulary = len(self.adict)
def test_individual_stability_matrix():
    """
    Tests individual_stability_matrix method on three gaussian blobs.
    """
    import utils
    import numpy as np
    import scipy as sp

    desired = np.load(home + '/git_repo/PyBASC/tests/ism_test.npy')
    blobs = generate_blobs()

    ism = utils.individual_stability_matrix(blobs, 20, 3)

    #how to use test here?
    # np.corrcoef(ism.flatten(),desired.flatten())
    # np.testing.assert_equal(ism,desired)
    #
    # corr=np.array(sp.spatial.distance.cdist(ism, desired, metric = 'correlation'))

    # assert False
def test_ndarray_to_vol():
    import basc
    import nibabel as nb

    subject_file = home + '/git_repo/PyBASC/sample_data/sub1/Func_Quarter_Res.nii.gz'
    subject_file = home + '/git_repo/PyBASC/sample_data/test.nii.gz'
    data = nb.load(subject_file).get_data().astype('float32')
    roi_mask_file = home + '/git_repo/PyBASC/masks/LC_Quarter_Res.nii.gz'
    print('Data Loaded')

    roi_mask_file_nb = nb.load(roi_mask_file)
    roi_mask_nparray = nb.load(roi_mask_file).get_data().astype('float32').astype('bool')

    roi1data = data[roi_mask_nparray]
    data_array = roi1data
    sample_file = subject_file
    filename = home + '/git_repo/PyBASC/sample_data/ndarray_to_vol_test.nii.gz'

    basc.ndarray_to_vol(data_array, roi_mask_file, roi_mask_file, filename)
def get_dataset(dataset_path='Data/Train_Data'):
    # Getting all data from data path:
    try:
        X = np.load('Data/npy_train_data/X.npy')
        Y = np.load('Data/npy_train_data/Y.npy')
    except:
        inputs_path = dataset_path + '/input'
        images = listdir(inputs_path)  # Getting images
        X = []
        Y = []
        for img in images:
            img_path = inputs_path + '/' + img
            x_img = get_img(img_path).astype('float32').reshape(64, 64, 3)
            x_img /= 255.
            y_img = get_img(img_path.replace('input/', 'mask/mask_')).astype('float32').reshape(64, 64, 1)
            y_img /= 255.
            X.append(x_img)
            Y.append(y_img)
        X = np.array(X)
        Y = np.array(Y)
        # Create dataset:
        if not os.path.exists('Data/npy_train_data/'):
            os.makedirs('Data/npy_train_data/')
        np.save('Data/npy_train_data/X.npy', X)
        np.save('Data/npy_train_data/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
    return X, X_test, Y, Y_test
def __init__(self, saved_model=None, train_folder=None, feature=_feature.__func__):
    """
    :param saved_model: optional saved train set and labels as .npz
    :param train_folder: optional custom train data to process
    :param feature: feature function - compatible with saved_model
    """
    self.feature = feature
    if train_folder is not None:
        self.train_set, self.train_labels, self.model = \
            self.create_model(train_folder)
    else:
        if cv2.__version__[0] == '2':
            self.model = cv2.KNearest()
        else:
            self.model = cv2.ml.KNearest_create()
        if saved_model is None:
            saved_model = TRAIN_DATA + 'raw_pixel_data.npz'
        with np.load(saved_model) as data:
            self.train_set = data['train_set']
            self.train_labels = data['train_labels']
            if cv2.__version__[0] == '2':
                self.model.train(self.train_set, self.train_labels)
            else:
                self.model.train(self.train_set, cv2.ml.ROW_SAMPLE, self.train_labels)
def load(self, model_filename):
    self.__model = load_model("%s.model" % model_filename)
    self.__chars = np.load("%s.cvocab.npy" % model_filename).tolist()
    self.__trigrams = np.load("%s.tvocab.npy" % model_filename).tolist()
    self.__classes = np.load("%s.classes.npy" % model_filename).tolist()

    self.__char_indexes = dict((c, i) for i, c in enumerate(self.__chars))
    self.__indexes_char = dict((i, c) for i, c in enumerate(self.__chars))

    self.__trigrams_indexes = dict((t, i) for i, t in enumerate(self.__trigrams))
    self.__indices_trigrams = dict((i, t) for i, t in enumerate(self.__trigrams))

    self.__classes_indexes = dict((c, i) for i, c in enumerate(self.__classes))
    self.__indexes_classes = dict((i, c) for i, c in enumerate(self.__classes))
def get_id_set(lang_codes):
    feature_database = np.load("family_features.npz")
    lang_codes = [ get_language_code(l, feature_database) for l in lang_codes ]
    all_languages = list(feature_database["langs"])
    feature_names = [ "ID_" + l.upper() for l in all_languages ]
    values = np.zeros((len(lang_codes), len(feature_names)))
    for i, lang_code in enumerate(lang_codes):
        feature_index = get_language_index(lang_code, feature_database)
        values[i, feature_index] = 1.0
    return feature_names, values
def unpickle(file):
    import pickle
    fo = open(file, 'rb')
    dict = pickle.load(fo, encoding='latin1')
    fo.close()
    return dict
def load_pkl(path):
    with open(path) as f:
        obj = cPickle.load(f)
        print(" [*] load %s" % path)
        return obj
def load_npy(path):
    obj = np.load(path)
    print(" [*] load %s" % path)
    return obj
def load(self, local_dir_=None):
    '''
    load dataset from local disk

    Args:
        local_dir_: string or None
            if None, will use default Dataset.DEFAULT_DIR
    '''
def load(self, local_dir_=None):
    if local_dir_ is None:
        local_dir = self.DEFAULT_DIR
    else:
        local_dir = Path(local_dir_)
    data_di = np.load(str(local_dir / 'cifar10.npz'))
    self.datum[:] = data_di['images']
    self.labels[:] = data_di['labels']
def install(self, local_dst_dir_=None, local_src_dir_=None, clean_install_=False):
    '''
    Install the dataset into directly usable format,
    requires downloading for public dataset.

    Args:
        local_dst_dir_: string or None
            where to install the dataset, None -> "%(default_dir)s"
        local_src_dir_: string or None
            where to find the raw downloaded files, None -> "%(default_dir)s"
    '''
    local_dst_dir = self.DEFAULT_DIR if local_dst_dir_ is None else Path(local_dst_dir_)
    local_src_dir = self.DEFAULT_DIR if local_src_dir_ is None else Path(local_src_dir_)
    local_dst_dir.mkdir(parents=True, exist_ok=True)
    assert local_src_dir.exists()

    images = np.empty((60000, 3, 32, 32), dtype=np.uint8)
    labels = np.empty((60000,), dtype=np.uint8)
    tarfile_name = str(local_src_dir / 'cifar-10-python.tar.gz')
    with tarfile.open(tarfile_name, 'r:gz') as tf:
        for i in range(5):
            with tf.extractfile('cifar-10-batches-py/data_batch_%d' % (i+1)) as f:
                data_di = pickle.load(f, encoding='bytes')
                images[(10000*i):(10000*(i+1))] = data_di[b'data'].reshape((10000, 3, 32, 32))
                labels[(10000*i):(10000*(i+1))] = np.asarray(data_di[b'labels'], dtype=np.uint8)
        with tf.extractfile('cifar-10-batches-py/test_batch') as f:
            data_di = pickle.load(f, encoding='bytes')
            images[50000:60000] = data_di[b'data'].reshape((10000, 3, 32, 32))
            labels[50000:60000] = data_di[b'labels']

    np.savez_compressed(str(local_dst_dir / 'cifar10.npz'), images=images, labels=labels)

    if clean_install_:
        os.remove(tarfile_name)
def load(self, local_dir_=None):
    if local_dir_ is None:
        local_dir = self.DEFAULT_DIR
    else:
        local_dir = Path(local_dir_)
    data = np.load(str(local_dir / 'mnist.npz'))
    self.labels = data['labels']
    self.datum = data['images']
    self.label_map = np.arange(10)
    self.imsize = (1, 28, 28)
def load(self, local_dir_=None):
    # TODO
    raise NotImplementedError()
def load_aggregate_masks_scans(masks_mnames, grids, upgrid_multis):
    scans = []
    masks = []
    igrid = 0
    for masks_names in masks_mnames:
        if (len(masks_names) > 0):
            grid = grids[igrid]
            upgrid_multi = upgrid_multis[igrid]
            upgcount = upgrid_multi * upgrid_multi

            scans1 = []
            masks1 = []
            for masks_name in masks_names:
                print ("Loading: ", masks_name)
                masks0 = np.load(''.join((masks_name, ".npz")))['arr_0']
                scans0 = np.load(''.join((masks_name.replace("masks_", "scans_", 1), ".npz")))['arr_0']
                masks1.append(masks0)
                scans1.append(scans0)

            scans1 = np.vstack(scans1)
            masks1 = np.vstack(masks1)
            if len(masks) > 0:
                scans1 = np.vstack([scans1, scans])
                masks1 = np.vstack([masks1, masks])

            lm = len(masks1) // upgcount * upgcount
            scans1 = scans1[0:lm]  # cut to multiples of upgcount
            masks1 = masks1[0:lm]

            index_shuf = np.arange(lm)
            np.random.shuffle(index_shuf)
            scans1 = scans1[index_shuf]
            masks1 = masks1[index_shuf]

            scans = data_from_grid_by_proximity(scans1, upgrid_multi, upgrid_multi, grid=grid)
            masks = data_from_grid_by_proximity(masks1, upgrid_multi, upgrid_multi, grid=grid)
        igrid += 1
    return masks, scans