The following 50 code examples, extracted from open-source Python projects, show how to use _pickle.load().
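Before the project examples, here is a minimal sketch of the pattern they all share: open the file in binary mode and pass the file object to _pickle.load(). The file name 'example.pkl' and the payload dictionary are illustrative assumptions, not taken from any of the projects below.

# Minimal sketch of a _pickle round trip; 'example.pkl' and the payload
# dictionary are illustrative assumptions only.
import _pickle as pickle

payload = {'model': [1, 2, 3]}

# Serialize the object to disk.
with open('example.pkl', 'wb') as f:
    pickle.dump(payload, f)

# Deserialize it back; the file must be opened in binary mode ('rb').
with open('example.pkl', 'rb') as f:
    restored = pickle.load(f)

assert restored == payload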
def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt)
    will be skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    models = {}
    for model_file in os.listdir(models_dir):
        with open(os.path.join(models_dir, model_file), 'rb') as f:
            try:
                models[model_file] = pickle.load(f)['model']
            except pickle.UnpicklingError:
                continue
    return models
def get_data(name):
    """Load data from the given name"""
    gen_data = {}
    # new version
    if os.path.isfile(name + 'data.pickle'):
        curent_f = open(name + 'data.pickle', 'rb')
        d2 = cPickle.load(curent_f)
    # Old version
    else:
        curent_f = open(name, 'rb')
        d1 = cPickle.load(curent_f)
        data1 = d1[0]
        data = np.array([data1[:, :, :, :, :, 0], data1[:, :, :, :, :, 1]])
        # Convert log e to log2
        normalization_factor = 1 / np.log2(2.718281)
        epochsInds = np.arange(0, data.shape[4])
        d2 = {}
        d2['epochsInds'] = epochsInds
        d2['information'] = data / normalization_factor
    return d2
def __init__(self, filename='word2vec.pklz'):
    """
    Py Word2vec wrapper
    """
    super().__init__()
    self.name = 'word2vec'
    self.load(filename)
    self.vocab_cnt = len(self)
    self.dims = self[list(self.keys())[0]].shape[0]
    print('vocab count: ' + str(self.vocab_cnt))
    print('dimensions: ' + str(self.dims))
    self.word2idx = {w: i for i, w in enumerate(self.keys())}
    self.idx2word = {i: w for i, w in enumerate(self.keys())}
    self._matrix = np.array(list(self.values()))
    print(self._matrix.shape)
def load_caltech101(folder=CALTECH101_DIR, one_hot=True, partitions=None, filters=None, maps=None):
    path = folder + "/caltech101.pickle"
    with open(path, "rb") as input_file:
        X, target_name, files = cpickle.load(input_file)
    dict_name_ID = {}
    i = 0
    list_of_targets = sorted(list(set(target_name)))
    for k in list_of_targets:
        dict_name_ID[k] = i
        i += 1
    dict_ID_name = {v: k for k, v in dict_name_ID.items()}
    Y = []
    for name_y in target_name:
        Y.append(dict_name_ID[name_y])
    if one_hot:
        Y = to_one_hot_enc(Y)
    dataset = Dataset(data=X, target=Y,
                      info={'dict_name_ID': dict_name_ID, 'dict_ID_name': dict_ID_name},
                      sample_info=[{'target_name': t, 'files': f} for t, f in zip(target_name, files)])
    if partitions:
        res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True)
        res += [None] * (3 - len(res))
        return Datasets(train=res[0], validation=res[1], test=res[2])
    return dataset
def load_cifar10(folder=CIFAR10_DIR, one_hot=True, partitions=None, filters=None, maps=None, balance_classes=False):
    path = folder + "/cifar-10.pickle"
    with open(path, "rb") as input_file:
        X, target_name, files = cpickle.load(input_file)
    X = np.array(X)
    dict_name_ID = {}
    i = 0
    list_of_targets = sorted(list(set(target_name)))
    for k in list_of_targets:
        dict_name_ID[k] = i
        i += 1
    dict_ID_name = {v: k for k, v in dict_name_ID.items()}
    Y = []
    for name_y in target_name:
        Y.append(dict_name_ID[name_y])
    if one_hot:
        Y = to_one_hot_enc(Y)
    dataset = Dataset(data=X, target=Y,
                      info={'dict_name_ID': dict_name_ID, 'dict_ID_name': dict_ID_name},
                      sample_info=[{'target_name': t, 'files': f} for t, f in zip(target_name, files)])
    if partitions:
        res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True,
                            balance_classes=True)
        res += [None] * (3 - len(res))
        return Datasets(train=res[0], validation=res[1], test=res[2])
    return dataset
def load_codex_list():
    '''
    Loads codex list from file to show in select menu

    Parameters
    ------------
    None

    Returns
    ------------
    * codex_list (string[]): Array of codex
    '''
    codex_list = None
    with open(os.path.join(ROOT_DIR, 'data/res/codex_list'), 'rb') as f_in:
        codex_list = pickle.load(f_in)
    return codex_list
def load_pickle(f):
    """
    Loads and returns the content of a pickled file.
    It handles the inconsistencies between the pickle packages available in Python 2 and 3.
    """
    try:
        import cPickle as thepickle
    except ImportError:
        import _pickle as thepickle

    try:
        ret = thepickle.load(f, encoding='latin1')
    except TypeError:
        ret = thepickle.load(f)

    return ret
def setUp(self):
    self.seed = 0

    file_path = os.path.join(TEST_DATA_DIR, ML_100K_RATING_PKL)
    with gzip.open(file_path, 'rb') as f:
        if sys.version_info[0] == 3:
            ratings = cPickle.load(f, encoding='latin1')
        else:
            ratings = cPickle.load(f)

    self.n_user = 943
    self.n_item = 1682
    self.assertEqual(ratings.shape[0], 100000)
    self.assertEqual(ratings[:, 0].min(), 1)
    self.assertEqual(ratings[:, 0].max(), self.n_user)
    self.assertEqual(ratings[:, 1].min(), 1)
    self.assertEqual(ratings[:, 1].max(), self.n_item)

    # let user_id / item_id start from 0
    ratings[:, 0] = ratings[:, 0] - 1
    ratings[:, 1] = ratings[:, 1] - 1
    self.ratings = ratings
def get(fn, *args, **kwargs):
    """Fetch the result of fn from the Redis cache, computing and caching it on a miss.

    fn: function whose return value should be cached
    return: the data returned by fn
    """
    key = gen_keyname(fn)
    r = createRedis()
    #r.flushall()
    if key not in r.keys():
        o = fn(*args, **kwargs)
        # serialize the result before storing it in Redis
        f = cStringIO.StringIO()
        cPickle.dump(o, f)
        s = f.getvalue()
        f.close()
        r.set(key, s)
    s = r.get(key)
    f = cStringIO.StringIO(s)
    o = cPickle.load(f)
    f.close()
    return o
def load_transformer_list(config_data):
    output_directory = config_data['embeddings_directory']
    output_basename = config_data['embeddings_basename']

    path = os.path.join(output_directory, output_basename)
    config_fname = os.path.join(path, 'config.json')
    with open(config_fname, 'r') as json_data:
        wemb_config = json.load(json_data)
        ngrams = wemb_config['ngrams']

    transformers = []
    for i in range(ngrams - 1):
        phrase_model = Phrases.load(os.path.join(path, '{}gram'.format(i)))
        transformers.append(phrase_model)

    return transformers
def load_mnist(path, num_training=50000, num_test=10000, cnn=True, one_hot=False):
    f = gzip.open(path, 'rb')
    training_data, validation_data, test_data = cPickle.load(f, encoding='iso-8859-1')
    f.close()

    X_train, y_train = training_data
    X_validation, y_validation = validation_data
    X_test, y_test = test_data

    if cnn:
        shape = (-1, 1, 28, 28)
        X_train = X_train.reshape(shape)
        X_validation = X_validation.reshape(shape)
        X_test = X_test.reshape(shape)

    if one_hot:
        y_train = one_hot_encode(y_train, 10)
        y_validation = one_hot_encode(y_validation, 10)
        y_test = one_hot_encode(y_test, 10)

    X_train, y_train = X_train[range(num_training)], y_train[range(num_training)]
    X_test, y_test = X_test[range(num_test)], y_test[range(num_test)]

    return (X_train, y_train), (X_test, y_test)
def load_cifar10(path, num_training=1000, num_test=1000):
    Xs, ys = [], []
    for batch in range(1, 6):
        f = open(os.path.join(path, "data_batch_{0}".format(batch)), 'rb')
        data = cPickle.load(f, encoding='iso-8859-1')
        f.close()
        X = data["data"].reshape(10000, 3, 32, 32).astype("float64")
        y = np.array(data["labels"])
        Xs.append(X)
        ys.append(y)

    f = open(os.path.join(path, "test_batch"), 'rb')
    data = cPickle.load(f, encoding='iso-8859-1')
    f.close()

    X_train, y_train = np.concatenate(Xs), np.concatenate(ys)
    X_test = data["data"].reshape(10000, 3, 32, 32).astype("float")
    y_test = np.array(data["labels"])

    X_train, y_train = X_train[range(num_training)], y_train[range(num_training)]
    X_test, y_test = X_test[range(num_test)], y_test[range(num_test)]

    mean = np.mean(X_train, axis=0)
    std = np.std(X_train)

    X_train /= 255.0
    X_test /= 255.0

    return (X_train, y_train), (X_test, y_test)
def __init__(self, filepath):
    """
    Args:
        filepath (string): path to data file

    Data format - list of characters, list of images,
    (row, col, ch) numpy array normalized between (0.0, 1.0)

    Omniglot dataset - Each language contains a set of characters;
    Each character is defined by 20 different images
    """
    with open(filepath, "rb") as f:
        processed_data = pickle.load(f)

    self.data = dict()
    for image, label in zip(processed_data['images'], processed_data['labels']):
        if label not in self.data:
            self.data[label] = list()
        img = np.expand_dims(image, axis=0).astype('float32')
        #img /= 255.0
        self.data[label].append(img)

    self.num_categories = len(self.data)
    self.category_size = len(self.data[processed_data['labels'][0]])
def __init__(self, filepath):
    """
    Args:
        filepath (string): path to data file

    Data format - list of characters, list of images,
    (row, col, ch) numpy array normalized between (0.0, 1.0)

    Omniglot dataset - Each language contains a set of characters;
    Each character is defined by 20 different images
    """
    with open(filepath, "rb") as f:
        processed_data = pickle.load(f)

    self.data = dict()
    for image, label in zip(processed_data['images'], processed_data['labels']):
        if label not in self.data:
            self.data[label] = list()
        img = np.expand_dims(image, axis=0).astype('float32')
        img /= 255.0
        self.data[label].append(img)

    self.num_categories = len(self.data)
    self.category_size = len(self.data[processed_data['labels'][0]])
def generate_random_hyperparams(lr_min, lr_max, K_min, K_max, num_layers_min, num_layers_max, load_hparams):
    """This function generates random hyper-parameters for hyperparameter search"""
    # this is for new random parameters
    if not load_hparams[0]:
        lr_exp = np.random.uniform(lr_min, lr_max)
        lr = 10**(lr_exp)
        K = np.random.choice(np.arange(K_min, K_max + 1), 1)[0]
        num_layers = np.random.choice(np.arange(num_layers_min, num_layers_max + 1), 1)[0]
        #momentum_exp = np.random.uniform(-8,0)
        momentum = np.random.uniform(0, 1)  #(2**momentum_exp)
    # this loads hyperparameters from an existing file
    else:
        exp_data = np.load('experiment_data/nmf_data_timit_model_bi_mod_lstm_diag_to_full_device_cpu:0_1490813245.npy')[load_hparams[1]]
        lr = exp_data['LR']
        K = exp_data['K']
        num_layers = exp_data['num_layers']
        try:
            momentum = exp_data['momentum']
        except:
            momentum = None

    return lr, K, num_layers, momentum
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print('Loading model parameters...')
    with open('%s.pkl' % path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print('Compiling encoders...')
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print('Loading tables...')
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2

    return model
def load_tables():
    """
    Load the tables
    """
    words = []
    utable = numpy.load(path_to_tables + 'utable.npy', encoding='bytes')
    btable = numpy.load(path_to_tables + 'btable.npy', encoding='bytes')
    f = open(path_to_tables + 'dictionary.txt', 'rb')
    for line in f:
        words.append(line.decode('utf-8').strip())
    f.close()
    utable = OrderedDict(zip(words, utable))
    btable = OrderedDict(zip(words, btable))
    return utable, btable
def preprocess(text):
    """
    Preprocess text for encoder
    """
    X = []
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    for t in text:
        sents = sent_detector.tokenize(t)
        result = ''
        for s in sents:
            tokens = word_tokenize(s)
            result += ' ' + ' '.join(tokens)
        X.append(result)
    return X
def load_params(path, params):
    """
    load parameters
    """
    pp = numpy.load(path)
    for kk, vv in params.items():
        if kk not in pp:
            warnings.warn('%s is not in the archive' % kk)
            continue
        params[kk] = pp[kk]
    return params

# layers: 'name': ('parameter initializer', 'feedforward')
def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='latin1')
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
        Y = np.array(Y)
        return X, Y
def load_CIFAR10(ROOT):
    """ load all of cifar """
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
def load(self, filename='word2vec.pklz'):
    fil = gzip.open(filename, 'rb')
    while True:
        try:
            tmp = cPickle.load(fil)
            self.update(tmp)
        except EOFError as e:
            print(e)
            break
    fil.close()
def load_batch_from_file(file):
    file_conn = open(file, 'rb')
    image_dictionary = cPickle.load(file_conn, encoding='latin1')
    file_conn.close()
    return image_dictionary
def load_cifar100(folder=CIFAR100_DIR, one_hot=True, partitions=None, filters=None, maps=None):
    path = folder + "/cifar-100.pickle"
    with open(path, "rb") as input_file:
        X, target_ID_fine, target_ID_coarse, fine_ID_corr, coarse_ID_corr, files = cpickle.load(input_file)
    X = np.array(X)

    target_ID_fine = target_ID_fine[:len(X)]
    target_ID_coarse = target_ID_coarse[:len(X)]

    fine_ID_corr = {v: k for v, k in zip(range(len(fine_ID_corr)), fine_ID_corr)}
    coarse_ID_corr = {v: k for v, k in zip(range(len(coarse_ID_corr)), coarse_ID_corr)}
    fine_label_corr = {v: k for k, v in fine_ID_corr.items()}
    coarse_label_corr = {v: k for k, v in coarse_ID_corr.items()}

    Y = []
    for name_y in target_ID_fine:
        Y.append(name_y)
    Y = np.array(Y)
    if one_hot:
        Y = to_one_hot_enc(Y)

    superY = []
    for name_y in target_ID_coarse:
        superY.append(name_y)
    superY = np.array(superY)
    if one_hot:
        superY = to_one_hot_enc(superY)

    print(len(X))
    print(len(Y))
    dataset = Dataset(data=X, target=Y,
                      info={'dict_name_ID_fine': fine_label_corr, 'dict_name_ID_coarse': coarse_label_corr,
                            'dict_ID_name_fine': fine_ID_corr, 'dict_ID_name_coarse': coarse_ID_corr},
                      sample_info=[{'Y_coarse': yc, 'files': f} for yc, f in zip(superY, files)])
    if partitions:
        res = redivide_data([dataset], partitions, filters=filters, maps=maps, shuffle=True)
        res += [None] * (3 - len(res))
        return Datasets(train=res[0], validation=res[1], test=res[2])
    return dataset
def load_obj(name, root_dir=None, notebook_mode=True):
    if root_dir is None:
        root_dir = os.getcwd()
    directory = check_or_create_dir(join_paths(root_dir, FOLDER_NAMINGS['OBJ_DIR']),
                                    notebook_mode=notebook_mode, create=False)
    filename = join_paths(directory, name if name.endswith('.pkgz') else name + '.pkgz')
    with gzip.open(filename, 'rb') as f:
        return pickle.load(f)
def __init__(self, recompute_statistics=False):
    """
    Initialize the module.

    Parameters
    ------------
    *recompute_statistics(boolean): if True, statistics are calculated again,
                                    if False, statistics are loaded from file
    """
    self.get_article_statistics(recompute_statistics=recompute_statistics)

    # list which users see
    self.cur_articles_list = list(self.article_index.values())
    self.cur_articles_list = [a for a in self.cur_articles_list
                              if a.questions_cnt > 0 and a.official_article]

    # ranking list of articles without filters - for fast execution
    # self.articles_list_all = list(self.article_index.values())
    # self.articles_list_all = [a for a in self.articles_list_all if a.questions_cnt > 0]

    # filters list
    self.filters_type = []
    self.filters_data = []

    # with open("../data/guide_articles/guide_article_ID", "rb") as f:
    #     self.ids_in_guides = cPickle.load(f)
    with open("../data/guide_articles/guides_list", "rb") as f:
        self.guides_list = cPickle.load(f)
    # print (len(self.ids_in_guides))

    # default - ranked by cnt_questions, no filters
    # self.ranking_articles(rank_type='by_cnt_questions')
def get_article_statistics(self, recompute_statistics=True):
    """
    Aggregate statistics from both forums.
    """
    if recompute_statistics:
        self.get_article_index()
        data_generator = loadDataGenerator()
        cnt_not_match_links = 0
        links_cnt = 0
        l2a = Link2Article()
        # log = open("./logs", "w")
        # error_link = []
        for question_batch in data_generator:
            for question in question_batch:
                links = LinksSearcher(question.get_all_text()).get_simple_links()
                for link in links:
                    # log.write(link.link_text + "\n")
                    # log.flush()
                    # function from Alexandrina
                    article = l2a.link2article(link)
                    # print (article)
                    if article:
                        # print (article.article_ID)
                        links_cnt += 1
                        self.article_index[article.article_ID].add_question(question, link)
                    else:
                        cnt_not_match_links += 1
                    sys.stderr.write("\r\t\t\t\t\tALL LINKS: %d; CAN't MATCH: %d" % (links_cnt, cnt_not_match_links))
        with open("./../data/statistics/article_statistics", "wb") as f:
            cPickle.dump(self.article_index, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open("./../data/statistics/article_statistics", "rb") as f:
            self.article_index = cPickle.load(f)
def get_graphics(self, dirpath="../app/static/article_pics/"):
    with open("./../data/statistics/article_statistics", "rb") as f:
        self.article_index = cPickle.load(f)

    for idx, article_ID in enumerate(self.article_index.keys()):
        sys.stderr.write("\r %d / %d" % (idx, len(list(self.article_index.keys()))))
        # if "19671_220" == article_ID:
        #     with open("./logs", "w") as f:
        #         for d in self.article_index[article_ID].dates:
        #             f.write(str(d) + "\n")
        #     return
        if len(self.article_index[article_ID].dates) >= 1:
            # print (self.article_index[article_ID].dates[:10])
            plot_dates(self.article_index[article_ID].dates, dirpath + article_ID)
def classify_text(text, i=0):
    global distinct_classes, class_freq, classifier
    ## condition to exit the recursion
    ## after all classifiers are exhausted to classify
    if i == len(distinct_classes):
        return -1
    else:
        ## assign the current class model to 'classifier'
        if ('classifier_' + class_freq['index'][i]) in globals():
            classsifier_assignemnt[int(class_freq['index'][i])]()
        else:
            ## load the model if not loaded already
            with open('classifier_' + class_freq['index'][i] + '.pkl', 'rb') as fid:
                classifier = cPickle.load(fid)
        ## predict using the current classifier
        predicted_class = classifier.classify(text)
        if predicted_class == '-1':
            ## if the current classifier predicts that the document belongs to another class,
            ## loop again with the next classifier to predict
            return classify_text(text, i + 1)
        else:
            ## return the class label if the current classifier predicts that the document
            ## belongs to the current class
            return predicted_class

## defining an object as classifier
def predict_split(train_size):
    if os.path.getsize(TRAIN_PREDICTIONS) == 0:
        print("Splitting Training/Testing Data...")
        _, _, statuses = aggregate.aggregate_train_test(train_size)
        statuses = "\n".join(statuses)

        print("Gathering Features...")
        train_preds = predict(statuses, text_type='str', data=AGGREGATE_TRAINING)
        print("==== DUMPING TRAINING ====")
        with open(TRAIN_PREDICTIONS, 'wb') as f:
            _pickle.dump(train_preds, f)

        test_preds = predict(statuses, text_type='str', data=AGGREGATE_TESTING)
        print("==== DUMPING TESTING ====")
        with open(TEST_PREDICTIONS, 'wb') as f:
            _pickle.dump(test_preds, f)

    with open(TRAIN_PREDICTIONS, 'rb') as f:
        train_preds = _pickle.load(f)
        helpers.print_preds(train_preds, "TRAINING RESULTS")

    with open(TEST_PREDICTIONS, 'rb') as f:
        test_preds = _pickle.load(f)
        helpers.print_preds(test_preds, "TESTING RESULTS")

    print("Reading Testing Data...")
    with open(AGGREGATE_TESTING_STATUSES, 'r') as f:
        csv_reader = csv.reader(f, delimiter=',', quotechar='"')
        predictions = {}
        print("Collecting Predictions...\n")
        for i, status in enumerate(csv_reader):
            print("\r" + str(i), end="")
            train_pred = predict(status[0], text_type='str', data=AGGREGATE_TRAINING)
            test_pred = predict(status[0], text_type='str', data=AGGREGATE_TESTING)
            predictions[i] = [status[0], pred_to_labels(train_pred), pred_to_labels(test_pred)]

    sys.exit()
    return predictions
def predict(file_in, text_type='file', data=AGGREGATE_INFO_FILE):
    """
    Given a valid filepath, run predict_sent() for each line in the file
    and print out the prediction values for each personality class.
    """
    global DATA
    # Load the 'training' data once
    with open(data, 'rb') as in_file:
        DATA = _pickle.load(in_file)

    if text_type == 'file':
        # Get the raw text from file_in
        with open(file_in, newline='') as in_file:
            csv_reader = csv.reader(in_file, delimiter=',', quotechar='"')
            text = "\n".join(in_file.readlines())
    else:
        text = file_in

    # Aggregate the file-level values
    preds = {'eFreq_n': 0, 'eFreq_y': 0, 'eRatio_n': 0, 'eRatio_y': 0,
             'nFreq_n': 0, 'nFreq_y': 0, 'nRatio_n': 0, 'nRatio_y': 0,
             'aFreq_n': 0, 'aFreq_y': 0, 'aRatio_n': 0, 'aRatio_y': 0,
             'cFreq_n': 0, 'cFreq_y': 0, 'cRatio_n': 0, 'cRatio_y': 0,
             'oFreq_n': 0, 'oFreq_y': 0, 'oRatio_n': 0, 'oRatio_y': 0,
             'count': 0}

    for sent in aggregate._clean(text.lower()).split('\n'):
        if sent != '':
            # Predict & Aggregate sentence-level values
            p = predict_sent(sent)
            aggs = aggregate_sent(p)
            # Update the file-level values
            for key, val in aggs.items():
                preds[key] += val

    preds = normalize_ratios(preds)
    helpers.print_preds(preds, text[0:50])
    return preds
def print_data(args):
    # TODO: Move this elsewhere.
    with open(constants.AGGREGATE_INFO_FILE, 'rb') as f:
        preds = _pickle.load(f)

    results = {'eRatio_y': 0, 'eRatio_n': 0, 'nRatio_y': 0, 'nRatio_n': 0,
               'aRatio_y': 0, 'aRatio_n': 0, 'cRatio_y': 0, 'cRatio_n': 0,
               'oRatio_y': 0, 'oRatio_n': 0}

    for key, pred in preds.items():
        if pred.eRatio_y > pred.eRatio_n:
            results['eRatio_y'] += 1
        else:
            results['eRatio_n'] += 1
        if pred.nRatio_y > pred.nRatio_n:
            results['nRatio_y'] += 1
        else:
            results['nRatio_n'] += 1
        if pred.aRatio_y > pred.aRatio_n:
            results['aRatio_y'] += 1
        else:
            results['aRatio_n'] += 1
        if pred.cRatio_y > pred.cRatio_n:
            results['cRatio_y'] += 1
        else:
            results['cRatio_n'] += 1
        if pred.oRatio_y > pred.oRatio_n:
            results['oRatio_y'] += 1
        else:
            results['oRatio_n'] += 1

    helpers.print_preds(results, '')

    with open('results.pkl', 'wb') as f:
        _pickle.dump(results, f)
def load_preprocessed(self, vocab_file, tensor_file):
    with open(vocab_file, 'rb') as f:
        self.chars = cPickle.load(f)
    self.vocab_size = len(self.chars)
    self.vocab = dict(zip(self.chars, range(len(self.chars))))
    self.tensor = np.load(tensor_file)
    train_size = int(self.tensor.shape[0] * 0.9)
    self.valid = self.tensor[train_size:]
    self.train = self.tensor[:train_size]
def main(_):
    print("Parameters: ")
    for k, v in FLAGS.__flags.items():
        print("{} = {}".format(k, v))

    if not os.path.exists("./prepro/"):
        os.makedirs("./prepro/")

    if FLAGS.prepro:
        img_feat, tags_idx, a_tags_idx, vocab_processor = data_utils.load_train_data(
            FLAGS.train_dir, FLAGS.tag_path, FLAGS.prepro_dir, FLAGS.vocab)
    else:
        img_feat = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "img_feat.dat"), 'rb'))
        tags_idx = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "tag_ids.dat"), 'rb'))
        a_tags_idx = cPickle.load(open(os.path.join(FLAGS.prepro_dir, "a_tag_ids.dat"), 'rb'))
        vocab_processor = VocabularyProcessor.restore(FLAGS.vocab)
    img_feat = np.array(img_feat, dtype='float32') / 127.5 - 1.
    test_tags_idx = data_utils.load_test(FLAGS.test_path, vocab_processor)

    print("Image feature shape: {}".format(img_feat.shape))
    print("Tags index shape: {}".format(tags_idx.shape))
    print("Attribute Tags index shape: {}".format(a_tags_idx.shape))
    print("Vocab size: {}".format(len(vocab_processor._reverse_mapping)))
    print("Vocab max length: {}".format(vocab_processor.max_document_length))

    data = Data(img_feat, tags_idx, a_tags_idx, test_tags_idx, FLAGS.z_dim, vocab_processor)

    Model = getattr(sys.modules[__name__], FLAGS.model)
    print(Model)

    model = Model(data, vocab_processor, FLAGS)
    model.build_model()
    model.train()
def __init__(self):
    self.data_dir = '/tempspace/hyuan/VAE/Cifar/cifar-10-batches-py/'
    self.train_idx = 0
    self.test_idx = 0
    self.data_set = self.load()
    self.data_set = np.transpose(self.data_set, (0, 2, 3, 1))
    self.data_set = self.data_set / 127.5 - 1
    self.test_set = self.load_test()
    self.test_set = np.transpose(self.test_set, (0, 2, 3, 1))
    self.test_set = self.test_set / 127.5 - 1
def unpickle(self, file):
    fo = open(file, 'rb')
    data = cPickle.load(fo, encoding='latin1')
    fo.close()
    x = data['data'].reshape((10000, 3, 32, 32))
    return x
def load(self):
    train_data = [self.unpickle(self.data_dir + 'data_batch_' + str(i)) for i in range(1, 6)]
    train_x = np.concatenate([d for d in train_data], axis=0)
    np.random.seed(0)
    np.random.shuffle(train_x)
    return train_x
def readDataFile(self, file, binary=0):
    # pickled files must be opened in binary mode
    with open(file, 'rb' if binary else 'r') as f:
        if binary == 0:
            data = json.load(f)
        else:
            data = cPickle.load(f)
    return data
def loadData(self):
    print("Load data")
    # load config
    config = {}
    config['root_name'] = 'root_default_name'

    # overload config file if exists next to it
    # then, save merged config into self.memoData['config']
    prefix, ext = os.path.splitext(self.location)
    config_file = prefix + '_config.json'
    if os.path.isfile(config_file):
        external_config = self.readDataFile(config_file)
        print('info: External config file found.')
        if isinstance(external_config, dict):
            self.memoData['config'] = self.dict_merge(config, external_config, addKey=1)
            print('info: External config merged.')
        else:
            self.memoData['config'] = config
            print('info: External config is not a dict and ignored.')
    else:
        self.memoData['config'] = config

    # load user data
    user_dirPath = os.path.join(os.path.expanduser('~'), 'Tool_Config', self.__class__.__name__)
    user_setting_filePath = os.path.join(user_dirPath, 'setting.json')
    if os.path.isfile(user_setting_filePath):
        sizeInfo = self.readDataFile(user_setting_filePath)
        self.setGeometry(*sizeInfo)
def readFileData(self, file, binary=0):
    # pickled files must be opened in binary mode
    with open(file, 'rb' if binary else 'r') as f:
        if binary == 0:
            data = json.load(f)
        else:
            data = cPickle.load(f)
    return data