We extracted the following 50 code examples from open-source Python projects to illustrate how to use numpy.savez_compressed().
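Before the project examples, here is a minimal sketch of the basic usage pattern they all build on (the file name arrays.npz and the array names x and y below are made up for illustration, not taken from any of the projects): each keyword argument to np.savez_compressed() becomes a named array inside the compressed .npz archive, and np.load() retrieves it by that name.

import numpy as np

# Save several arrays into a single compressed .npz archive;
# the keyword names become the keys stored in the archive.
x = np.arange(10)
y = np.random.rand(3, 3)
np.savez_compressed('arrays.npz', x=x, y=y)

# Load the archive back; np.load returns a lazy, dict-like NpzFile
# keyed by the names used when saving.
with np.load('arrays.npz') as data:
    print(data['x'])
    print(data['y'].shape)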
def save(self, filename): """Saves the collection to a file. Parameters ---------- filename : :obj:`str` The file to save the collection to. Raises ------ ValueError If the file extension is not .npy or .npz. """ file_root, file_ext = os.path.splitext(filename) if file_ext == '.npy': np.save(filename, self._data) elif file_ext == '.npz': np.savez_compressed(filename, self._data) else: raise ValueError('Extension %s not supported for point saves.' %(file_ext))
def save_weights(fname, params, metadata=None):
    """ assumes all params have unique names. """
    # Includes batchnorm params now
    names = [par.name for par in params]
    if len(names) != len(set(names)):
        raise ValueError('need unique param names')
    param_dict = {param.name: param.get_value(borrow=False) for param in params}
    if metadata is not None:
        param_dict['metadata'] = pickle.dumps(metadata)
    logging.info('saving {} parameters to {}'.format(len(params), fname))
    # try to avoid half-written files
    fname = Path(fname)
    if fname.exists():
        tmp_fname = Path(fname.stripext() + '.tmp.npz')  # TODO yes, this is a hack
        np.savez_compressed(str(tmp_fname), **param_dict)
        tmp_fname.rename(fname)
    else:
        np.savez_compressed(str(fname), **param_dict)
def save(self, NumpyFile):
    # open file
    nfile = open(NumpyFile, "wb")

    # save internals
    numpy.savez_compressed(nfile,
                           _ImgNum=numpy.array([self._ImgNum]),
                           _MeasNum=numpy.array([self._MeasNum]),
                           _KeySizeBits=numpy.array([self._KeySizeBits]),
                           _KeySizeBytes=numpy.array([self._KeySizeBytes]),
                           _KeyData=self._KeyData,
                           _ImageInfo=self._ImageInfo,
                           *[self._ImageData[idx] for idx in range(0, self._ImgNum)])

    # close file
    nfile.close()
def process_training_data(num_clips):
    """
    Processes random training clips from the full training data. Saves to TRAIN_DIR_CLIPS by
    default.

    @param num_clips: The number of clips to process. Default = 5000000 (set in __main__).

    @warning: This can take a couple of hours to complete with large numbers of clips.
    """
    num_prev_clips = len(glob(c.TRAIN_DIR_CLIPS + '*'))

    for clip_num in xrange(num_prev_clips, num_clips + num_prev_clips):
        clip = process_clip()

        np.savez_compressed(c.TRAIN_DIR_CLIPS + str(clip_num), clip)

        if (clip_num + 1) % 100 == 0:
            print 'Processed %d clips' % (clip_num + 1)
def main():
    """
    Commandline interface to extract parameters.
    """
    log_sockeye_version(logger)
    params = argparse.ArgumentParser(description="Extract specific parameters.")
    arguments.add_extract_args(params)
    args = params.parse_args()

    if os.path.isdir(args.input):
        param_path = os.path.join(args.input, C.PARAMS_BEST_NAME)
    else:
        param_path = args.input
    ext_params = extract(param_path, args.names, args.list_all)

    if len(ext_params) > 0:
        utils.check_condition(args.output != None,
                              "An output filename must be specified. (Use --output)")
        logger.info("Writing extracted parameters to '%s'", args.output)
        np.savez_compressed(args.output, **ext_params)
def save_vocab(self, path=None):
    """ Saves the vocabulary into a file.

    # Arguments:
        path: Where the vocabulary should be saved. If not specified, a
            randomly generated filename is used instead.
    """
    dtype = ([('word', '|S{}'.format(self.word_length_limit)), ('count', 'int')])
    np_dict = np.array(self.word_counts.items(), dtype=dtype)

    # sort from highest to lowest frequency
    np_dict[::-1].sort(order='count')
    data = np_dict

    if path is None:
        path = str(uuid.uuid4())

    np.savez_compressed(path, data=data)
    print("Saved dict to {}".format(path))
def savetofile(self, outfile):
    """Save model parameters to file."""

    # Pickle non-matrix params into bytestring, then convert to numpy byte array
    pklbytes = pickle.dumps({'hyper': self.hyper, 'epoch': self.epoch, 'pos': self.pos},
                            protocol=pickle.HIGHEST_PROTOCOL)
    p = np.fromstring(pklbytes, dtype=np.uint8)

    # Gather parameter matrices and names
    pvalues = {n: m.get_value() for n, m in self.params.items()}

    # Now save params and matrices to file
    try:
        np.savez_compressed(outfile, p=p, **pvalues)
    except OSError as e:
        raise e
    else:
        if isinstance(outfile, str):
            stdout.write("Saved model parameters to {0}\n".format(outfile))
def save_npz(filename, obj, compression=True):
    """Saves an object to the file in NPZ format.

    This is a short-cut function to save only one object into an NPZ file.

    Args:
        filename (str): Target file name.
        obj: Object to be serialized. It must support serialization protocol.
        compression (bool): If ``True``, compression in the resulting zip file
            is enabled.
    """
    s = DictionarySerializer()
    s.save(obj)
    with open(filename, 'wb') as f:
        if compression:
            numpy.savez_compressed(f, **s.target)
        else:
            numpy.savez(f, **s.target)
def export_trimmed_glove_vectors(vocab, glove_filename, trimmed_filename, dim):
    """Saves glove vectors in numpy array

    Args:
        vocab: dictionary vocab[word] = index
        glove_filename: a path to a glove file
        trimmed_filename: a path where to store a matrix in npy
        dim: (int) dimension of embeddings
    """
    embeddings = np.zeros([len(vocab), dim])
    with open(glove_filename) as f:
        for line in f:
            line = line.strip().split(' ')
            word = line[0]
            embedding = [float(x) for x in line[1:]]
            if word in vocab:
                word_idx = vocab[word]
                embeddings[word_idx] = np.asarray(embedding)

    np.savez_compressed(trimmed_filename, embeddings=embeddings)
def export_trimmed_glove_vectors(vocab, glove_filename, trimmed_filename, dim):
    """
    Saves glove vectors in numpy array

    Args:
        vocab: dictionary vocab[word] = index
        glove_filename: a path to a glove file
        trimmed_filename: a path where to store a matrix in npy
        dim: (int) dimension of embeddings
    """
    embeddings = np.zeros([len(vocab), dim])
    with open(glove_filename, encoding="utf-8") as f:
        for line in f:
            line = line.strip().split()
            word = line[0]
            embedding = map(float, line[1:])
            if word in vocab:
                word_idx = vocab[word]
                embeddings[word_idx] = np.asarray(list(embedding))

    np.savez_compressed(trimmed_filename, embeddings=embeddings)
def storeData(df, fileLoc='./tmp/', cv=0.30, rs=21):
    """
    # Store the train and CV data in the tmp location for the classifiers.
    # Input: df: Transformed DataFrame of the Adult Dataset.
    #        fileLoc: location of tmp where the binary data will be stored.
    #        cv: ratio of the cross_validation set in train-cv split
    #        rs: random_state used to the split.
    # returns: None
    # Note: data can be accessed using:
    #       Ex: data = np.load('./tmp/testTrainData.npz')
    #       and access the train/test using split using dictionary formatting.
    #       Ex: data['XTrain']
    """
    if not os.path.exists('tmp'):
        os.makedirs('tmp')
    filename = fileLoc + 'testTrainData'
    XTrain, XTest, yTrain, yTest = trainCvSplit(df, cv, rs)
    kwargs = {'XTrain': XTrain,
              'XTest': XTest,
              'yTrain': yTrain,
              'yTest': yTest}
    np.savez_compressed(filename, **kwargs)
    return None
def _make_npz(path, urls):
    x_url, y_url = urls
    x_path = download.cached_download(x_url)
    y_path = download.cached_download(y_url)

    with gzip.open(x_path, 'rb') as fx, gzip.open(y_path, 'rb') as fy:
        fx.read(4)
        fy.read(4)
        N, = struct.unpack('>i', fx.read(4))
        if N != struct.unpack('>i', fy.read(4))[0]:
            raise RuntimeError('wrong pair of MNIST images and labels')
        fx.read(8)

        x = numpy.empty((N, 784), dtype=numpy.uint8)
        y = numpy.empty(N, dtype=numpy.uint8)

        for i in six.moves.range(N):
            y[i] = ord(fy.read(1))
            for j in six.moves.range(784):
                x[i, j] = ord(fx.read(1))

    numpy.savez_compressed(path, x=x, y=y)
    return {'x': x, 'y': y}
def load(filename, n_episodes_model=1):
    """ Load model (T,R) from <filename>_model.npz. Update t, r, s0
        if no model is available, generate and save from SASR_step file """
    global t, r, s0

    file_model = filename + ".npz"
    if os.path.isfile(file_model):
        print("Model file found")
        with np.load(file_model) as fm:
            t = fm['T']
            r = fm['R']
            s0 = fm['s0']
    else:
        print("Model file not found")
        generate_t_and_r(filename, n_episodes_model)  # create t, r, s0
        """ Save model (T,R) to <filename>_model.npz """
        np.savez_compressed(file_model, T=t, R=r, s0=s0)
    return
def save_mean_representations(model, model_filename, X, labels, pred_file):
    n_items, dv = X.shape
    n_classes = model.n_classes
    n_topics = model.d_t

    # try normalizing input vectors
    test_X = normalize(np.array(X, dtype='float32'), axis=1)

    model.load_params(model_filename)

    # evaluate bound on test set
    item_mus = []
    for item in range(n_items):
        y = labels[item]

        # save the mean document representation
        r_mu = model.get_mean_doc_rep(test_X[item, :], y)
        item_mus.append(np.array(r_mu))

    # write all the test doc representations to file
    if pred_file is not None and n_topics > 1:
        np.savez_compressed(pred_file, X=np.array(item_mus), y=labels)
def save(self, filename):
    """Writes the image to a file.

    Parameters
    ----------
    filename : :obj:`str`
        The file to save the image to. Must be one of .png, .jpg, .npy, or .npz.

    Raises
    ------
    ValueError
        If an unsupported file type is specified.
    """
    file_root, file_ext = os.path.splitext(filename)
    if file_ext in COLOR_IMAGE_EXTS:
        im_data = self._image_data()
        pil_image = PImage.fromarray(im_data.squeeze())
        pil_image.save(filename)
    elif file_ext == '.npy':
        np.save(filename, self._data)
    elif file_ext == '.npz':
        np.savez_compressed(filename, self._data)
    else:
        raise ValueError('Extension %s not supported' % (file_ext))
def write_npz(windows, proximity_matrix, output_file):
    """Write a proximity matrix to an npz file.

    npz files are a compressed numpy-specific format, meaning they take up
    less disk space, but cannot be easily opened by other programming
    languages (e.g. R). For more information see
    :func:`numpy.savez_compressed`.

    :param tuple windows: (list of x-axis windows, list of y-axis windows)
    :param proximity_matrix: Input proximity matrix.
    :type proximity_matrix: :class:`numpy array <numpy.ndarray>`
    :param str filepath: Path to save matrix file.
    """
    window_dict = {'windows_{}'.format(i): win for i, win in enumerate(windows)}
    np.savez_compressed(output_file, scores=proximity_matrix, **window_dict)
def SaveGeometryMatrix(self, filename='geo_matrix'):
    if ((self.LMatrixColumns is not None) & (self.LMatrixRows is not None) & (self.LMatrixValues is not None)):
        np.savez_compressed(filename,
                            columns=self.LMatrixColumns,
                            rows=self.LMatrixRows,
                            values=self.LMatrixValues,
                            shape=self.LMatrixShape,
                            grid_rmin=self.Rmin,
                            grid_rmax=self.Rmax,
                            grid_nr=self.nR,
                            grid_zmin=self.Zmin,
                            grid_zmax=self.Zmax,
                            grid_nz=self.nZ,
                            gridtype='RectangularGeometryMatrix')
def save_weights(self, filename):
    # save both weights and variables
    with open(filename, 'wb') as f:
        # extract all weights in one go:
        w = self.get_value_of(self.get_weights() + self.traverse('variables'))
        print(len(w), 'weights (and variables) obtained.')

        # create an array object and put all the arrays into it.
        # otherwise np.asanyarray() within np.savez_compressed()
        # might make stupid mistakes
        arrobj = np.empty([len(w)], dtype='object')  # array object
        for i in range(len(w)):
            arrobj[i] = w[i]

        np.savez_compressed(f, w=arrobj)
        print('successfully saved to', filename)
        return True
def run(self):
    while True:
        name, data = self.queue.get()
        if name is None:
            break
        if data.shape[2] == 1 or data.shape[2] == 3:
            name += '.png'
            cv2.imwrite(os.path.join(self.path + name), data)
            #imgOut = cv2.resize(imgOut, dsize=(img.shape[1],img.shape[0]))
            #original[:,:,0] = np.repeat(np.mean(original, axis=2, keepdims=True), 3, axis=2)
            #original[:,:,0] *= 1-imgOut* 1.3
            #original[:,:,1] *= 1-imgOut* 1.3
            #original[:,:,2] *= imgOut* 1.3
            #cv2.imshow('OUT2', original /255)
            #cv2.waitKey(1)
            #cv2.imwrite('%s-shown.png' % fileName, original)
        else:
            name += '.npz'
            np.savez_compressed(os.path.join(self.path + name), data=data)
def install(self, local_dst_dir_=None, local_src_dir_=None, clean_install_=False):
    '''
    Install the dataset into directly usable format,
    requires downloading for public dataset.

    Args:
        local_dst_dir_: string or None
            where to install the dataset, None -> "%(default_dir)s"
        local_src_dir_: string or None
            where to find the raw downloaded files, None -> "%(default_dir)s"
    '''
    local_dst_dir = self.DEFAULT_DIR if local_dst_dir_ is None else Path(local_dst_dir_)
    local_src_dir = self.DEFAULT_DIR if local_src_dir_ is None else Path(local_src_dir_)
    local_dst_dir.mkdir(parents=True, exist_ok=True)
    assert local_src_dir.exists()
    images = np.empty((60000, 3, 32, 32), dtype=np.uint8)
    labels = np.empty((60000,), dtype=np.uint8)
    tarfile_name = str(local_src_dir / 'cifar-10-python.tar.gz')
    with tarfile.open(tarfile_name, 'r:gz') as tf:
        for i in range(5):
            with tf.extractfile('cifar-10-batches-py/data_batch_%d' % (i + 1)) as f:
                data_di = pickle.load(f, encoding='bytes')
                images[(10000 * i):(10000 * (i + 1))] = data_di[b'data'].reshape((10000, 3, 32, 32))
                labels[(10000 * i):(10000 * (i + 1))] = np.asarray(data_di[b'labels'], dtype=np.uint8)
        with tf.extractfile('cifar-10-batches-py/test_batch') as f:
            data_di = pickle.load(f, encoding='bytes')
            images[50000:60000] = data_di[b'data'].reshape((10000, 3, 32, 32))
            labels[50000:60000] = data_di[b'labels']
    np.savez_compressed(str(local_dst_dir / 'cifar10.npz'), images=images, labels=labels)
    if clean_install_:
        os.remove(tarfile_name)
def write_sar_log(sars: List, logdir: str, episode_reward: int, suffix: str = ''):
    """Write state-action-rewards to a log file."""
    np.savez_compressed(
        os.path.join(logdir, '%s_%s%s' % (str(time.time())[-5:], episode_reward, suffix)),
        np.vstack(sars))
def train_glove(infile, inputSize=20000, batchSize=100, dimensionSize=100, maxEpochs=1000,
                outfile='result', x_max=100, alpha=0.75):
    options = locals().copy()
    print 'initializing parameters'
    params = init_params(options)
    tparams = init_tparams(params)

    print 'loading data'
    I, J, Weight = load_data(infile)
    n_batches = int(np.ceil(float(I.get_value(borrow=True).shape[0]) / float(batchSize)))

    print 'building models'
    weightVector, iVector, jVector, cost = build_model(tparams, options)
    grads = T.grad(cost, wrt=tparams.values())
    f_grad_shared, f_update = adadelta(tparams, grads, weightVector, iVector, jVector, cost)

    logFile = outfile + '.log'
    print 'training start'
    for epoch in xrange(maxEpochs):
        costVector = []
        iteration = 0
        for batchIndex in random.sample(range(n_batches), n_batches):
            cost = f_grad_shared(Weight.get_value(borrow=True, return_internal_type=True)[batchIndex * batchSize:(batchIndex + 1) * batchSize],
                                 I.get_value(borrow=True, return_internal_type=True)[batchIndex * batchSize:(batchIndex + 1) * batchSize],
                                 J.get_value(borrow=True, return_internal_type=True)[batchIndex * batchSize:(batchIndex + 1) * batchSize])
            f_update()
            costVector.append(cost)
            if (iteration % 1000 == 0):
                buf = 'epoch:%d, iteration:%d/%d, cost:%f' % (epoch, iteration, n_batches, cost)
                print buf
                print2file(buf, logFile)
            iteration += 1
        trainCost = np.mean(costVector)
        buf = 'epoch:%d, cost:%f' % (epoch, trainCost)
        print buf
        print2file(buf, logFile)
        tempParams = unzip(tparams)
        np.savez_compressed(outfile + '.' + str(epoch), **tempParams)
def save_matrix(f, m):
    np.savez_compressed(f, data=m.data, indices=m.indices, indptr=m.indptr, shape=m.shape)
def save_pkl_files(dsm_prefix, dsm, save_in_one_file=False):
    """
    Save the space to separate pkl files.
    :param dsm_prefix:
    :param dsm:
    """
    # Save in a single file (for small spaces)
    if save_in_one_file:
        io_utils.save(dsm, dsm_prefix + '.pkl')

    # Save in multiple files: npz for the matrix and pkl for the other data members of Space
    else:
        mat = coo_matrix(dsm.cooccurrence_matrix.get_mat())
        np.savez_compressed(dsm_prefix + 'cooc.npz', data=mat.data, row=mat.row, col=mat.col, shape=mat.shape)

        with open(dsm_prefix + '_row2id.pkl', 'wb') as f_out:
            pickle.dump(dsm._row2id, f_out, 2)

        with open(dsm_prefix + '_id2row.pkl', 'wb') as f_out:
            pickle.dump(dsm._id2row, f_out, 2)

        with open(dsm_prefix + '_column2id.pkl', 'wb') as f_out:
            pickle.dump(dsm._column2id, f_out, 2)

        with open(dsm_prefix + '_id2column.pkl', 'wb') as f_out:
            pickle.dump(dsm._id2column, f_out, 2)
def _close(self):
    # Write everything
    np.savez_compressed(self.request.get_file(), *self._images)
def test_compressed_roundtrip():
    arr = np.random.rand(200, 200)
    npz_file = os.path.join(tempdir, 'compressed.npz')
    np.savez_compressed(npz_file, arr=arr)
    arr1 = np.load(npz_file)['arr']
    assert_array_equal(arr, arr1)
def savez_compressed(file, *args, **kwds):
    """Saves one or more arrays into a file in compressed ``.npz`` format.

    It is equivalent to :func:`cupy.savez` function except the output file is
    compressed.

    .. seealso::
       :func:`cupy.savez` for more detail,
       :func:`numpy.savez_compressed`
    """
    args = map(cupy.asnumpy, args)
    for key in kwds:
        kwds[key] = cupy.asnumpy(kwds[key])
    numpy.savez_compressed(file, *args, **kwds)
def process_glove(args, vocab_list, save_path, size=4e5, random_init=True):
    """
    :param vocab_list: [vocab]
    :return:
    """
    if not gfile.Exists(save_path + ".npz"):
        glove_path = os.path.join(args.glove_dir, "glove.6B.{}d.txt".format(args.glove_dim))
        if random_init:
            glove = np.random.randn(len(vocab_list), args.glove_dim)
        else:
            glove = np.zeros((len(vocab_list), args.glove_dim))
        found = 0
        with open(glove_path, 'r') as fh:
            for line in tqdm(fh, total=size):
                array = line.lstrip().rstrip().split(" ")
                word = array[0]
                vector = list(map(float, array[1:]))
                if word in vocab_list:
                    idx = vocab_list.index(word)
                    glove[idx, :] = vector
                    found += 1
                if word.capitalize() in vocab_list:
                    idx = vocab_list.index(word.capitalize())
                    glove[idx, :] = vector
                    found += 1
                if word.upper() in vocab_list:
                    idx = vocab_list.index(word.upper())
                    glove[idx, :] = vector
                    found += 1
        print("{}/{} of word vocab have corresponding vectors in {}".format(found, len(vocab_list), glove_path))
        np.savez_compressed(save_path, glove=glove)
        print("saved trimmed glove matrix at: {}".format(save_path))
def write_values(self, tensors, compress=False):
    """
    write dictionary of numpy.ndarray's with Op name as key to file

    Arguments:
        tensors (dict): A dictionary of numpy.ndarray's with Op name as key
        compress: specify whether to compress tensors
    """
    if compress:
        np.savez_compressed(self.name, **tensors)
    else:
        np.savez(self.name, **tensors)
def process_word2vec(word2vec_dir, vocab, save_path, random_init=True):
    # read pre-trained word embeddings from the binary file
    print('Loading google word2vec...')
    word2vec_path = word2vec_dir + '/GoogleNews-vectors-negative300.bin.gz'
    word_vectors = KeyedVectors.load_word2vec_format(word2vec_path, binary=True)
    print('Word2vec loaded!')

    if random_init:
        word2vec = np.random.uniform(-0.25, 0.25, (len(vocab), 300))
    else:
        word2vec = np.zeros((len(vocab), 300))

    found = 0
    for idx, token in enumerate(vocab):
        try:
            vec = word_vectors[token]
        except:
            pass
        else:
            word2vec[idx, :] = vec
            found += 1

    del word_vectors

    print("{}/{} of word vocab have corresponding vectors in {}".format(found, len(vocab), word2vec_path))
    np.savez_compressed(save_path, word2vec=word2vec)
    print("saved trimmed word2vec matrix at: {}".format(save_path))


# construct embedding vectors according to the GloVe word vectors and vocabulary
def process_glove(glove_dir, glove_dim, vocab_dir, save_path, random_init=True):
    """
    :param vocab_list: [vocab]
    :return:
    """
    save_path = save_path + '.{}'.format(glove_dim)
    if not os.path.isfile(save_path + ".npz"):
        # read vocabulary
        with open(vocab_dir + '/vocabulary.pickle', 'rb') as f:
            vocab_map = cPickle.load(f)
            f.close()
        vocab_list = list(zip(*vocab_map)[0])
        glove_path = os.path.join(glove_dir, "glove.6B.{}d.txt".format(glove_dim))
        if random_init:
            glove = np.random.uniform(-0.25, 0.25, (len(vocab_list), glove_dim))
        else:
            glove = np.zeros((len(vocab_list), glove_dim))
        found = 0
        with open(glove_path, 'r') as fh:
            for line in fh.readlines():
                array = line.lstrip().rstrip().split(" ")
                word = array[0]
                vector = list(map(float, array[1:]))
                if word in vocab_list:
                    idx = vocab_list.index(word)
                    glove[idx, :] = vector
                    found += 1
                if word.capitalize() in vocab_list:
                    idx = vocab_list.index(word.capitalize())
                    glove[idx, :] = vector
                    found += 1
                if word.upper() in vocab_list:
                    idx = vocab_list.index(word.upper())
                    glove[idx, :] = vector
                    found += 1
        print("{}/{} of word vocab have corresponding vectors in {}".format(found, len(vocab_list), glove_path))
        np.savez_compressed(save_path, glove=glove)
        print("saved trimmed glove matrix at: {}".format(save_path))
def save_estimates(self, fname='', notes='', force=False):
    """
    Saves the JIVE estimates

    U, D, V, full, rank for block specific joint/individual spaces
    U, D, V, rank for common joint space
    some metadata (when saved, some notes)

    Parameters
    ----------
    fname: name of the file
    notes: any notes you want to include
    force: whether or not to overwrite a file with the same name
    """
    if os.path.exists(fname) and (not force):
        raise ValueError('%s already exists' % fname)

    kwargs = {}
    svd_dat = ['scores', 'sing_vals', 'loadings', 'rank']

    kwargs['K'] = self.K

    block_estimates = self.get_block_specific_estimates()
    for k in range(self.K):
        for mode in ['joint', 'individual']:
            for dat in svd_dat + ['full']:
                label = '%d_%s_%s' % (k, mode, dat)
                kwargs[label] = block_estimates[k][mode][dat]

    common_joint = self.get_common_joint_space_estimate()
    for dat in svd_dat:
        kwargs['common_%s' % dat] = common_joint[dat]

    current_time = time.strftime("%m/%d/%Y %H:%M:%S")
    kwargs['metadata'] = [current_time, notes]

    np.savez_compressed(fname, **kwargs)
def save_init_svd(self, fname='', notes='', force=False):
    """
    Saves the initial SVD so it can be loaded later without recomputing

    Parameters
    ----------
    fname: name of the file
    notes: any notes you want to include
    force: whether or not to overwrite a file with the same name
    """
    if not hasattr(self.blocks[0], 'scores'):
        raise ValueError('initial svd has not yet been computed')

    if os.path.exists(fname) and (not force):
        raise ValueError('%s already exists' % fname)

    kwargs = {}
    svd_dat = ['scores', 'sing_vals', 'loadings', 'rank']

    kwargs['K'] = self.K

    for k in range(self.K):
        kwargs['%d_scores' % k] = self.blocks[k].scores
        kwargs['%d_sv' % k] = self.blocks[k].sv
        kwargs['%d_loadings' % k] = self.blocks[k].loadings
        kwargs['%d_init_svd_rank' % k] = self.blocks[k].init_svd_rank

    np.savez_compressed(fname, **kwargs)
def save(self, out_file):
    """
    Save the current memory into a file in Numpy format

    :param out_file: File storage path
    :return:
    """
    np.savez_compressed(out_file, states=self._states, actions=self._actions,
                        rewards=self._rewards, terminals=self._terminals)
def _save_np_compressed_data(file_name, *args):
    mkdirs_if_not_exist(dirname(file_name))
    np.savez_compressed(file_name, *args)
def save_frame_data(archive, path, videos, object_point_set, verbose=True):
    if verbose:
        print("Saving corners to {0:s}".format(path))
    for video in videos:
        archive[IMAGE_POINTS + str(video.name)] = video.image_points
        archive[FRAME_NUMBERS + str(video.name)] = list(video.usable_frames.keys())
        if len(video.poses) > 0:
            archive[POSES + str(video.name)] = np.array([pose.T for pose in video.poses])
    archive[OBJECT_POINT_SET] = object_point_set
    np.savez_compressed(path, **archive)
def save_calibration_intervals(archive, path, videos, verbose=True):
    if verbose:
        print("Saving calibration intervals to {0:s}".format(path))
    ranges = []
    for video in videos:
        if video.calibration_interval is None:
            raise ValueError("Expecting all cameras to have valid calibration frame ranges. Got: None")
        ranges.append(video.calibration_interval)
    ranges = np.array(ranges)
    archive[CALIBRATION_INTERVALS] = ranges
    np.savez_compressed(path, **archive)
def save_model(self):
    logging.info("Saving model")
    save_filename = os.path.join(self.model_folder,
                                 '{}_epoch{}.npz'.format(self.model_name, self.epoch))
    np.savez_compressed(save_filename, *lasagne.layers.get_all_param_values(self.network))
def _preprocess(self, input_file, tensor_file):
    if input_file.endswith(".bz2"):
        file_reference = BZ2File(input_file, "r")
    elif input_file.endswith(".txt"):
        file_reference = io.open(input_file, "r")
    raw_data = file_reference.read()
    file_reference.close()
    data = raw_data.encode(encoding=self.encoding)
    # Convert the entirety of the data file from characters to indices via the vocab dictionary.
    # How? map(function, iterable) returns a list of the output of the function
    # executed on each member of the iterable. E.g.:
    # [14, 2, 9, 2, 0, 6, 7, 0, ...]
    # np.array converts the list into a numpy array.
    self.tensor = np.array(list(map(self.vocab.get, data)))
    # Compress and save the numpy tensor array to data.npz.
    np.savez_compressed(tensor_file, tensor_data=self.tensor)
def save_vocab(self, path_count, path_vocab, word_limit=100000):
    """ Saves the master vocabulary into a file.
    """

    # reserve space for 10 special tokens
    words = OrderedDict()
    for token in SPECIAL_TOKENS:
        # store -1 instead of np.inf, which can overflow
        words[token] = -1

    # sort words by frequency
    desc_order = OrderedDict(sorted(self.master_vocab.items(),
                                    key=lambda kv: kv[1], reverse=True))
    words.update(desc_order)

    # use encoding of up to 30 characters (no token conversions)
    # use float to store large numbers (we don't care about precision loss)
    np_vocab = np.array(words.items(),
                        dtype=([('word', '|S30'), ('count', 'float')]))

    # output count for debugging
    counts = np_vocab[:word_limit]
    np.savez_compressed(path_count, counts=counts)

    # output the index of each word for easy lookup
    final_words = OrderedDict()
    for i, w in enumerate(words.keys()[:word_limit]):
        final_words.update({w: i})
    with open(path_vocab, 'w') as f:
        f.write(json.dumps(final_words, indent=4, separators=(',', ': ')))
def save_weights(self, weightspath=None):
    weightspath = super(LasagneNetwork, self)._weightspath(weightspath)
    weights = {name: p.get_value()
               for name, p in LasagneNetwork._get_named_params(self.out_layer)}
    np.savez_compressed(weightspath, **weights)
def _preprocess(self, input_file, tensor_file):
    if input_file.endswith(".bz2"):
        file_reference = BZ2File(input_file, "r")
    elif input_file.endswith(".txt"):
        file_reference = io.open(input_file, "r")
    raw_data = file_reference.read()
    file_reference.close()
    data = raw_data.encode(encoding=self.encoding)
    # Convert the entirety of the data file from characters to indices via the vocab dictionary.
    # How? map(function, iterable) returns a list of the output of the function
    # executed on each member of the iterable. E.g.:
    # [14, 2, 9, 2, 0, 6, 7, 0, ...]
    # np.array converts the list into a numpy array.
    self.tensor = np.array(map(self.vocab.get, data))
    # Compress and save the numpy tensor array to data.npz.
    np.savez_compressed(tensor_file, tensor_data=self.tensor)
def main(em_file, em_result):
    '''
    embedding -> numpy
    '''
    em = word2vec.load(em_file)
    vec = (em.vectors)
    word2id = em.vocab_hash
    # d = dict(vector = vec, word2id = word2id)
    # t.save(d, em_result)
    np.savez_compressed(em_result, vector=vec, word2id=word2id)
def savelogs(self, ts=None, saveres=True, filename=None):
    # FIXME: consider HDF5
    if ts == None:
        ts = time.strftime("%Y%m%d-%H%M%S")
    # np.save("%s/log-x-%s" % (self.cfgprefix, ts), self.iosm.x_)
    # np.save("%s/log-x_raw-%s" % (self.cfgprefix, ts), self.iosm.x_raw_)
    # np.save("%s/log-z-%s" % (self.cfgprefix, ts), self.iosm.z_)
    # np.save("%s/log-zn-%s" % (self.cfgprefix, ts), self.iosm.zn_)
    # np.save("%s/log-zn_lp-%s" % (self.cfgprefix, ts), self.iosm.zn_lp_)
    # np.save("%s/log-r-%s" % (self.cfgprefix, ts), self.iosm.r_)
    # np.save("%s/log-w-%s" % (self.cfgprefix, ts), self.iosm.w_)
    # network data, pickling reservoir, input weights, output weights
    # self.res.save("%s/log-%s-res-%s.bin" % (self.cfgprefix, self.cfgprefix, ts))
    if filename == None:
        logfile = "%s/log-learner-%s" % (self.cfgprefix, ts)
    else:
        logfile = filename
    if saveres:
        np.savez_compressed(logfile, x=self.iosm.x_, x_raw=self.iosm.x_raw_, z=self.iosm.z_,
                            zn=self.iosm.zn_, zn_lp=self.iosm.zn_lp_, r=self.iosm.r_,
                            w=self.iosm.w_, e=self.iosm.e_, t=self.iosm.t_, mse=self.iosm.mse_)
    else:
        np.savez_compressed(logfile, x=self.iosm.x_, x_raw=self.iosm.x_raw_, z=self.iosm.z_,
                            zn=self.iosm.zn_, zn_lp=self.iosm.zn_lp_,
                            w=self.iosm.w_, e=self.iosm.e_, t=self.iosm.t_, mse=self.iosm.mse_)
    print "logs saved to %s" % logfile
    return logfile
def save_matrix(self, path):
    with open(path, 'w') as f:
        np.savez_compressed(f, data=self.__data, rowlabels=self.__rowlabels,
                            columnlabels=self.__columnlabels)
def save_scores(model_options, name_scores):
    if not os.path.exists('scores/'):
        os.system('mkdir scores/')
    save_name = 'scores/scores_' + model_options['name'].split('/')[-1]
    print 'Dumping scores to: ' + save_name
    if not os.path.isdir('scores/'):
        os.mkdir('scores')
    np.savez_compressed(save_name, name_scores)