The following code examples, extracted from open-source Python projects, demonstrate how to use tables.open_file().
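Before the examples, here is a minimal sketch of the basic pattern most of them share: open a file with tables.open_file() (ideally as a context manager so it is closed even on error), write a node, and read it back. The file name and node name below are hypothetical, chosen only for illustration.

import numpy as np
import tables

# Write: create an HDF5 file (hypothetical name) and store a small array under the root group.
with tables.open_file('example.h5', mode='w') as f:
    f.create_array(f.root, 'data', np.arange(10), 'example array')

# Read: reopen the same file read-only and pull the array back into memory.
with tables.open_file('example.h5', mode='r') as f:
    data = f.root.data.read()
    print(data)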
def load_h5(filename):
    with tables.open_file(filename, 'r') as f:
        group = f.root._v_groups[cr_constants.ANALYSIS_H5_MATRIX_GROUP]
        matrix = cr_matrix.GeneBCMatrix.load(group)

        analysis = SingleGenomeAnalysis(matrix)

        group = f.root._v_groups[cr_constants.ANALYSIS_H5_PCA_GROUP]
        analysis._load_pca_h5(group)

        group = f.root._v_groups[cr_constants.ANALYSIS_H5_CLUSTERING_GROUP]
        analysis._load_clustering_h5(group)

        group = f.root._v_groups[cr_constants.ANALYSIS_H5_DIFFERENTIAL_EXPRESSION_GROUP]
        analysis._load_differential_expression_h5(group)

        group = f.root._v_groups[cr_constants.ANALYSIS_H5_TSNE_GROUP]
        analysis._load_tsne_h5(group)

        return analysis
def main(args, outs):
    if args.skip or args.is_multi_genome:
        return

    tsne_dims = args.tsne_dims

    matrix = cr_matrix.GeneBCMatrix.load_h5(args.matrix_h5)
    pca = cr_pca.load_pca_from_h5(args.pca_h5)

    tsne = cr_tsne.run_tsne(pca.transformed_pca_matrix, input_pcs=args.input_pcs,
                            perplexity=args.perplexity, theta=args.theta,
                            tsne_dims=tsne_dims, max_iter=args.max_iter,
                            stop_lying_iter=args.stop_lying_iter,
                            mom_switch_iter=args.mom_switch_iter,
                            random_state=args.random_seed)
    tsne_map = {tsne_dims: tsne}

    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(outs.tsne_h5, 'w', filters = filters) as f:
        cr_tsne.save_tsne_h5(tsne_map, f)

    cr_tsne.save_tsne_csv(tsne_map, matrix, outs.tsne_csv)
def merge_barcode_summaries(input_files, output_file):
    # Each chunk produces a barcode summary containing ALL barcodes from ALL gem groups,
    # not just the ones being counted in that chunk, so the datasets need to be squashed
    # rather than concatenated.
    with tables.open_file(output_file, mode = 'w') as fout:
        dsets = {}

        # init datasets using the first input
        if len(input_files) > 0:
            with tables.open_file(input_files[0], mode = 'r') as fin:
                for node in fin.walk_nodes('/', 'Array'):
                    dsets[node.name] = fout.create_carray('/', node.name, obj=node[:])

        # add data from the other inputs
        for input_file in input_files[1:]:
            with tables.open_file(input_file, mode = 'r') as fin:
                for (name, carray) in dsets.iteritems():
                    if name == cr_constants.H5_BC_SEQUENCE_COL:
                        continue  # don't modify the barcode sequences
                    carray[:] += fin.get_node('/', name)[:]
def __init__(self, filename, mode, batch_size=5):
    """
    An iterable database that should theoretically allow
    scalable reading/writing of datasets.

    batch_size: length of list

    Notes: meta_file should contain all the related meta data including
    keys, their corresponding value lengths, overall file size etc
    """
    fn = os.path.expanduser(filename)
    if mode == 'w' or mode == 'a':
        print('{}::{} with batch size: {}'.format(
            'Writing' if mode == 'w' else 'Appending',
            self.__class__.__name__, batch_size))
        self.h5f_ = tb.open_file(fn, mode=mode, title='%s' % fn)
        self.data_ = {}
    elif mode == 'r':
        self.h5f_ = tb.open_file(fn, mode=mode, title='%s' % fn)
        print('{}::Loaded with fields: {}'.format(self.__class__.__name__, self.keys))
    else:
        raise RuntimeError('Unknown mode %s' % mode)
def af_h5_to_np(input_path, outpath):
    files = tables.open_file(input_path, mode = 'r+')
    speaker_nodes = files.root._f_list_nodes()

    for spk in speaker_nodes:
        file_nodes = spk._f_list_nodes()
        for fls in file_nodes:
            file_name = fls._v_name
            af_nodes = fls._f_list_nodes()
            af_list = []
            for fts in af_nodes:
                features = fts[:]
                mean = numpy.mean(features,1)
                normalised_feats = list(numpy.transpose(features)/mean)
                af_list += normalised_feats
            numpy.save(outpath + file_name, numpy.array(af_list))
def dump_stec_map(h5_fname, stec_map):
    """
    ???
    """
    h5file = open_file(h5_fname, mode='w', title='IRI simulated slant TEC')
    group = h5file.create_group('/', 'phase_arcs', 'Phase connected arcs')
    if hasattr(stec_map, 'xyz'):
        group._v_attrs.xyz = stec_map.xyz
    if hasattr(stec_map, 'llh'):
        group._v_attrs.llh = stec_map.llh
    for sat in sorted(stec_map):
        assert sat[0] == 'G'
        table = h5file.create_table(group, sat, STecTable, 'GPS prn={} data'.format(sat[1:]))
        row = table.row
        for dt, stec_info in stec_map[sat].iteritems():
            row['dt'] = (dt - UNIX_EPOCH).total_seconds()
            row['stec'] = stec_info.stec
            row['az'] = stec_info.az
            row['el'] = stec_info.el
            row['satx'] = stec_info.satx
            row['saty'] = stec_info.saty
            row['satz'] = stec_info.satz
            row.append()
        table.flush()
    h5file.close()
    return h5_fname
def sparse_save(matrix, filename, dtype=np.dtype(np.float64)):
    print "SAVE SPARSE"
    print matrix.shape
    atom = tb.Atom.from_dtype(dtype)

    f = tb.open_file(filename, 'w')

    print "saving data"
    filters = tb.Filters(complevel=5, complib='blosc')
    out = f.create_carray(f.root, 'data', atom, shape=matrix.data.shape, filters=filters)
    out[:] = matrix.data

    print "saving indices"
    out = f.create_carray(f.root, 'indices', tb.Int64Atom(), shape=matrix.indices.shape, filters=filters)
    out[:] = matrix.indices

    print "saving indptr"
    out = f.create_carray(f.root, 'indptr', tb.Int64Atom(), shape=matrix.indptr.shape, filters=filters)
    out[:] = matrix.indptr

    print "saving done"
    f.close()
def export_crossval(crossval_output, config):
    outfile_scores = os.path.join(config.output_dir, config.name + "_scores.json")
    with open(outfile_scores, 'w') as f:
        json.dump(crossval_output.scores, f, sort_keys=True, indent=4)

    outfile_results = os.path.join(config.output_dir, config.name + "_results.hdf5")
    with hdf.open_file(outfile_results, 'w') as f:
        for fld, v in crossval_output.y_pred.items():
            label = "_".join(fld.split())
            f.create_array("/", label, obj=v.data)
            f.create_array("/", label + "_mask", obj=v.mask)
        f.create_array("/", "y_true", obj=crossval_output.y_true)

    create_scatter_plot(outfile_results, config)
def create_scatter_plot(outfile_results, config):
    true_vs_pred = os.path.join(config.output_dir, config.name + "_results.csv")
    true_vs_pred_plot = os.path.join(config.output_dir, config.name + "_results.png")

    with hdf.open_file(outfile_results, 'r') as f:
        prediction = f.get_node("/", "Prediction").read()
        y_true = f.get_node("/", "y_true").read()

    np.savetxt(true_vs_pred, X=np.vstack([y_true, prediction]).T, delimiter=',')

    plt.figure()
    plt.scatter(y_true, prediction)
    plt.title('true vs prediction')
    plt.xlabel('True')
    plt.ylabel('Prediction')
    plt.savefig(true_vs_pred_plot)
def open_similarity_matrix(fn):
    """Open read-only similarity matrix file.

    Args:
        fn (str): Filename of similarity matrix

    Returns:
        SimilarityMatrix | FrozenSimilarityMatrix: A read-only similarity matrix object
    """
    # peek in file to detect format
    f = tables.open_file(fn, 'r')
    is_frozen = 'scores' in f.root
    f.close()
    if is_frozen:
        matrix = FrozenSimilarityMatrix(fn)
    else:
        matrix = SimilarityMatrix(fn, cache_labels=True)
    return matrix
def save_file_origen(file, *, ORIGEN_data, lib, nucs, start_nuclide, time, phi,
                     ORIGEN_time, n_fission_fragments=2.004):
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
                          filters=tables.Filters(complevel=1)) as h5file:
        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        table = h5file.get_node(h5file.root, lib + '/origen')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time ORIGEN'] = ORIGEN_time
        table.row['ORIGEN atom fraction'] = origen_data_to_array_weighted(ORIGEN_data, nucs, n_fission_fragments=n_fission_fragments)
        table.row['ORIGEN mass fraction'] = origen_data_to_array_materials(ORIGEN_data, nucs)
        table.row.append()
        table.flush()
def save_file_cram_lambdify(file, *, CRAM_lambdify_res, lib, nucs, start_nuclide, time,
                            phi, CRAM_lambdify_time, umfpack, n_fission_fragments=2.004):
    assert len(CRAM_lambdify_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
                          filters=tables.Filters(complevel=1)) as h5file:
        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        nodename = '/cram-lambdify-umfpack' if umfpack else '/cram-lambdify-superlu'
        table = h5file.get_node(h5file.root, lib + nodename)
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM lambdify'] = CRAM_lambdify_time
        table.row['CRAM lambdify atom fraction'] = CRAM_lambdify_res
        CRAM_lambdify_res_normalized = CRAM_lambdify_res/np.sum(CRAM_lambdify_res)
        table.row['CRAM lambdify mass fraction'] = CRAM_lambdify_res_normalized
        table.row.append()
        table.flush()
def save_file_cram_py_solve(file, *, CRAM_py_solve_res, lib, nucs, start_nuclide, time,
                            phi, CRAM_py_solve_time, n_fission_fragments=2.004):
    assert len(CRAM_py_solve_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
                          filters=tables.Filters(complevel=1)) as h5file:
        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        table = h5file.get_node(h5file.root, lib + '/cram-py_solve')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM py_solve'] = CRAM_py_solve_time
        table.row['CRAM py_solve atom fraction'] = CRAM_py_solve_res
        CRAM_py_solve_res_normalized = CRAM_py_solve_res/np.sum(CRAM_py_solve_res)
        table.row['CRAM py_solve mass fraction'] = CRAM_py_solve_res_normalized
        table.row.append()
        table.flush()
def write(self, frames):
    """
    Write the frames to the target HDF5 file, using the format used by
    ``pd.Panel.to_hdf``

    Parameters
    ----------
    frames : iter[(int, DataFrame)] or dict[int -> DataFrame]
        An iterable or other mapping of sid to the corresponding OHLCV
        pricing data.
    """
    with HDFStore(self._path, 'w',
                  complevel=self._complevel, complib=self._complib) \
            as store:
        panel = pd.Panel.from_dict(dict(frames))
        panel.to_hdf(store, 'updates')
    with tables.open_file(self._path, mode='r+') as h5file:
        h5file.set_node_attr('/', 'version', 0)
def get_number_of_rows(files, verbose):
    # Swap out tqdm if not verbose
    def verbose_passthrough(a):
        return a

    fn = None
    if verbose:
        fn = tqdm
    else:
        fn = verbose_passthrough

    n_wfs = 0
    for filename in fn(files):
        f = tables.open_file(filename)
        waveform = f.get_node('/waveforms')
        n_wfs += waveform.nrows
        f.close()
    return n_wfs
def main():
    args = check_argv()

    print("Reading:", args.mat_fn)
    mat = tables.open_file(args.mat_fn)

    n_audio = mat.root.files_train[0].shape[0]
    print("No. audio files:", n_audio)

    filenames = []
    for i_audio in xrange(n_audio):
        filenames.append("".join([chr(i[0]) for i in mat.root.files_train[0][i_audio][0]]))
    audio_keys = [path.splitext(path.split(i)[-1])[0] for i in filenames]

    features_dict = {}
    for i_audio in xrange(n_audio):
        features = mat.root.F_train_iter[0][i_audio][0]
        features_dict[audio_keys[i_audio].replace("_", "-")] = features.T

    print("Writing:", args.npz_fn)
    np.savez(args.npz_fn, **features_dict)
def read_h5(self, h5_path, load_features=False):
    h5_file = tables.open_file(h5_path, mode = 'r')
    if load_features and hasattr(h5_file.root, "features"):
        self.features = h5_file.root.features[:,:]
        self.num_fts = h5_file.root.features.shape[1]
    if hasattr(h5_file.root, "labels"):
        self.labels = h5_file.root.labels[:]
        self.num_items = len(self.labels)
    if hasattr(h5_file.root, "scores"):
        self.scores = h5_file.root.scores[:]
        self.num_items = len(self.scores)
    if hasattr(h5_file.root, "probs"):
        self.probs = h5_file.root.probs[:]
        self.num_items = len(self.probs)
    if hasattr(h5_file.root, "preds"):
        self.preds = h5_file.root.preds[:]
        self.num_items = len(self.preds)
    h5_file.close()
def load_from_hdf5(self, path):
    """load model in compressed sparse row format from hdf5 file

    hdf5 file should contain row_ptr, col_ind and data array

    Args:
        path: path to the embeddings folder
    """
    self.load_metadata(path)
    f = tables.open_file(os.path.join(path, 'cooccurrence_csr.h5p'), 'r')
    row_ptr = np.nan_to_num(f.root.row_ptr.read())
    col_ind = np.nan_to_num(f.root.col_ind.read())
    data = np.nan_to_num(f.root.data.read())
    dim = row_ptr.shape[0] - 1
    self.matrix = scipy.sparse.csr_matrix(
        (data, col_ind, row_ptr), shape=(dim, dim), dtype=np.float32)
    f.close()
    self.vocabulary = Vocabulary_cooccurrence()
    self.vocabulary.load(path)
    self.name += os.path.basename(os.path.normpath(path))
def load_with_alpha(self, path, power=0.6):
    # self.load_provenance(path)
    f = tables.open_file(os.path.join(path, 'vectors.h5p'), 'r')
    # left = np.nan_to_num(f.root.vectors.read())
    left = f.root.vectors.read()
    sigma = f.root.sigma.read()
    logger.info("loaded left singular vectors and sigma")
    sigma = np.power(sigma, power)
    self.matrix = np.dot(left, np.diag(sigma))
    logger.info("computed the product")
    self.metadata["pow_sigma"] = power
    self.metadata["size_dimensions"] = int(self.matrix.shape[1])
    f.close()
    self.vocabulary = Vocabulary_simple()
    self.vocabulary.load(path)
    self.name += os.path.basename(os.path.normpath(path)) + "_a" + str(power)
def genotypes(self, nodes, h5file):
    """ Returns the full genotype associated with the provided node """
    with tables.open_file(h5file, 'r') as f:
        ind_IDs = f.root.inds[:]
        uID_idx = dict([(ID, i) for i, ID in enumerate(ind_IDs)])

        genotypes = {}
        for node in nodes:
            ID = self.idx_haps[node]
            uID = np.abs(ID).astype(int)
            try:
                file_idx = uID_idx[uID]
            except KeyError:
                print("No genotype for", node)
                continue
            chrom = signed_to_bool(np.sign(ID))
            genotypes[node] = f.root.haps[file_idx][chrom]
            assert f.root.inds[file_idx] == uID

    return genotypes
def fetch_svhn_train_test(source_paths, target_path):
    train_path, test_path = source_paths

    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')

    # Load in the training data Matlab file
    print('Converting {} to HDF5...'.format(train_path))
    train_X_u8, train_y = _read_svhn_matlab(train_path)
    f_out.create_array(g_out, 'train_X_u8', train_X_u8)
    f_out.create_array(g_out, 'train_y', train_y)
    del train_X_u8
    del train_y

    # Load in the test data Matlab file
    print('Converting {} to HDF5...'.format(test_path))
    test_X_u8, test_y = _read_svhn_matlab(test_path)
    f_out.create_array(g_out, 'test_X_u8', test_X_u8)
    f_out.create_array(g_out, 'test_y', test_y)
    del test_X_u8
    del test_y

    f_out.close()

    return target_path
def fetch_svhn_extra(source_paths, target_path):
    extra_path = source_paths[0]

    print('Converting {} to HDF5 (compressed)...'.format(extra_path))
    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(
        g_out, 'extra_X_u8', tables.UInt8Atom(), (0, 3, 32, 32), filters=filters)
    y_arr = f_out.create_earray(
        g_out, 'extra_y', tables.Int32Atom(), (0,), filters=filters)

    # Load in the extra data Matlab file
    _insert_svhn_matlab_to_h5(X_u8_arr, y_arr, extra_path)

    f_out.close()

    return target_path
def __init__(self, n_val=729):
    data_path = usps_data()
    if data_path is not None:
        f = tables.open_file(data_path, mode='r')
        train_X = f.root.usps.train_X
        train_y = f.root.usps.train_y
        test_X = f.root.usps.test_X
        test_y = f.root.usps.test_y

        if n_val == 0 or n_val is None:
            self.train_X, self.train_y = train_X, train_y
            self.val_X = np.zeros((0, 1, 16, 16), dtype=np.float32)
            self.val_y = np.zeros((0,), dtype=np.int32)
        else:
            self.train_X, self.val_X = train_X[:-n_val], train_X[-n_val:]
            self.train_y, self.val_y = train_y[:-n_val], train_y[-n_val:]

        self.test_X, self.test_y = test_X, test_y
def _convert_mnist(dataset_name, target_path,
                   train_X_path, train_y_path, test_X_path, test_y_path):
    print('Convering {} to HDF5'.format(dataset_name))
    train_X_u8 = _read_mnist_images(train_X_path)
    train_y = _read_mnist_labels(train_y_path)
    test_X_u8 = _read_mnist_images(test_X_path)
    test_y = _read_mnist_labels(test_y_path)

    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'mnist', 'MNIST data')
    f_out.create_array(g_out, 'train_X_u8', train_X_u8)
    f_out.create_array(g_out, 'train_y', train_y)
    f_out.create_array(g_out, 'test_X_u8', test_X_u8)
    f_out.create_array(g_out, 'test_y', test_y)
    f_out.close()

    return target_path
def __init__(self, h5_path, n_val=10000, val_lower=0.0, val_upper=1.0):
    f = tables.open_file(h5_path, mode='r')
    train_X_u8 = f.root.mnist.train_X_u8
    train_y = f.root.mnist.train_y
    self.test_X_u8 = f.root.mnist.test_X_u8
    self.test_y = f.root.mnist.test_y

    if n_val == 0 or n_val is None:
        self.train_X_u8 = train_X_u8
        self.train_y = train_y
        self.val_X_u8 = np.zeros((0, 1, 28, 28), dtype=np.uint8)
        self.val_y = np.zeros((0,), dtype=np.int32)
    else:
        self.train_X_u8 = train_X_u8[:-n_val]
        self.val_X_u8 = train_X_u8[-n_val:]
        self.train_y, self.val_y = train_y[:-n_val], train_y[-n_val:]

    self.train_X = ImageArrayUInt8ToFloat32(self.train_X_u8, val_lower, val_upper)
    self.val_X = ImageArrayUInt8ToFloat32(self.val_X_u8, val_lower, val_upper)
    self.test_X = ImageArrayUInt8ToFloat32(self.test_X_u8, val_lower, val_upper)
def __enter__(self):
    import tables
    if self.filename is None:
        self.filedir = tempfile.mkdtemp()
        self.filename = os.path.join(self.filedir, 'bench.h5')
    else:
        self.filedir = None
    h5_file = tables.open_file(self.filename, 'w')
    array_kw_args = {}
    if self.complevel > 0:
        array_kw_args['filters'] = tables.Filters(complib=self.complib, complevel=self.complevel)
    array_path = '/bench'
    #ary = h5_file.create_array(h5_file.root, array_path[1:],
    #                           np.arange(np.prod(file_shape), dtype=file_type).reshape(file_shape))
    ary = h5_file.create_earray(h5_file.root, array_path[1:],
                                atom=tables.Atom.from_dtype(file_type),
                                shape=file_shape, expectedrows=self.n_rows,
                                **array_kw_args)
    for _ in range(0, self.n_rows, 2**10):
        ary.append(2**8*np.random.randn(2**10, *file_shape[1:]))
    print(ary.shape)
    h5_file.close()
    return self.filename, array_path
def setUp(self):
    self.test_dir = tempfile.mkdtemp()
    self.test_filename = os.path.join(self.test_dir, 'test.h5')
    test_file = tables.open_file(self.test_filename, 'w')

    self.test_array = np.arange(100*1000).reshape((1000, 10, 10))
    self.test_array_path = '/test_array'
    array = test_file.create_array(test_file.root, self.test_array_path[1:], self.test_array)

    self.test_table_ary = np.array([
        (np.random.randint(256, size=np.prod(test_table_col_A_shape)).reshape(test_table_col_A_shape),
         np.random.rand(*test_table_col_B_shape))
        for _ in range(100)],
        dtype=tables.dtype_from_descr(TestTableRow))
    self.test_table_path = '/test_table'
    table = test_file.create_table(test_file.root, self.test_table_path[1:], TestTableRow)
    table.append(self.test_table_ary)

    test_file.close()
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path,self.datafile))

    m = deepgp.DeepGP([Y.shape[1],5,2],Y,
                      kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(2,ARD=True)],
                      num_inducing=2, back_constraint=False)

    if not os.path.exists(os.path.join(base_path,self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path,self.modelfile))
        with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
            L = f.create_dataset("L", (1,),dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path,self.outputfile))
    X = np.loadtxt(os.path.join(base_path,self.inputfile))

    m = deepgp.DeepGP([Y.shape[1],5,X.shape[1]],Y, X=X,
                      kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(X.shape[1],ARD=True)],
                      num_inducing=2, back_constraint=False)

    if not os.path.exists(os.path.join(base_path,self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path,self.modelfile))
        with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
            L = f.create_dataset("L", (1,),dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path,self.datafile))

    m = deepgp.DeepGP([Y.shape[1],5,2],Y,
                      kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(2,ARD=True)],
                      num_inducing=2, back_constraint=True, encoder_dims=[[3],[2]])

    if not os.path.exists(os.path.join(base_path,self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path,self.modelfile))
        with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
            L = f.create_dataset("L", (1,),dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path,self.outputfile))
    X = np.loadtxt(os.path.join(base_path,self.inputfile))

    m = deepgp.DeepGP([Y.shape[1],5,X.shape[1]],Y, X=X,
                      kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(X.shape[1],ARD=True)],
                      num_inducing=2, back_constraint=True, encoder_dims=[[3]])

    if not os.path.exists(os.path.join(base_path,self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path,self.modelfile))
        with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
            L = f.create_dataset("L", (1,),dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
def process(self, rows_slice):
    with Worker.hdf5_lock:
        with tables.open_file(self.hdf5_file, 'r+') as fileh:
            hdf5_array = fileh.get_node(self.path)
            X = hdf5_array[rows_slice, ...]

    eensy = np.finfo(np.float32).eps
    weensy = np.finfo(np.float32).tiny * 100
    tmp = self.random_state.randn(rows_slice.stop - rows_slice.start, self.N)
    X += (eensy * X + weensy) * tmp

    with Worker.hdf5_lock:
        with tables.open_file(self.hdf5_file, 'r+') as fileh:
            hdf5_array = fileh.get_node(self.path)
            hdf5_array[rows_slice, ...] = X

    del X
def cluster_labels_A(hdf5_file, c, lock, I, rows_slice):
    """One of the task to be performed by a pool of subprocesses, as the first step
    in identifying the cluster labels and indices of the cluster centers
    for Affinity Propagation clustering.
    """
    with Worker.hdf5_lock:
        with tables.open_file(hdf5_file, 'r+') as fileh:
            S = fileh.root.aff_prop_group.similarities
            s = S[rows_slice, ...]

    s = np.argmax(s[:, I], axis = 1)

    with lock:
        c[rows_slice] = s[:]

    del s
def cluster_labels_B(hdf5_file, s_reduced, lock, I, ii, iix, rows_slice):
    """Second task to be performed by a pool of subprocesses before
    the cluster labels and cluster center indices can be identified.
    """
    with Worker.hdf5_lock:
        with tables.open_file(hdf5_file, 'r+') as fileh:
            S = fileh.root.aff_prop_group.similarities
            s = S[rows_slice, ...]

    s = s[:, ii]
    s = s[iix[rows_slice]]

    with lock:
        s_reduced += s[:].sum(axis = 0)

    del s
def combine_h5_files(in_files, out_file, groups):
    fins = [tables.open_file(filename, 'r') for filename in in_files]
    with tables.open_file(out_file, 'w') as fout:
        for group in groups:
            _combine_h5_group(fins, fout, group)
    for fin in fins:
        fin.close()
def open_h5_for_writing(filename):
    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    return tables.open_file(filename, 'w', filters = filters)
def load_pca_from_h5(filename):
    """ Load just the PCA info from an analysis h5 """
    with tables.open_file(filename, 'r') as f:
        group = f.root._v_groups[cr_constants.ANALYSIS_H5_PCA_GROUP]
        # Just take the first PCA object, assuming we never have multiple
        for _, pca in cr_io.load_h5_iter(group, PCA):
            return pca
def load_clustering_from_h5(filename, clustering_key):
    """ Load a single clustering from an analysis h5 """
    with tables.open_file(filename, 'r') as f:
        group = getattr(f.root, cr_constants.ANALYSIS_H5_CLUSTERING_GROUP)
        for subgroup in group:
            if subgroup._v_name == '_' + clustering_key:
                return cr_io.load_h5_namedtuple(subgroup, cr_clustering.CLUSTERING)
        raise ValueError("Could not find clustering key: %s in HDF5 file %s" % (clustering_key, filename))
def load_bcs_from_matrix_h5(filename):
    """ Load just the barcodes from a matrix h5 """
    with tables.open_file(filename, 'r') as f:
        # Take the first group, assuming a single-genome matrix
        group = list(f.list_nodes(f.root))[0]
        return cr_matrix.GeneBCMatrix.load_bcs_from_h5_group(group)
def load_graphclust_from_h5(filename):
    with tables.open_file(filename, 'r') as f:
        group = f.root._v_groups[cr_constants.ANALYSIS_H5_CLUSTERING_GROUP]

        # Take the first entry
        for key, clustering in cr_io.load_h5_iter(group, cr_clustering.CLUSTERING):
            clustering_type, _ = cr_clustering.parse_clustering_key(key)
            if clustering_type == cr_clustering.CLUSTER_TYPE_GRAPHCLUST:
                return clustering
def concatenate_h5(input_files, output_file):
    with tables.open_file(output_file, mode = 'w') as fout:
        dsets = {}

        # init datasets using the first input
        if len(input_files) > 0:
            with tables.open_file(input_files[0], mode = 'r') as fin:
                for node in fin.walk_nodes('/', 'Array'):
                    atom = tables.Atom.from_dtype(np.dtype(node.dtype))
                    dsets[node.name] = fout.create_earray('/', node.name, atom, (0,))

        # copy the data
        for input_file in input_files:
            with tables.open_file(input_file, mode = 'r') as fin:
                for (name, earray) in dsets.iteritems():
                    earray.append(fin.get_node('/', name)[:])
def get_h5_filetype(filename):
    with tables.open_file(filename, mode = 'r') as f:
        try:
            filetype = f.get_node_attr('/', cr_constants.H5_FILETYPE_KEY)
        except AttributeError:
            filetype = None  # older files lack this key
    return filetype
def load_array_h5(filename, name):
    """ Load an array from the root of an h5 file """
    with tables.open_file(filename, 'r') as f:
        return getattr(f.root, name).read()
def save_h5(self, filename, extra_attrs={}):
    self.tocsc()

    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(filename, 'w', filters = filters) as f:
        f.set_node_attr('/', cr_constants.H5_FILETYPE_KEY, MATRIX_H5_FILETYPE)

        # set optional top-level attributes
        for (k,v) in extra_attrs.iteritems():
            f.set_node_attr('/', k, v)

        for genome, matrix in self.matrices.iteritems():
            group = f.create_group(f.root, genome)
            matrix.save_h5(f, group)
def merge(self, filename):
    self.tocoo()
    with tables.open_file(filename, 'r') as f:
        for group in f.list_nodes(f.root):
            genome = group._v_name
            if genome in self.matrices:
                self.matrices[genome].merge(group)
            else:
                self.matrices[genome] = GeneBCMatrix.load(group)
def load_h5(filename):
    matrices = GeneBCMatrices()
    with tables.open_file(filename, 'r') as f:
        for group in f.list_nodes(f.root):
            genome = group._v_name
            matrices.matrices[genome] = GeneBCMatrix.load(group)
    return matrices
def load_dims_from_h5(filename):
    dims = {}
    with tables.open_file(filename, 'r') as f:
        for group in f.list_nodes(f.root):
            genome = group._v_name
            dims[genome] = GeneBCMatrix.load_dims(group)
    return dims
def load_chemistry_from_h5(filename):
    with tables.open_file(filename, 'r') as f:
        try:
            chemistry = f.get_node_attr('/', cr_constants.H5_CHEMISTRY_DESC_KEY)
        except AttributeError:
            chemistry = "Unknown"
    return chemistry
def get_matrix_attrs(matrix_h5):
    attrs = {}
    with tables.open_file(matrix_h5, 'r') as f:
        for key in cr_constants.H5_METADATA_ATTRS:
            try:
                val = f.get_node_attr('/', key)
                attrs[key] = val
            except AttributeError:
                pass
    return attrs