The following 26 code examples, extracted from open-source Python projects, illustrate how to use tables.Filters().
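Before the extracted examples, here is a minimal sketch of the pattern they all share (the file name and data below are placeholders, not taken from any of the projects): build a tables.Filters() object with a compression level and library, then pass it either to tables.open_file() as the file-wide default or to the individual create_*array()/create_table() calls.

import numpy as np
import tables

# Placeholder file name and data, for illustration only.
filters = tables.Filters(complevel=5, complib='blosc')  # complib is typically 'zlib', 'lzo', 'bzip2', or 'blosc'
with tables.open_file('example.h5', mode='w', filters=filters) as h5:
    # Filters passed to open_file() become the default for new nodes;
    # they can also be passed per node, as most of the examples below do.
    h5.create_carray(h5.root, 'data',
                     obj=np.arange(1000, dtype=np.float64),
                     filters=filters)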
def main(args, outs):
    if args.skip or args.is_multi_genome:
        return

    tsne_dims = args.tsne_dims

    matrix = cr_matrix.GeneBCMatrix.load_h5(args.matrix_h5)
    pca = cr_pca.load_pca_from_h5(args.pca_h5)

    tsne = cr_tsne.run_tsne(pca.transformed_pca_matrix,
                            input_pcs=args.input_pcs,
                            perplexity=args.perplexity,
                            theta=args.theta,
                            tsne_dims=tsne_dims,
                            max_iter=args.max_iter,
                            stop_lying_iter=args.stop_lying_iter,
                            mom_switch_iter=args.mom_switch_iter,
                            random_state=args.random_seed)
    tsne_map = {tsne_dims: tsne}

    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(outs.tsne_h5, 'w', filters = filters) as f:
        cr_tsne.save_tsne_h5(tsne_map, f)

    cr_tsne.save_tsne_csv(tsne_map, matrix, outs.tsne_csv)
def sparse_save(matrix, filename, dtype=np.dtype(np.float64)):
    print "SAVE SPARSE"
    print matrix.shape
    atom = tb.Atom.from_dtype(dtype)
    f = tb.open_file(filename, 'w')

    print "saving data"
    filters = tb.Filters(complevel=5, complib='blosc')
    out = f.create_carray(f.root, 'data', atom, shape=matrix.data.shape, filters=filters)
    out[:] = matrix.data

    print "saving indices"
    out = f.create_carray(f.root, 'indices', tb.Int64Atom(), shape=matrix.indices.shape, filters=filters)
    out[:] = matrix.indices

    print "saving indptr"
    out = f.create_carray(f.root, 'indptr', tb.Int64Atom(), shape=matrix.indptr.shape, filters=filters)
    out[:] = matrix.indptr

    print "saving done"
    f.close()
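For completeness, a hedged sketch of the reverse operation: the helper name and the shape argument are assumptions (sparse_save() above does not record the matrix shape), and it assumes the saved matrix was CSR.

import scipy.sparse as sp
import tables as tb

def sparse_load(filename, shape):
    # `shape` must be supplied by the caller, since it is not stored in the file.
    with tb.open_file(filename, 'r') as f:
        data = f.root.data[:]
        indices = f.root.indices[:]
        indptr = f.root.indptr[:]
    return sp.csr_matrix((data, indices, indptr), shape=shape)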
def save_file_origen(file, *, ORIGEN_data, lib, nucs, start_nuclide, time, phi,
                     ORIGEN_time, n_fission_fragments=2.004):
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
                          filters=tables.Filters(complevel=1)) as h5file:
        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)
        table = h5file.get_node(h5file.root, lib + '/origen')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time ORIGEN'] = ORIGEN_time
        table.row['ORIGEN atom fraction'] = origen_data_to_array_weighted(ORIGEN_data, nucs,
            n_fission_fragments=n_fission_fragments)
        table.row['ORIGEN mass fraction'] = origen_data_to_array_materials(ORIGEN_data, nucs)
        table.row.append()
        table.flush()
def save_file_cram_lambdify(file, *, CRAM_lambdify_res, lib, nucs, start_nuclide, time, phi,
                            CRAM_lambdify_time, umfpack, n_fission_fragments=2.004):
    assert len(CRAM_lambdify_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
                          filters=tables.Filters(complevel=1)) as h5file:
        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)
        nodename = '/cram-lambdify-umfpack' if umfpack else '/cram-lambdify-superlu'
        table = h5file.get_node(h5file.root, lib + nodename)
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM lambdify'] = CRAM_lambdify_time
        table.row['CRAM lambdify atom fraction'] = CRAM_lambdify_res
        CRAM_lambdify_res_normalized = CRAM_lambdify_res/np.sum(CRAM_lambdify_res)
        table.row['CRAM lambdify mass fraction'] = CRAM_lambdify_res_normalized
        table.row.append()
        table.flush()
def save_file_cram_py_solve(file, *, CRAM_py_solve_res, lib, nucs, start_nuclide, time, phi,
                            CRAM_py_solve_time, n_fission_fragments=2.004):
    assert len(CRAM_py_solve_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
                          filters=tables.Filters(complevel=1)) as h5file:
        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)
        table = h5file.get_node(h5file.root, lib + '/cram-py_solve')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM py_solve'] = CRAM_py_solve_time
        table.row['CRAM py_solve atom fraction'] = CRAM_py_solve_res
        CRAM_py_solve_res_normalized = CRAM_py_solve_res/np.sum(CRAM_py_solve_res)
        table.row['CRAM py_solve mass fraction'] = CRAM_py_solve_res_normalized
        table.row.append()
        table.flush()
def fetch_svhn_extra(source_paths, target_path):
    extra_path = source_paths[0]

    print('Converting {} to HDF5 (compressed)...'.format(extra_path))
    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(
        g_out, 'extra_X_u8', tables.UInt8Atom(), (0, 3, 32, 32), filters=filters)
    y_arr = f_out.create_earray(
        g_out, 'extra_y', tables.Int32Atom(), (0,), filters=filters)

    # Load in the extra data Matlab file
    _insert_svhn_matlab_to_h5(X_u8_arr, y_arr, extra_path)

    f_out.close()

    return target_path
def __enter__(self):
    import tables
    if self.filename is None:
        self.filedir = tempfile.mkdtemp()
        self.filename = os.path.join(self.filedir, 'bench.h5')
    else:
        self.filedir = None
    h5_file = tables.open_file(self.filename, 'w')
    array_kw_args = {}
    if self.complevel > 0:
        array_kw_args['filters'] = tables.Filters(complib=self.complib,
                                                  complevel=self.complevel)

    array_path = '/bench'
    #ary = h5_file.create_array(h5_file.root, array_path[1:],
    #                           np.arange(np.prod(file_shape), dtype=file_type).reshape(file_shape))
    ary = h5_file.create_earray(h5_file.root, array_path[1:],
                                atom=tables.Atom.from_dtype(file_type),
                                shape=file_shape,
                                expectedrows=self.n_rows,
                                **array_kw_args)
    for _ in range(0, self.n_rows, 2**10):
        ary.append(2**8*np.random.randn(2**10, *file_shape[1:]))
    print(ary.shape)

    h5_file.close()

    return self.filename, array_path
def create_empty_earray(filenode, groupnode, name, batom=None, expectedrows=None):
    try:
        bfilter = tables.Filters(complevel=ZLIBCOMP, complib='zlib')
        if expectedrows == None:
            a = filenode.create_earray(groupnode, name, atom=batom, shape=(0,),
                                       filters=bfilter)
        else:
            a = filenode.create_earray(groupnode, name, atom=batom, shape=(0,),
                                       filters=bfilter, expectedrows=expectedrows)
    except Exception as e:
        raise HDF5InteractionError(5, e.message)

    return a
def merge_all_files_into_pytables(file_dir, file_out):
    """
    process each file into pytables
    """
    start = None
    start = datetime.datetime.now()
    out_h5 = tables.openFile(file_out,
                             mode="w",
                             title="bars",
                             filters=tables.Filters(complevel=9, complib='zlib'))
    table = None
    for file_in in glob.glob(file_dir + "/*.gz"):
        gzip_file = gzip.open(file_in)
        expected_header = ["dt", "sid", "open", "high", "low", "close", "volume"]
        csv_reader = csv.DictReader(gzip_file)
        header = csv_reader.fieldnames
        if header != expected_header:
            logging.warn("expected header %s\n" % (expected_header))
            logging.warn("header_found %s" % (header))
            return

        for current_date, rows in parse_csv(csv_reader):
            table = out_h5.createTable("/TD", "date_" + current_date,
                                       OHLCTableDescription,
                                       expectedrows=len(rows),
                                       createparents=True)
            table.append(rows)
            table.flush()
        if table is not None:
            table.flush()

    end = datetime.datetime.now()
    diff = (end - start).seconds
    logging.debug("finished it took %d." % (diff))
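This example (and safe_hdf, create_song_file, and create_aggregate_file further down) uses the old camelCase PyTables API (openFile, createTable, createCArray, ...); PyTables 3 renamed these methods to snake_case. A minimal sketch of the same open-and-create pattern with the current names, using placeholder file, table, and column names:

import tables

# Placeholder names, for illustration only.
filters = tables.Filters(complevel=9, complib='zlib')
with tables.open_file('bars.h5', mode='w', title='bars', filters=filters) as out_h5:
    # openFile/createTable in PyTables 2.x correspond to open_file/create_table in PyTables 3.x.
    out_h5.create_table('/TD', 'date_19700101',
                        {'close': tables.Float64Col(), 'volume': tables.Int64Col()},
                        createparents=True)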
def open_h5_for_writing(filename):
    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    return tables.open_file(filename, 'w', filters = filters)
def save_h5(self, filename, extra_attrs={}):
    self.tocsc()
    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(filename, 'w', filters = filters) as f:
        f.set_node_attr('/', cr_constants.H5_FILETYPE_KEY, MATRIX_H5_FILETYPE)

        # set optional top-level attributes
        for (k,v) in extra_attrs.iteritems():
            f.set_node_attr('/', k, v)

        for genome, matrix in self.matrices.iteritems():
            group = f.create_group(f.root, genome)
            matrix.save_h5(f, group)
def main(args, outs):
    if args.skip or args.is_multi_genome:
        return

    matrix = cr_matrix.GeneBCMatrix.load_h5(args.matrix_h5)
    pca = cr_pca.run_pca(matrix,
                         pca_genes=args.num_genes,
                         pca_bcs=args.num_bcs,
                         n_pca_components=args.num_pcs,
                         random_state=args.random_seed)
    pca_key = args.num_pcs if args.num_pcs is not None else cr_constants.PCA_N_COMPONENTS_DEFAULT
    pca_map = {pca_key: pca}

    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(outs.pca_h5, 'w', filters = filters) as f:
        cr_pca.save_pca_h5(pca_map, f)

    cr_pca.save_pca_csv(pca_map, matrix, outs.pca_csv)
def append(self, key, item):
    if key not in self.data_:
        filters = tb.Filters(complevel=5, complib='blosc')
        if isinstance(item, np.ndarray):
            atom = tb.Atom.from_type(item.dtype.name, item.shape[1:])
        else:
            atom = tb.VLStringAtom()
        self.data_[key] = self.h5f_.create_vlarray(self.h5f_.root, key, atom, filters=filters)
        print('Creating VLArray, and appending to key {}'.format(key))
        print self.data_[key]
    self.data_[key].append(self.pack(item))
def create_hdf5_table(file, lib, nucs):
    nucs_size = len(nucs)
    desc_common = [
        ('hash', np.int64),
        ('library', 'S8'),
        ('initial vector', np.float64, (nucs_size, 1)),
        ('time', np.float64),
        ('phi', np.float64),
        ('n_fission_fragments', np.float64),
    ]
    desc_origen = [
        ('execution time ORIGEN', np.float64),
        ('ORIGEN atom fraction', np.float64, (nucs_size, 1)),
        ('ORIGEN mass fraction', np.float64, (nucs_size, 1)),
    ]
    desc_cram_lambdify = [
        ('execution time CRAM lambdify', np.float64),
        ('CRAM lambdify atom fraction', np.float64, (nucs_size, 1)),
        ('CRAM lambdify mass fraction', np.float64, (nucs_size, 1)),
    ]
    desc_cram_py_solve = [
        ('execution time CRAM py_solve', np.float64),
        ('CRAM py_solve atom fraction', np.float64, (nucs_size, 1)),
        ('CRAM py_solve mass fraction', np.float64, (nucs_size, 1)),
    ]

    h5file = tables.open_file(file, mode="a", title="CRAM/ORIGEN test run data",
                              filters=tables.Filters(complevel=1))
    h5file.create_group('/', lib, '%s data' % lib)
    h5file.create_table('/' + lib, 'origen', np.dtype(desc_common + desc_origen))
    h5file.create_table('/' + lib, 'cram-lambdify-umfpack', np.dtype(desc_common + desc_cram_lambdify))
    h5file.create_table('/' + lib, 'cram-lambdify-superlu', np.dtype(desc_common + desc_cram_lambdify))
    h5file.create_table('/' + lib, 'cram-py_solve', np.dtype(desc_common + desc_cram_py_solve))
    h5file.create_array('/' + lib, 'nucs', np.array(nucs, 'S6'))
def __init__(self, grid2dfile, filename, name=None):
    """
    Convert grid2d file into a temporary hdf5 file for reducing memory load.

    Args:
        grid2dfile: grid2d file object to save
        filename (str): Path to where file should be saved (recommended it
            be a temporary dir).
        name (str): Name of layer, if None, will use filename minus the
            extension, or if a multihazard grid2d object, each layer will
            have its own name.
    """
    filename1, file_ext = os.path.splitext(filename)
    if file_ext != '.hdf5':
        filename = filename1 + '.hdf5'
        print('Changed extension from %s to .hdf5' % (file_ext,))
    filters = tables.Filters(complevel=5, complib='blosc')
    with tables.open_file(filename, mode='w') as self.tempfile:
        self.gdict = grid2dfile.getGeoDict()
        if type(grid2dfile) == ShakeGrid:
            for layer in grid2dfile.getLayerNames():
                filldat = grid2dfile.getLayer(layer).getData()
                self.tempfile.create_carray(self.tempfile.root, name=layer,
                                            obj=filldat, filters=filters)
            self.shakedict = grid2dfile.getShakeDict()
            self.edict = grid2dfile.getEventDict()
        else:
            if name is None:
                name = os.path.basename(filename1)
            filldat = grid2dfile.getData()
            self.tempfile.create_carray(self.tempfile.root, name=name,
                                        obj=filldat, filters=filters)
    self.filename = os.path.abspath(filename)
def write_haplotypes(self, h5file):
    """ Returns an array of genotypes and corresponding chromosome IDs """
    filters = tables.Filters(complevel=5, complib='blosc')

    with tables.open_file(h5file, 'w') as h5:
        with open(self.output, 'r') as f:
            line = next(f)
            ind_ID, haplotypes = parse_simuPOP_genotype(line)

            ## Create an extendable array in the h5 output file with
            ## the same shape as the haplotypes
            h5.create_earray(h5.root, 'haps',
                             atom=tables.IntAtom(shape=(2, haplotypes.shape[2])),
                             shape=(0,),
                             filters=filters)
            h5.create_earray(h5.root, 'inds',
                             atom=tables.IntAtom(),
                             shape=(0,),
                             filters=filters)
            h5.root.haps.append(haplotypes)
            h5.root.inds.append(ind_ID)

            for line in f:
                ind_ID, haplotypes = parse_simuPOP_genotype(line)
                h5.root.haps.append(haplotypes)
                h5.root.inds.append(ind_ID)
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
def newdataearray(self, name, data, batom=None, rows=None):
    # Use zlib, standard for HDF5
    bfilter = tables.Filters(complevel=ZLIBCOMP, complib='zlib', shuffle=True)
    #
    a = create_data_earray(self.ph5, self.current_g_das, name, data, batom, rows=rows)

    return a
def open(filename, mode, start=None, length=None):
    assert mode == 'r' or mode == 'w'

    mc = MoleculeCounter()

    if mode == 'w':
        assert start is None
        assert length is None
        filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
        mc.h5 = tables.open_file(filename, mode = 'w', title = '10X', filters = filters)
        mc.h5.set_node_attr('/', FILE_VERSION_KEY, CURR_FILE_VERSION)
        mc.h5.set_node_attr('/', cr_constants.H5_FILETYPE_KEY, MOLECULE_H5_FILETYPE)
        mc.h5.create_group('/', METRICS_GROUP_NAME)

        for name, col_type in MOLECULE_INFO_COLUMNS.iteritems():
            atom = tables.Atom.from_dtype(np.dtype(col_type))
            # Create an (array, element_buffer) tuple
            # where element_buffer is a len=1 numpy array
            # designed to avoid excess allocations
            mc.columns[name] = (mc.h5.create_earray(mc.h5.root, name, atom, (0,)),
                                np.array([0], dtype=np.dtype(col_type)))

    elif mode == 'r':
        mc.h5 = tables.open_file(filename, mode = 'r')

        try:
            mc.file_version = mc.h5.get_node_attr('/', FILE_VERSION_KEY)
        except AttributeError:
            mc.file_version = 1 # V1 doesn't have version field

        for node in mc.h5.walk_nodes('/', 'Array'):
            if node.name in MOLECULE_INFO_COLUMNS:
                if start is None:
                    assert length is None
                    mc.columns[node.name] = (node, None)
                else:
                    assert length is not None
                    mc.columns[node.name] = (node[start:(start+length)], None)
            elif node.name in MOLECULE_REF_COLUMNS:
                mc.ref_columns[node.name] = node
            else:
                raise AttributeError("Illegal column: %s" % node.name)

    return mc
def repeat_expt(smplr, n_expts, n_labels, output_file = None):
    """
    Parameters
    ----------
    smplr : sub-class of PassiveSampler
        sampler must have a sample_distinct method, reset method and ...

    n_expts : int
        number of expts to run

    n_labels : int
        number of labels to query from the oracle in each expt
    """

    FILTERS = tables.Filters(complib='zlib', complevel=5)

    max_iter = smplr._max_iter
    n_class = smplr._n_class
    if max_iter < n_labels:
        raise ValueError("Cannot query {} labels. Sampler ".format(n_labels) +
                         "instance supports only {} iterations".format(max_iter))

    if output_file is None:
        # Use current date/time as filename
        output_file = 'expt_' + time.strftime("%d-%m-%Y_%H:%M:%S") + '.h5'
    logging.info("Writing output to {}".format(output_file))

    f = tables.open_file(output_file, mode='w', filters=FILTERS)
    float_atom = tables.Float64Atom()
    bool_atom = tables.BoolAtom()
    int_atom = tables.Int64Atom()

    array_F = f.create_carray(f.root, 'F_measure', float_atom, (n_expts, n_labels, n_class))
    array_s = f.create_carray(f.root, 'n_iterations', int_atom, (n_expts, 1))
    array_t = f.create_carray(f.root, 'CPU_time', float_atom, (n_expts, 1))

    logging.info("Starting {} experiments".format(n_expts))
    for i in range(n_expts):
        if i%np.ceil(n_expts/10).astype(int) == 0:
            logging.info("Completed {} of {} experiments".format(i, n_expts))
        ti = time.process_time()
        smplr.reset()
        smplr.sample_distinct(n_labels)
        tf = time.process_time()
        if hasattr(smplr, 'queried_oracle_'):
            array_F[i,:,:] = smplr.estimate_[smplr.queried_oracle_]
        else:
            array_F[i,:,:] = smplr.estimate_
        array_s[i] = smplr.t_
        array_t[i] = tf - ti

    f.close()

    logging.info("Completed all experiments")
def check_HDF5_arrays(hdf5_file, N, convergence_iter):
    """Check that the HDF5 data structure of file handle 'hdf5_file'
    has all the required nodes organizing the various two-dimensional
    arrays required for Affinity Propagation clustering
    ('Responsibility' matrix, 'Availability', etc.).

    Parameters
    ----------
    hdf5_file : string or file handle
        Name of the Hierarchical Data Format under consideration.

    N : int
        The number of samples in the data-set that will undergo
        Affinity Propagation clustering.

    convergence_iter : int
        Number of iterations with no change in the number of estimated
        clusters that stops the convergence.
    """

    Worker.hdf5_lock.acquire()

    with tables.open_file(hdf5_file, 'r+') as fileh:
        if not hasattr(fileh.root, 'aff_prop_group'):
            fileh.create_group(fileh.root, "aff_prop_group")

        atom = tables.Float32Atom()
        filters = None
        #filters = tables.Filters(5, 'blosc')

        for feature in ('availabilities', 'responsibilities',
                        'similarities', 'temporaries'):
            if not hasattr(fileh.root.aff_prop_group, feature):
                fileh.create_carray(fileh.root.aff_prop_group, feature, atom,
                                    (N, N), "Matrix of {0} for affinity "
                                    "propagation clustering".format(feature),
                                    filters = filters)

        if not hasattr(fileh.root.aff_prop_group, 'parallel_updates'):
            fileh.create_carray(fileh.root.aff_prop_group, 'parallel_updates',
                                atom, (N, convergence_iter),
                                "Matrix of parallel updates for affinity propagation "
                                "clustering", filters = filters)

    Worker.hdf5_lock.release()
def create_song_file(h5filename, title='H5 Song File', force=False, complevel=1):
    """
    Create a new HDF5 file for a new song.
    If force=False, refuse to overwrite an existing file and raise a
    ValueError in that case.
    The other optional params configure the H5 file.
    Sets up the groups, each containing a table 'songs' with one row:
    - metadata
    - analysis
    DETAIL
    - we set the compression level to 1 by default; it uses the ZLIB library.
      To disable compression, set it to 0.
    """
    # check if file exists
    if not force:
        if os.path.exists(h5filename):
            raise ValueError('file exists, can not create HDF5 song file')
    # create the H5 file
    h5 = tables.openFile(h5filename, mode='w', title='H5 Song File')
    # set filter level
    h5.filters = tables.Filters(complevel=complevel, complib='zlib')
    # setup the groups and tables
    # group metadata
    group = h5.createGroup("/", 'metadata', 'metadata about the song')
    table = h5.createTable(group, 'songs', DESC.SongMetaData, 'table of metadata for one song')
    r = table.row
    r.append()  # filled with default values 0 or '' (depending on type)
    table.flush()
    # group analysis
    group = h5.createGroup("/", 'analysis', 'Echo Nest analysis of the song')
    table = h5.createTable(group, 'songs', DESC.SongAnalysis, 'table of Echo Nest analysis for one song')
    r = table.row
    r.append()  # filled with default values 0 or '' (depending on type)
    table.flush()
    # group musicbrainz
    group = h5.createGroup("/", 'musicbrainz', 'data about the song coming from MusicBrainz')
    table = h5.createTable(group, 'songs', DESC.SongMusicBrainz, 'table of data coming from MusicBrainz')
    r = table.row
    r.append()  # filled with default values 0 or '' (depending on type)
    table.flush()
    # create arrays
    create_all_arrays(h5, expectedrows=3)
    # close it, done
    h5.close()
def create_aggregate_file(h5filename, title='H5 Aggregate File', force=False, expectedrows=1000,
                          complevel=1, summaryfile=False):
    """
    Create a new HDF5 file for all songs.
    It will contain everything that is in regular song files.
    Tables are created empty.
    If force=False, refuse to overwrite an existing file and raise a
    ValueError in that case.
    If summaryfile=True, creates a summary file, i.e. no arrays.
    The other optional params configure the H5 file.
    DETAILS
    - if you create a very large file, try to approximate the number of data
      points (songs) correctly; it speeds things up with arrays (by setting
      the chunking correctly).
    - we set the compression level to 1 by default; it uses the ZLIB library.
      To disable compression, set it to 0.
    Sets up the groups, each containing a table 'songs' with one row:
    - metadata
    - analysis
    """
    # check if file exists
    if not force:
        if os.path.exists(h5filename):
            raise ValueError('file exists, can not create HDF5 song file')
    # summary file? change title
    if summaryfile:
        title = 'H5 Summary File'
    # create the H5 file (use the possibly adjusted title)
    h5 = tables.openFile(h5filename, mode='w', title=title)
    # set filter level
    h5.filters = tables.Filters(complevel=complevel, complib='zlib')
    # setup the groups and tables
    # group metadata
    group = h5.createGroup("/", 'metadata', 'metadata about the song')
    table = h5.createTable(group, 'songs', DESC.SongMetaData, 'table of metadata for one song',
                           expectedrows=expectedrows)
    # group analysis
    group = h5.createGroup("/", 'analysis', 'Echo Nest analysis of the song')
    table = h5.createTable(group, 'songs', DESC.SongAnalysis, 'table of Echo Nest analysis for one song',
                           expectedrows=expectedrows)
    # group musicbrainz
    group = h5.createGroup("/", 'musicbrainz', 'data about the song coming from MusicBrainz')
    table = h5.createTable(group, 'songs', DESC.SongMusicBrainz, 'table of data coming from MusicBrainz',
                           expectedrows=expectedrows)
    # create arrays
    if not summaryfile:
        create_all_arrays(h5, expectedrows=expectedrows)
    # close it, done
    h5.close()