The following 43 code examples, extracted from open-source Python projects, illustrate how to use h5py.Dataset().
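Before the project examples, here is a minimal, self-contained sketch of the two patterns that recur throughout them: creating a dataset via h5py.File.create_dataset() and testing whether a node in a file is a dataset with isinstance(obj, h5py.Dataset). The file name and dataset paths are placeholders, not taken from any of the projects below.

import h5py
import numpy as np

# Minimal sketch: write one dataset, then walk the file and
# distinguish datasets from groups with isinstance checks.
with h5py.File('example.h5', 'w') as f:
    f.create_dataset('group/data', data=np.arange(10))

with h5py.File('example.h5', 'r') as f:
    def visit(name, obj):
        if isinstance(obj, h5py.Dataset):   # datasets expose shape and dtype
            print(name, obj.shape, obj.dtype)
        elif isinstance(obj, h5py.Group):   # groups behave like dicts
            print(name, 'group')
    f.visititems(visit)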
def _visitfunc(self, name, node):
    level = len(name.split('/'))
    indent = ' '*4*(level-1)
    #indent = '<span style="color:blue;">'.format(level*4)
    localname = name.split('/')[-1]

    #search_text = self.settings['search_text'].lower()
    search_text = self.search_text
    if search_text and (search_text in localname.lower()):
        localname = """<span style="color: red;">{}</span>""".format(localname)

    if isinstance(node, h5py.Group):
        self.tree_str += indent + "|> <b>{}/</b><br/>".format(localname)
    elif isinstance(node, h5py.Dataset):
        self.tree_str += indent + "|D <b>{}</b>: {} {}<br/>".format(localname, node.shape, node.dtype)
    for key, val in node.attrs.items():
        if search_text:
            if search_text in str(key).lower():
                key = """<span style="color: red;">{}</span>""".format(key)
            if search_text in str(val).lower():
                val = """<span style="color: red;">{}</span>""".format(val)
        self.tree_str += indent + " |- <i>{}</i> = {}<br/>".format(key, val)
def get_all_hdf_dataset(hdf, fileter_func=None, path='/'):
    res = []
    # init queue
    q = queue()
    for i in hdf[path].keys():
        q.put(i)
    # get list of all file
    while not q.empty():
        p = q.pop()
        if 'Dataset' in str(type(hdf[p])):
            if fileter_func is not None and not fileter_func(p):
                continue
            res.append(p)
        elif 'Group' in str(type(hdf[p])):
            for i in hdf[p].keys():
                q.put(p + '/' + i)
    return res
def from_hdf5(fpath, dataset="data"):
    """Loading data from hdf5 files that was stored by \
    :meth:`~wradlib.io.to_hdf5`

    Parameters
    ----------
    fpath : string
        path to the hdf5 file
    dataset : string
        name of the Dataset in which the data is stored
    """
    f = h5py.File(fpath, mode="r")
    # Check whether Dataset exists
    if dataset not in f.keys():
        print("Cannot read Dataset <%s> from hdf5 file <%s>" % (dataset, f))
        f.close()
        sys.exit()
    data = np.array(f[dataset][:])
    # get metadata
    metadata = {}
    for key in f[dataset].attrs.keys():
        metadata[key] = f[dataset].attrs[key]
    f.close()
    return data, metadata
def setup_openpmd_species_record( self, grp, quantity ) :
    """
    Set the attributes that are specific to a species record

    Parameter
    ---------
    grp : an h5py.Group object or h5py.Dataset
        The group that correspond to `quantity`
        (in particular, its path must end with "/<quantity>")

    quantity : string
        The name of the record being setup
        e.g. "position", "momentum"
    """
    # Generic setup
    self.setup_openpmd_record( grp, quantity )

    # Weighting information
    grp.attrs["macroWeighted"] = macro_weighted_dict[quantity]
    grp.attrs["weightingPower"] = weighting_power_dict[quantity]
def read_h5(fn):
    """Read h5 file into dict.

    Dict keys are the group + dataset names, e.g. '/a/b/c/dset'. All keys
    start with a leading slash even if written without (see
    :func:`write_h5`).

    Parameters
    ----------
    fn : str
        filename

    Examples
    --------
    >>> read_h5('foo.h5').keys()
    ['/a/b/d1', '/a/b/d2', '/a/c/d3', '/x/y/z']
    """
    fh = h5py.File(fn, mode='r')
    dct = {}
    def get(name, obj, dct=dct):
        if isinstance(obj, h5py.Dataset):
            _name = name if name.startswith('/') else '/'+name
            dct[_name] = obj.value
    fh.visititems(get)
    fh.close()
    return dct
def _resize_stacks(self, stack_length, group_prefix="/"):
    if group_prefix == "/":
        log_info(logger, self._log_prefix + "Resize datasets to new length: %i" % stack_length)
    if stack_length == 0:
        log_warning(logger, self._log_prefix + "Cannot resize stacks to length 0. Skip resize stacks.")
        return
    keys = self._f[group_prefix].keys()
    keys.sort()
    for k in keys:
        name = group_prefix + k
        if isinstance(self._f[name], h5py.Dataset):
            if self._is_stack(name):
                self._resize_stack(stack_length, name)
        else:
            self._resize_stacks(stack_length, name + "/")
    self._stack_length = stack_length
def h5py_dataset_iterator(self, g, prefix=''):
    for key in g.keys():
        item = g[key]
        path = '{}/{}'.format(prefix, key)
        keys = [i for i in item.keys()]
        if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
            data = {'path': path}
            for k in keys:
                if not isinstance(item[k], h5py.Group):
                    dataset = np.array(item[k].value)
                    if type(dataset) is np.ndarray:
                        if dataset.size != 0:
                            if type(dataset[0]) is np.bytes_:
                                dataset = [a.decode('ascii') for a in dataset]
                            data.update({k: dataset})
            yield data
        else:  # test for group (go down)
            yield from self.h5py_dataset_iterator(item, path)
def load_h5(filename):
    '''Load data from an hdf5 file created by `save_h5`.

    Parameters
    ----------
    filename : str
        Path to the hdf5 file

    Returns
    -------
    data : dict
        The key-value data stored in `filename`

    See Also
    --------
    save_h5
    '''
    data = {}

    def collect(k, v):
        if isinstance(v, h5py.Dataset):
            data[k] = v.value

    with h5py.File(filename, mode='r') as hf:
        hf.visititems(collect)

    return data
def _populate_data(self, ret_dict, obj, name):
    """Read data recursively from an HDF5 value and add it to `ret_dict`.

    If `obj` is a dataset, it is added to `ret_dict`. If `obj` is a group, a
    sub-dictionary is created in `ret_dict` for `obj` and populated
    recursively by calling this function on all of the items in the `obj`
    group.

    Parameters
    ----------
    ret_dict : OrderedDict
        Dictionary to which metadata will be added.

    obj : h5py.Dataset | h5py.Group
        HDF5 value from which to read metadata.

    name : valid dictionary key
        Dictionary key in `ret_dict` under which to store the data from
        `obj`.
    """
    if isinstance(obj, h5py.Dataset):
        # [()] casts a Dataset as a numpy array
        ret_dict[name] = obj[()]
    else:
        # create a dictionary for this group
        ret_dict[name] = {}
        for key, value in obj.items():
            self._populate_data(ret_dict[name], value, key)
def slice_array(X, start=None, stop=None):
    if type(X) == list:
        if hasattr(start, '__len__'):
            return [x[start] for x in X]
        else:
            return [x[start:stop] for x in X]
    if H5PY_SUPPORTED:
        if type(X) == h5py.Dataset:
            return [X[i] for i in start]
    if hasattr(start, '__len__'):
        return X[start]
    else:
        return X[start:stop]
def Dataset(self):
    if self._Dataset is None:
        try:
            from netCDF4 import Dataset
        except ImportError:
            Dataset = NotAModule(self._name)
        self._Dataset = Dataset
    return self._Dataset
def Dataset(self):
    if self._err:
        raise self._err
    if self._Dataset is None:
        try:
            from h5py import Dataset
        except ImportError:
            Dataset = NotAModule(self._name)
        self._Dataset = Dataset
    return self._Dataset
def __traverse_add(self, item, filename):
    if isinstance(item, h5py.Dataset):
        self.add_dataset(item, filename + item.name)
    elif isinstance(item, h5py.Group):
        for k in item:
            self.__traverse_add(item[k], filename)
    else:
        print("Skipping " + item.name)
def _convert_to_np_dtype(dset):
    """
    Given an HDF5 dataset, return the values in a numpy-builtin datatype

    Parameters
    ----------
    dset : h5py.Dataset
        HDF5 (h5py) dataset

    Returns
    -------
    out : numpy.ndarray (dtype = numpy built-in)

    Note
    ----
    The software accounts for big-/little-endianness, and the inability of \
    hdf5 to natively store complex numbers.
    """
    assert isinstance(dset, _h5py.Dataset), 'Input is not of type h5py.Dataset'
    # Single datatype
    if len(dset.dtype) == 0:
        converted = _np.ndarray(dset.shape, dtype=dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
        if issubclass(converted.dtype.type, _np.integer):  # Integer to float
            converted = converted.astype(_np.float)
        return converted
    # Compound datatype of length 2 -- assumed ('Re','Im')
    elif len(dset.dtype) == 2:
        print('Warning: h5py.complex_names set incorrectly using \'{}\' and \'{}\' \
for Re and Im, respectively'.format(dset.dtype.names[0], dset.dtype.names[1]))
        _h5py.get_config().complex_names = (dset.dtype.names[0], dset.dtype.names[1])
        dset = dset.file[dset.name]
        converted = _np.ndarray(dset.shape, dtype=dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
    # Unknown datatype
    else:
        print('Warning: Unknown datatype. Returning dataset values as is.')
        return dset.value
    return converted
def check_serialize(self, data):
    ret = self.serializer('w', data)
    dset = self.hdf5file['w']

    self.assertIsInstance(dset, h5py.Dataset)
    self.assertEqual(dset.shape, data.shape)
    self.assertEqual(dset.size, data.size)
    self.assertEqual(dset.dtype, data.dtype)

    read = numpy.empty((2, 3), dtype=numpy.float32)
    dset.read_direct(read)
    numpy.testing.assert_array_equal(read, cuda.to_cpu(data))

    self.assertEqual(dset.compression_opts, 3)
    self.assertIs(ret, data)
def test_serialize_scalar(self):
    ret = self.serializer('x', 10)
    dset = self.hdf5file['x']

    self.assertIsInstance(dset, h5py.Dataset)
    self.assertEqual(dset.shape, ())
    self.assertEqual(dset.size, 1)
    self.assertEqual(dset.dtype, int)

    read = numpy.empty((), dtype=numpy.int32)
    dset.read_direct(read)
    self.assertEqual(read, 10)

    self.assertEqual(dset.compression_opts, None)
    self.assertIs(ret, 10)
def loadDataHDF5(data):
    if isinstance(data, h5py.File) or isinstance(data, h5py.Group):
        return {k: loadDataHDF5(v) for k, v in data.iteritems()}
    elif isinstance(data, h5py.Dataset):
        return data.value
    else:
        print 'unhandled datatype: %s' % type(data)
def _visitfunc(self, name, node):
    level = len(name.split('/'))
    indent = ' '*level
    localname = name.split('/')[-1]

    if isinstance(node, h5py.Group):
        self.tree_str += indent + "|> {}\n".format(localname)
    elif isinstance(node, h5py.Dataset):
        self.tree_str += indent + "|D {}: {} {}\n".format(localname, node.shape, node.dtype)
    for key, val in node.attrs.items():
        self.tree_str += indent + " |- {} = {}\n".format(key, val)
def __init__(self, dataset, hdf=None, dtype=None, shape=None):
    super(Hdf5Data, self).__init__()
    raise Exception("Hdf5Data is under-maintanance!")
    # default chunks size is 32 (reduce complexity of the works)
    self._chunk_size = 32
    if isinstance(hdf, str):
        hdf = open_hdf5(hdf)
    if hdf is None and not isinstance(dataset, h5py.Dataset):
        raise ValueError('Cannot initialize dataset without hdf file')

    if isinstance(dataset, h5py.Dataset):
        self._data = dataset
        self._hdf = dataset.file
    else:
        if dataset not in hdf:  # not created dataset
            if dtype is None or shape is None:
                raise ValueError('dtype and shape must be specified if '
                                 'dataset has not created in hdf5 file.')
            shape = tuple([0 if i is None else i for i in shape])
            hdf.create_dataset(dataset, dtype=dtype,
                               chunks=_get_chunk_size(shape, self._chunk_size),
                               shape=shape, maxshape=(None, ) + shape[1:])

        self._data = hdf[dataset]
        if shape is not None and self._data[0].shape[1:] != shape[1:]:
            raise ValueError('Shape mismatch between predefined dataset '
                             'and given shape, {} != {}'
                             ''.format(shape, self._data[0].shape))
        self._hdf = hdf

# ==================== properties ==================== #
def collect_metadata(self, name, obj):
    if isinstance(obj, h5py.Dataset):
        self.file_content[name] = obj
        self.file_content[name + "/dtype"] = obj.dtype
        self.file_content[name + "/shape"] = obj.shape
    self._collect_attrs(name, obj.attrs)
def __getitem__(self, key):
    val = self.file_content[key]
    if isinstance(val, h5py.Dataset):
        # these datasets are closed and inaccessible when the file is closed, need to reopen
        return h5py.File(self.filename, 'r')[key].value
    return val
def setup_openpmd_species_component( self, grp, quantity ) :
    """
    Set the attributes that are specific to a species component

    Parameter
    ---------
    grp : an h5py.Group object or h5py.Dataset

    quantity : string
        The name of the component
    """
    self.setup_openpmd_component( grp )
def setup_openpmd_record( self, dset, quantity ) :
    """
    Sets the attributes of a record, that comply with OpenPMD

    Parameter
    ---------
    dset : an h5py.Dataset or h5py.Group object

    quantity : string
        The name of the record considered
    """
    dset.attrs["unitDimension"] = unit_dimension_dict[quantity]
    # No time offset (approximation)
    dset.attrs["timeOffset"] = 0.
def setup_openpmd_component( self, dset ) :
    """
    Sets the attributes of a component, that comply with OpenPMD

    Parameter
    ---------
    dset : an h5py.Dataset or h5py.Group object
    """
    dset.attrs["unitSI"] = 1.
def reconstruct(particles, events=-1,
                config='delphes_card_ATLAS_NoFastJet.tcl',
                objects='Calorimeter/towers',
                random_state=0):
    if not os.path.exists(config):
        internal_config = os.path.join(
            os.environ.get('DEEPJETS_DIR'), 'config', 'delphes', config)
        if not os.path.isabs(config) and os.path.exists(internal_config):
            log.warning("{0} does not exist but using internal "
                        "config with the same name instead: {1}".format(
                            config, internal_config))
            config = internal_config
        else:
            raise IOError("Delphes config not found: {0}".format(config))

    delphes = DelphesWrapper(config, random_state, objects)

    kwargs = dict()
    if isinstance(particles, MCInput):
        reco_func = reconstruct_mc
    elif isinstance(particles, h5.Dataset):
        reco_func = reconstruct_hdf5
    else:
        reco_func = reconstruct_iterable
        kwargs['events'] = events
        if not inspect.isgenerator(particles) and not isinstance(particles, list):
            # handle case where input is just one event
            particles = [particles]

    for event in reco_func(delphes, particles, **kwargs):
        yield event
def __getitem__(self, key):
    h5py_item = self.h5py_group[key]
    if isinstance(h5py_item, h5py.Group):
        if 'h5sparse_format' in h5py_item.attrs:
            # detect the sparse matrix
            return Dataset(h5py_item)
        else:
            return Group(h5py_item)
    elif isinstance(h5py_item, h5py.Dataset):
        return h5py_item
    else:
        raise ValueError("Unexpected item type.")
def create_dataset(self, name, shape=None, dtype=None, data=None,
                   format='csr', indptr_dtype=np.int64,
                   indices_dtype=np.int32, **kwargs):
    """Create 4 datasets in a group to represent the sparse array."""
    if data is None:
        raise NotImplementedError("Only support create_dataset with "
                                  "existed data.")
    elif isinstance(data, Dataset):
        group = self.h5py_group.create_group(name)
        group.attrs['h5sparse_format'] = data.h5py_group.attrs['h5sparse_format']
        group.attrs['h5sparse_shape'] = data.h5py_group.attrs['h5sparse_shape']
        group.create_dataset('data', data=data.h5py_group['data'],
                             dtype=dtype, **kwargs)
        group.create_dataset('indices', data=data.h5py_group['indices'],
                             dtype=indices_dtype, **kwargs)
        group.create_dataset('indptr', data=data.h5py_group['indptr'],
                             dtype=indptr_dtype, **kwargs)
    else:
        group = self.h5py_group.create_group(name)
        group.attrs['h5sparse_format'] = get_format_str(data)
        group.attrs['h5sparse_shape'] = data.shape
        group.create_dataset('data', data=data.data, dtype=dtype, **kwargs)
        group.create_dataset('indices', data=data.indices,
                             dtype=indices_dtype, **kwargs)
        group.create_dataset('indptr', data=data.indptr,
                             dtype=indptr_dtype, **kwargs)
    return Dataset(group)
def initialize_from(self, filename, ob_stat=None):
    """
    Initializes weights from another policy, which must have the same
    architecture (variable names), but the weight arrays can be smaller
    than the current policy.
    """
    with h5py.File(filename, 'r') as f:
        f_var_names = []
        f.visititems(lambda name, obj: f_var_names.append(name) if isinstance(obj, h5py.Dataset) else None)
        assert set(v.name for v in self.all_variables) == set(f_var_names), 'Variable names do not match'

        init_vals = []
        for v in self.all_variables:
            shp = v.get_shape().as_list()
            f_shp = f[v.name].shape
            assert len(shp) == len(f_shp) and all(a >= b for a, b in zip(shp, f_shp)), \
                'This policy must have more weights than the policy to load'
            init_val = v.eval()
            # ob_mean and ob_std are initialized with nan, so set them manually
            if 'ob_mean' in v.name:
                init_val[:] = 0
                init_mean = init_val
            elif 'ob_std' in v.name:
                init_val[:] = 0.001
                init_std = init_val
            # Fill in subarray from the loaded policy
            init_val[tuple([np.s_[:s] for s in f_shp])] = f[v.name]
            init_vals.append(init_val)
        self.set_all_vars(*init_vals)

    if ob_stat is not None:
        ob_stat.set_from_init(init_mean, init_std, init_count=1e5)
def _write_by_chunk(dset, arrs):
    # Note: arrs should be a generator for performance reasons.
    assert isinstance(dset, Dataset)
    # Start the data.
    offset = 0
    for arr in arrs:
        n = arr.shape[0]
        arr = arr[...]
        # Match the shape of the chunk array with the dset shape.
        assert arr.shape == (n,) + dset.shape[1:]
        dset[offset:offset + n, ...] = arr
        offset += arr.shape[0]

    # Check that the copy is complete.
    assert offset == dset.shape[0]
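A short usage sketch for the helper above, under the assumption that the destination dataset is preallocated to the total length and that the chunks arrive as a generator; the file name, dataset name, and shapes are illustrative only.

import h5py
import numpy as np

# Hypothetical driver for _write_by_chunk: preallocate the full-length
# dataset, then stream equally shaped chunks from a generator into it.
chunks = [np.random.rand(100, 3) for _ in range(5)]   # 5 chunks of 100 rows
with h5py.File('chunked.h5', 'w') as f:
    dset = f.create_dataset('data', shape=(500, 3), dtype='f8')
    _write_by_chunk(dset, (c for c in chunks))        # pass a generator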
def datasets(self, path='/'):
    """Return the list of datasets under a given node."""
    return [key for key in self.children(path)
            if isinstance(self._h5py_file[path + '/' + key], h5py.Dataset)]

# Miscellaneous properties
#--------------------------------------------------------------------------
def _print_node_info(self, name, node):
    """Print node information."""
    info = ('/' + name).ljust(50)
    if isinstance(node, h5py.Group):
        pass
    elif isinstance(node, h5py.Dataset):
        info += str(node.shape).ljust(20)
        info += str(node.dtype).ljust(8)
    print(info)
def _write_to_f(self, name, data):
    if data is None:
        log_warning(logger, "Data %s is None! Skipping this item as we cannot write this data type." % name)
    elif name in self._f:
        log_warning(logger, "Dataset %s already exists! Overwriting with new data." % name)
    else:
        log_debug(logger, "Writing to dataset %s." % name)
        self._f[name] = data
def _create_dataset(self, data, name):
    data = numpy.asarray(data)
    try:
        h5py.h5t.py_create(data.dtype, logical=1)
    except TypeError:
        log_and_raise_error(logger, self._log_prefix + "Could not save dataset %s. Conversion to numpy array failed" % (name))
        return 1
    if data.nbytes == 0:
        log_and_raise_error(logger, self._log_prefix + "Could not save dataset %s. Dataset is empty" % (name))
        return 1
    maxshape = tuple([None]+list(data.shape))
    shape = tuple([self._chunksize]+list(data.shape))
    dtype = data.dtype
    if dtype.type is numpy.string_:
        dtype = h5py.new_vlen(str)
    nbytes_chunk = numpy.prod(shape) * dtype.itemsize
    if nbytes_chunk > CHUNKSIZE_MIN_IN_BYTES:
        chunksize = self._chunksize
        #log_debug(logger, self._log_prefix + "Do not increase chunksize (%i) for dataset %s (%i bytes for single data frame)" % (self._chunksize, name, nbytes_chunk))
    else:
        chunksize = int(numpy.ceil(float(CHUNKSIZE_MIN_IN_BYTES) / float(data.nbytes)))
        log_debug(logger, self._log_prefix + "Increase chunksize from %i to %i for dataset %s (only %i bytes for single data frame)" % (self._chunksize, chunksize, name, nbytes_chunk))
        chunksize = min([chunksize, CHUNKSIZE_MAX_IN_FRAMES])
    chunks = tuple([chunksize]+list(data.shape))
    ndim = data.ndim
    axes = "experiment_identifier"
    if ndim == 1:
        axes = axes + ":x"
    elif ndim == 2:
        axes = axes + ":y:x"
    elif ndim == 3:
        axes = axes + ":z:y:x"
    log_debug(logger, self._log_prefix + "Create dataset %s [shape=%s, chunks=%s, dtype=%s]" % (name, str(shape), str(chunks), str(dtype)))
    self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, chunks=chunks)
    self._f[name].attrs.modify("axes", [axes])
    return 0
def dataset_append(dataset, arr):
    """
    Append an array to an h5py dataset.

    Parameters
    ----------
    dataset : h5py.Dataset
        Dataset to extend. Must be resizable in its first dimension.
    arr : numpy.ndarray
        Array to append. All dimensions of `arr` other than the first
        dimension must be the same as those of the dataset.
    """

    assert isinstance(dataset, h5py.Dataset)
    assert isinstance(arr, np.ndarray)

    # Save leading dimension of stored array:
    maxshape = list(dataset.shape)
    old_ld_dim = maxshape[0]

    # Extend leading dimension of stored array to accommodate new array:
    maxshape[0] += arr.shape[0]
    dataset.resize(maxshape)

    # Compute slices to use when assigning `arr` to array extension:
    slices = [slice(old_ld_dim, None)]
    for s in maxshape[1:]:
        slices.append(slice(None, None))

    # Convert list of slices to tuple because __setitem__ can
    # only handle "simple" indexes:
    slices = tuple(slices)
    dataset.__setitem__(slices, arr)
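A brief usage note on the example above: dataset.resize() only succeeds if the dataset was created chunked with an unlimited first dimension (maxshape=(None, ...)). The sketch below shows how a caller might prepare such a dataset and append to it with dataset_append(); the file name, dataset name, and shapes are illustrative, not part of the original project.

import h5py
import numpy as np

# Hypothetical use of dataset_append: the dataset must be resizable
# along its first axis, i.e. created with maxshape=(None, ...).
with h5py.File('append_demo.h5', 'w') as f:
    dset = f.create_dataset('stream', shape=(0, 4), maxshape=(None, 4),
                            dtype='f8', chunks=(64, 4))
    dataset_append(dset, np.random.rand(10, 4))
    dataset_append(dset, np.random.rand(5, 4))
    print(dset.shape)   # (15, 4)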
def __getitem__(self, key):
    h5py_item = self.h5py_group[key]
    if isinstance(h5py_item, h5py.Group):
        if 'h5sparse_format' in h5py_item.attrs:
            # detect the sparse matrix
            return SparseDataset(h5py_item)
        else:
            return Group(h5py_item)
    elif isinstance(h5py_item, h5py.Dataset):
        return h5py_item
    else:
        raise ValueError("Unexpected item type.")
def setUp(self):
    TestCase.setUp(self)
    filename = self.getFileName("dataset_testempty")
    print("filename:", filename)
    """
    self.f = h5py.File(filename, 'w')

    sid = h5py.h5s.create(h5py.h5s.NULL)
    tid = h5py.h5t.C_S1.copy()
    tid.set_size(10)
    dsid = h5py.h5d.create(self.f.id, b'x', tid, sid)
    self.dset = h5py.Dataset(dsid)
    """
def arf2bark(arf_file, root_parent, timezone, verbose):
    with arf.open_file(arf_file, 'r') as af:
        # root
        root_dirname = os.path.splitext(arf_file)[0]
        root_path = os.path.join(os.path.abspath(root_parent), root_dirname)
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items():  # entries and top-level datasets
            if isinstance(entry, h5py.Group):  # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                timestamp = entry_attrs.pop('timestamp')
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)
                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items():  # entry-level datasets
                    if ds_name == 'trig_in':  # accessing trig_in -> segfault
                        found_trigin = True  # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset):  # top-level datasets
                if tle is None:
                    path = os.path.join(root_path, 'top_level')
                    tle = bark.create_entry(path, 0, parents=False).path
                transfer_dset(ename, entry, tle, verbose)
    if found_trigin:
        print('Warning: found datasets named "trig_in". Jill-created ' +
              '"trig_in" datasets segfault when read, so these datasets' +
              ' were skipped. If you know the datasets are good, rename' +
              ' them and try again.')
    return bark.Root(root_path)
def hdf_is_valid_dsets(pth, filename, dset_list):
    """
    Validate that the file and dataset(s) exist. Return a boolean indicating
    whether they are valid.
    """
    # Join path and filename in an os-independent way
    pfname = _os.path.normpath(_os.path.join(pth, filename))

    isvalid = False
    fileexists = False

    try:
        f = _h5py.File(pfname, 'r')
        print('File exists: \'{}\''.format(pfname))
        fileexists = True
    except OSError:
        print('File does not exist: \'{}\''.format(pfname))
        fileexists = False
    else:
        if isinstance(dset_list, list):  # List of dataset(s)
            try:
                for count in dset_list:
                    f[count]
            except:
                print('dataset: {} is invalid'.format(count))
            else:
                print('All datasets are valid')
                isvalid = True
        elif isinstance(dset_list, str):  # Single dataset string name
            try:
                f[dset_list]
            except:
                # Note: report the dataset name itself (dset_list), not the
                # loop variable from the list branch.
                print('dataset {} is invalid'.format(dset_list))
            else:
                print('Dataset is valid')
                isvalid = True
        else:
            print('dset_list is unrecognized type')
    finally:
        if fileexists:
            f.close()

    return isvalid
def _readMap(self, resolution=None):
    """ Temporarily store h5py.Dataset object to GCMAP.matrix and update
    all attributes for given map.
    """
    if self.groupName not in self.hdf5:
        raise util.MapNotFoundError(' [{0}] dataset not found in [{1}] file...'.format(self.groupName, self.hdf5.filename))

    # determining finest resolution map
    self.binsizes = []
    for key in self.hdf5[self.groupName].keys():
        if 'bNoData' not in key:
            self.binsizes.append( self.hdf5[self.groupName][key].attrs['binsize'] )
    self.binsizes = sorted(self.binsizes)

    # At the start, always choose finest resolution
    self.finestResolution = util.binsizeToResolution(self.binsizes[0])
    if resolution is not None:
        resolutionList = list( map(util.binsizeToResolution, self.binsizes ) )
        if resolution in resolutionList:
            self.resolution = resolution
        else:
            raise util.ResolutionNotFoundError(' "{0}" resolution not found for "{1}" in file: "{2}".'.format(resolution, self.groupName, self.hdf5.filename))
    else:
        self.resolution = self.finestResolution

    self.dtype = self.hdf5[self.groupName][self.resolution].dtype

    for key in ['xlabel', 'ylabel']:
        self.__dict__[key] = self.hdf5[self.groupName].attrs[key]

    for key in ['minvalue', 'maxvalue', 'binsize']:
        self.__dict__[key] = self.hdf5[self.groupName][self.resolution].attrs[key]

    self.shape = (self.hdf5[self.groupName][self.resolution].attrs['xshape'],
                  self.hdf5[self.groupName][self.resolution].attrs['yshape'])

    self.xticks = [0, self.shape[0]*self.binsize]
    self.yticks = [0, self.shape[1]*self.binsize]
    self.title = self.xlabel + '_vs_' + self.ylabel

    if self.resolution+'-bNoData' in self.hdf5[self.groupName]:
        self.bNoData = np.asarray( self.hdf5[self.groupName][self.resolution+'-bNoData'][:], dtype=np.bool )

    self.matrix = self.hdf5[self.groupName][self.resolution]
def read_generic_hdf5(fname):
    """Reads hdf5 files according to their structure

    In contrast to other file readers under :meth:`wradlib.io`, this function
    will *not* return a two item tuple with (data, metadata). Instead, this
    function returns ONE dictionary that contains all the file contents -
    both data and metadata. The keys of the output dictionary conform to the
    Group/Subgroup directory branches of the original file.

    Parameters
    ----------
    fname : string
        a hdf5 file path

    Returns
    -------
    output : dict
        a dictionary that contains both data and metadata according to the
        original hdf5 file structure

    Examples
    --------
    See :ref:`notebooks/fileio/wradlib_radar_formats.ipynb#Generic-HDF5`.
    """
    f = h5py.File(fname, "r")
    fcontent = {}

    def filldict(x, y):
        # create a new container
        tmp = {}
        # add attributes if present
        if len(y.attrs) > 0:
            tmp['attrs'] = dict(y.attrs)
        # add data if it is a dataset
        if isinstance(y, h5py.Dataset):
            tmp['data'] = np.array(y)
        # only add to the dictionary, if we have something meaningful to add
        if tmp != {}:
            fcontent[x] = tmp

    f.visititems(filldict)

    f.close()
    return fcontent
def read_OPERA_hdf5(fname):
    """Reads hdf5 files according to OPERA conventions

    Please refer to the OPERA data model documentation
    :cite:`OPERA-data-model` in order to understand how an hdf5 file is
    organized that conforms to the OPERA ODIM_H5 conventions.

    In contrast to other file readers under :meth:`wradlib.io`, this function
    will *not* return a two item tuple with (data, metadata). Instead, this
    function returns ONE dictionary that contains all the file contents -
    both data and metadata. The keys of the output dictionary conform to the
    Group/Subgroup directory branches of the original file. If the end member
    of a branch (or path) is "data", then the corresponding item of the output
    dictionary is a numpy array with actual data.

    Any other end member (either *how*, *where*, and *what*) will contain the
    meta information applying to the corresponding level of the file
    hierarchy.

    Parameters
    ----------
    fname : string
        a hdf5 file path

    Returns
    -------
    output : dict
        a dictionary that contains both data and metadata according to the
        original hdf5 file structure
    """
    f = h5py.File(fname, "r")

    # now we browse through all Groups and Datasets and store the info in one
    # dictionary
    fcontent = {}

    def filldict(x, y):
        if isinstance(y, h5py.Group):
            if len(y.attrs) > 0:
                fcontent[x] = dict(y.attrs)
        elif isinstance(y, h5py.Dataset):
            fcontent[x] = np.array(y)

    f.visititems(filldict)

    f.close()
    return fcontent
def cluster(inputs, events=-1, skip_failed=True, eta_max=5.,
            jet_size=1.0, subjet_size=0.3,
            subjet_pt_min_fraction=0.05,
            subjet_dr_min=0.,
            trimmed_pt_min=-1., trimmed_pt_max=-1.,
            trimmed_mass_min=-1., trimmed_mass_max=-1.,
            shrink=False, shrink_mass=-1,
            compute_auxvars=False):
    """
    Cluster particles into jets. Inputs may be an MCInput, h5py Dataset, an
    array of particles (single event) or a generator that yields events of
    particles. The events and skip_failed arguments are only applied in the
    case that inputs is a generator function.
    """
    if jet_size <= 0:
        raise ValueError("jet_size must be greater than zero")
    if subjet_size <= 0 or subjet_size > 0.5 * jet_size:
        raise ValueError(
            "subjet_size must be in the range (0, 0.5 * jet_size]")

    kwargs = dict(
        eta_max=eta_max,
        jet_size=jet_size,
        subjet_size=subjet_size,
        subjet_pt_min_fraction=subjet_pt_min_fraction,
        subjet_dr_min=subjet_dr_min,
        trimmed_pt_min=trimmed_pt_min,
        trimmed_pt_max=trimmed_pt_max,
        trimmed_mass_min=trimmed_mass_min,
        trimmed_mass_max=trimmed_mass_max,
        shrink=shrink,
        shrink_mass=shrink_mass,
        compute_auxvars=compute_auxvars)

    if isinstance(inputs, MCInput):
        cluster_func = cluster_mc
    elif isinstance(inputs, h5.Dataset):
        cluster_func = cluster_hdf5
    else:
        cluster_func = cluster_iterable
        kwargs['events'] = events
        kwargs['skip_failed'] = skip_failed
        if not inspect.isgenerator(inputs) and not isinstance(inputs, list):
            # handle case where input is just one event
            inputs = [inputs]

    for event in cluster_func(inputs, **kwargs):
        yield event
def rlencode(array, chunksize=None):
    """
    Run length encoding.
    Based on http://stackoverflow.com/a/32681075, which is based on the rle
    function from R.

    TAKEN FROM COOLER

    Parameters
    ----------
    x : 1D array_like
        Input array to encode
    dropna: bool, optional
        Drop all runs of NaNs.

    Returns
    -------
    start positions, run lengths, run values
    """
    where = np.flatnonzero
    if not isinstance(array, h5py.Dataset):
        array = np.asarray(array)
    n = len(array)
    if n == 0:
        return (np.array([], dtype=int),
                np.array([], dtype=int),
                np.array([], dtype=array.dtype))

    if chunksize is None:
        chunksize = n

    starts, values = [], []
    last_val = np.nan
    for i in range(0, n, chunksize):
        x = array[i:i+chunksize]
        locs = where(x[1:] != x[:-1]) + 1
        if x[0] != last_val:
            locs = np.r_[0, locs]
        starts.append(i + locs)
        values.append(x[locs])
        last_val = x[-1]

    starts = np.concatenate(starts)
    lengths = np.diff(np.r_[starts, n])
    values = np.concatenate(values)

    return starts, lengths, values