Python h5py module: Dataset() code examples

We extracted the following 43 code examples from open-source Python projects to illustrate how to use h5py.Dataset().
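Before the project examples, a minimal, hedged sketch of how h5py.Dataset objects normally come into existence (the file name 'example.h5' is a hypothetical placeholder): datasets are not constructed directly but are returned by create_dataset() or by indexing a File/Group, and isinstance(obj, h5py.Dataset) is the usual way to tell datasets from groups.

import h5py
import numpy as np

# Minimal sketch ('example.h5' is a hypothetical file name).
with h5py.File('example.h5', 'w') as f:
    dset = f.create_dataset('a/b/data', data=np.arange(10, dtype=np.float32))
    dset.attrs['units'] = 'counts'          # attributes hang off the dataset
    print(isinstance(dset, h5py.Dataset))   # True
    print(isinstance(f['a'], h5py.Group))   # True (intermediate groups are created)
    print(dset.shape, dset.dtype)           # (10,) float32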

Project: FoundryDataBrowser    Author: ScopeFoundry
def _visitfunc(self, name, node):

        level = len(name.split('/'))
        indent = ' '*4*(level-1)

        #indent = '<span style="color:blue;">'.format(level*4)
        localname = name.split('/')[-1]

        #search_text = self.settings['search_text'].lower()
        search_text = self.search_text
        if search_text and (search_text in localname.lower()):
            localname = """<span style="color: red;">{}</span>""".format(localname)

        if isinstance(node, h5py.Group):
            self.tree_str += indent +"|> <b>{}/</b><br/>".format(localname)
        elif isinstance(node, h5py.Dataset):
            self.tree_str += indent +"|D <b>{}</b>: {} {}<br/>".format(localname, node.shape, node.dtype)
        for key, val in node.attrs.items():
            if search_text:
                if search_text in str(key).lower(): 
                    key = """<span style="color: red;">{}</span>""".format(key)
                if search_text in str(val).lower(): 
                    val = """<span style="color: red;">{}</span>""".format(val)
            self.tree_str += indent+"&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;|- <i>{}</i> = {}<br/>".format(key, val)
Project: odin    Author: imito
def get_all_hdf_dataset(hdf, fileter_func=None, path='/'):
  res = []
  # init queue
  q = queue()
  for i in hdf[path].keys():
    q.put(i)
  # get list of all file
  while not q.empty():
    p = q.pop()
    if 'Dataset' in str(type(hdf[p])):
      if fileter_func is not None and not fileter_func(p):
        continue
      res.append(p)
    elif 'Group' in str(type(hdf[p])):
      for i in hdf[p].keys():
        q.put(p + '/' + i)
  return res
Project: wradlib    Author: wradlib
def from_hdf5(fpath, dataset="data"):
    """Loading data from hdf5 files that was stored by \
    :meth:`~wradlib.io.to_hdf5`

    Parameters
    ----------
    fpath : string
        path to the hdf5 file
    dataset : string
        name of the Dataset in which the data is stored
    """
    f = h5py.File(fpath, mode="r")
    # Check whether Dataset exists
    if dataset not in f.keys():
        print("Cannot read Dataset <%s> from hdf5 file <%s>" % (dataset, f))
        f.close()
        sys.exit()
    data = np.array(f[dataset][:])
    # get metadata
    metadata = {}
    for key in f[dataset].attrs.keys():
        metadata[key] = f[dataset].attrs[key]
    f.close()
    return data, metadata
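A hedged usage sketch for from_hdf5 above (the file name 'radar.h5' is hypothetical and is assumed to have been written by the matching to_hdf5 helper):

# Returns the dataset contents as a numpy array plus its attributes as a dict.
data, metadata = from_hdf5('radar.h5', dataset='data')
print(data.shape, sorted(metadata.keys()))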
Project: fbpic    Author: fbpic
def setup_openpmd_species_record( self, grp, quantity ) :
        """
        Set the attributes that are specific to a species record

        Parameter
        ---------
        grp : an h5py.Group object or h5py.Dataset
            The group that correspond to `quantity`
            (in particular, its path must end with "/<quantity>")

        quantity : string
            The name of the record being setup
            e.g. "position", "momentum"
        """
        # Generic setup
        self.setup_openpmd_record( grp, quantity )

        # Weighting information
        grp.attrs["macroWeighted"] = macro_weighted_dict[quantity]
        grp.attrs["weightingPower"] = weighting_power_dict[quantity]
Project: pwtools    Author: elcorto
def read_h5(fn):
    """Read h5 file into dict.

    Dict keys are the group + dataset names, e.g. '/a/b/c/dset'. All keys start
    with a leading slash even if written without (see :func:`write_h5`).

    Parameters
    ----------
    fn : str
        filename

    Examples
    --------
    >>> read_h5('foo.h5').keys()
    ['/a/b/d1', '/a/b/d2', '/a/c/d3', '/x/y/z']
    """
    fh = h5py.File(fn, mode='r') 
    dct = {}
    def get(name, obj, dct=dct):
        if isinstance(obj, h5py.Dataset):
            _name = name if name.startswith('/') else '/'+name
            dct[_name] = obj.value
    fh.visititems(get)            
    fh.close()
    return dct
Project: h5writer    Author: mhantke
def _resize_stacks(self, stack_length, group_prefix="/"):
        if group_prefix == "/":
            log_info(logger, self._log_prefix + "Resize datasets to new length: %i" % stack_length)
        if stack_length == 0:
            log_warning(logger, self._log_prefix + "Cannot resize stacks to length 0. Skip resize stacks.")
            return
        keys = self._f[group_prefix].keys()
        keys.sort()
        for k in keys:
            name = group_prefix + k
            if isinstance(self._f[name], h5py.Dataset):
                if self._is_stack(name):
                    self._resize_stack(stack_length, name)
            else:
                self._resize_stacks(stack_length, name + "/")
        self._stack_length = stack_length
Project: ANI1_dataset    Author: isayev
def h5py_dataset_iterator(self,g, prefix=''):
        for key in g.keys():
            item = g[key]
            path = '{}/{}'.format(prefix, key)
            keys = [i for i in item.keys()]
            if isinstance(item[keys[0]], h5py.Dataset): # test for dataset
                data = {'path':path}
                for k in keys:
                    if not isinstance(item[k], h5py.Group):
                        dataset = np.array(item[k].value)

                        if type(dataset) is np.ndarray:
                            if dataset.size != 0:
                                if type(dataset[0]) is np.bytes_:
                                    dataset = [a.decode('ascii') for a in dataset]

                        data.update({k:dataset})

                yield data
            else: # test for group (go down)
                yield from self.h5py_dataset_iterator(item, path)
Project: crema    Author: bmcfee
def load_h5(filename):
    '''Load data from an hdf5 file created by `save_h5`.

    Parameters
    ----------
    filename : str
        Path to the hdf5 file

    Returns
    -------
    data : dict
        The key-value data stored in `filename`

    See Also
    --------
    save_h5
    '''
    data = {}

    def collect(k, v):
        if isinstance(v, h5py.Dataset):
            data[k] = v.value

    with h5py.File(filename, mode='r') as hf:
        hf.visititems(collect)

    return data
Project: digital_rf    Author: MITHaystack
def _populate_data(self, ret_dict, obj, name):
        """Read data recursively from an HDF5 value and add it to `ret_dict`.

        If `obj` is a dataset, it is added to `ret_dict`. If `obj` is a group,
        a sub-dictionary is created in `ret_dict` for `obj` and populated
        recursively by calling this function on all of  the items in the `obj`
        group.

        Parameters
        ----------

        ret_dict : OrderedDict
            Dictionary to which metadata will be added.

        obj : h5py.Dataset | h5py.Group
            HDF5 value from which to read metadata.

        name : valid dictionary key
            Dictionary key in `ret_dict` under which to store the data from
            `obj`.

        """
        if isinstance(obj, h5py.Dataset):
            # [()] casts a Dataset as a numpy array
            ret_dict[name] = obj[()]
        else:
            # create a dictionary for this group
            ret_dict[name] = {}
            for key, value in obj.items():
                self._populate_data(ret_dict[name], value, key)
Project: tflearn    Author: tflearn
def slice_array(X, start=None, stop=None):
    if type(X) == list:
        if hasattr(start, '__len__'):
            return [x[start] for x in X]
        else:
            return [x[start:stop] for x in X]
    if H5PY_SUPPORTED:
        if type(X) == h5py.Dataset:
            return [X[i] for i in start]
    if hasattr(start, '__len__'):
        return X[start]
    else:
        return X[start:stop]
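A hedged illustration of why the h5py.Dataset branch above indexes row by row ('example.h5' is a hypothetical file name): h5py's fancy indexing is more restrictive than NumPy's, so pulling rows one at a time works for any index order.

import h5py
import numpy as np

with h5py.File('example.h5', 'w') as f:
    X = f.create_dataset('X', data=np.arange(20).reshape(10, 2))
    rows = [7, 2, 5]                        # unsorted index list
    # X[rows] would likely fail: h5py expects increasing indices for fancy indexing.
    batch = np.array([X[i] for i in rows])  # per-row indexing works for any order
    print(batch.shape)                      # (3, 2)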
Project: yt    Author: yt-project
def Dataset(self):
        if self._Dataset is None:
            try:
                from netCDF4 import Dataset
            except ImportError:
                Dataset = NotAModule(self._name)
            self._Dataset = Dataset
        return self._Dataset
Project: yt    Author: yt-project
def Dataset(self):
        if self._err:
            raise self._err
        if self._Dataset is None:
            try:
                from h5py import Dataset
            except ImportError:
                Dataset = NotAModule(self._name)
            self._Dataset = Dataset
        return self._Dataset
Project: nyroglancer    Author: funkey
def __traverse_add(self, item, filename):

        if isinstance(item, h5py.Dataset):
            self.add_dataset(item, filename + item.name)
        elif isinstance(item, h5py.Group):
            for k in item:
                self.__traverse_add(item[k], filename)
        else:
            print("Skipping " + item.name)
Project: CRIkit2    Author: CoherentRamanNIST
def _convert_to_np_dtype(dset):
    """
    Given an HDF5 dataset, return the values in a numpy-builtin datatype

    Parameters
    ----------
    dset : h5py.Dataset
        HDF5 (h5py) dataset

    Returns
    -------
    out : numpy.ndarray (dtype = numpy built-in)

    Note
    ----
    The software accounts for big-/little-endianness, and the inability of \
    hdf5 to natively store complex numbers.

    """
    assert isinstance(dset, _h5py.Dataset), 'Input is not of type h5py.Dataset'
    # Single datatype
    if len(dset.dtype) == 0:
        converted = _np.ndarray(dset.shape, dtype = dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
        if issubclass(converted.dtype.type, _np.integer):  # Integer to float
            converted = converted.astype(_np.float)
        return converted
    #Compound datatype of length 2-- assumed ('Re','Im')
    elif len(dset.dtype) == 2:
        print('Warning: h5py.complex_names set incorrectly using \'{}\' and \'{}\' \
for Re and Im, respectively'.format(dset.dtype.names[0], dset.dtype.names[1]))
        _h5py.get_config().complex_names = (dset.dtype.names[0],dset.dtype.names[1])
        dset = dset.file[dset.name]
        converted = _np.ndarray(dset.shape, dtype = dset.dtype.newbyteorder('='))
        dset.read_direct(converted)
    # Unknown datatype
    else:
        print('Warning: Unknown datatype. Returning dataset values as is.')
        return dset.value
    return converted
Project: chainer-deconv    Author: germanRos
def check_serialize(self, data):
        ret = self.serializer('w', data)
        dset = self.hdf5file['w']

        self.assertIsInstance(dset, h5py.Dataset)
        self.assertEqual(dset.shape, data.shape)
        self.assertEqual(dset.size, data.size)
        self.assertEqual(dset.dtype, data.dtype)
        read = numpy.empty((2, 3), dtype=numpy.float32)
        dset.read_direct(read)
        numpy.testing.assert_array_equal(read, cuda.to_cpu(data))

        self.assertEqual(dset.compression_opts, 3)

        self.assertIs(ret, data)
Project: chainer-deconv    Author: germanRos
def test_serialize_scalar(self):
        ret = self.serializer('x', 10)
        dset = self.hdf5file['x']

        self.assertIsInstance(dset, h5py.Dataset)
        self.assertEqual(dset.shape, ())
        self.assertEqual(dset.size, 1)
        self.assertEqual(dset.dtype, int)
        read = numpy.empty((), dtype=numpy.int32)
        dset.read_direct(read)
        self.assertEqual(read, 10)

        self.assertEqual(dset.compression_opts, None)

        self.assertIs(ret, 10)
Project: theanomodels    Author: clinicalml
def loadDataHDF5(data):
    if isinstance(data,h5py.File) or isinstance(data,h5py.Group):
        return {k:loadDataHDF5(v) for k,v in data.iteritems()}
    elif isinstance(data,h5py.Dataset):
        return data.value
    else:
        print 'unhandled datatype: %s' % type(data)
Project: FoundryDataBrowser    Author: ScopeFoundry
def _visitfunc(self, name, node):

        level = len(name.split('/'))
        indent = '    '*level
        localname = name.split('/')[-1]

        if isinstance(node, h5py.Group):
            self.tree_str += indent +"|> {}\n".format(localname)
        elif isinstance(node, h5py.Dataset):
            self.tree_str += indent +"|D {}: {} {}\n".format(localname, node.shape, node.dtype)
        for key, val in node.attrs.items():
            self.tree_str += indent+"    |- {} = {}\n".format(key, val)
Project: odin    Author: imito
def __init__(self, dataset, hdf=None, dtype=None, shape=None):
    super(Hdf5Data, self).__init__()
    raise Exception("Hdf5Data is under-maintanance!")
    # default chunks size is 32 (reduce complexity of the works)
    self._chunk_size = 32
    if isinstance(hdf, str):
      hdf = open_hdf5(hdf)
    if hdf is None and not isinstance(dataset, h5py.Dataset):
      raise ValueError('Cannot initialize dataset without hdf file')

    if isinstance(dataset, h5py.Dataset):
      self._data = dataset
      self._hdf = dataset.file
    else:
      if dataset not in hdf: # not created dataset
        if dtype is None or shape is None:
          raise ValueError('dtype and shape must be specified if '
                           'dataset has not created in hdf5 file.')
        shape = tuple([0 if i is None else i for i in shape])
        hdf.create_dataset(dataset, dtype=dtype,
            chunks=_get_chunk_size(shape, self._chunk_size),
            shape=shape, maxshape=(None, ) + shape[1:])

      self._data = hdf[dataset]
      if shape is not None and self._data[0].shape[1:] != shape[1:]:
        raise ValueError('Shape mismatch between predefined dataset '
                         'and given shape, {} != {}'
                         ''.format(shape, self._data[0].shape))
      self._hdf = hdf

  # ==================== properties ==================== #
Project: satpy    Author: pytroll
def collect_metadata(self, name, obj):
        if isinstance(obj, h5py.Dataset):
            self.file_content[name] = obj
            self.file_content[name + "/dtype"] = obj.dtype
            self.file_content[name + "/shape"] = obj.shape
        self._collect_attrs(name, obj.attrs)
Project: satpy    Author: pytroll
def __getitem__(self, key):
        val = self.file_content[key]
        if isinstance(val, h5py.Dataset):
            # these datasets are closed and inaccessible when the file is closed, need to reopen
            return h5py.File(self.filename, 'r')[key].value
        return val
Project: fbpic    Author: fbpic
def setup_openpmd_species_component( self, grp, quantity ) :
        """
        Set the attributes that are specific to a species component

        Parameter
        ---------
        grp : an h5py.Group object or h5py.Dataset

        quantity : string
            The name of the component
        """
        self.setup_openpmd_component( grp )
Project: fbpic    Author: fbpic
def setup_openpmd_record( self, dset, quantity ) :
        """
        Sets the attributes of a record, that comply with OpenPMD

        Parameter
        ---------
        dset : an h5py.Dataset or h5py.Group object

        quantity : string
           The name of the record considered
        """
        dset.attrs["unitDimension"] = unit_dimension_dict[quantity]
        # No time offset (approximation)
        dset.attrs["timeOffset"] = 0.
Project: fbpic    Author: fbpic
def setup_openpmd_component( self, dset ) :
        """
        Sets the attributes of a component, that comply with OpenPMD

        Parameter
        ---------
        dset : an h5py.Dataset or h5py.Group object
        """
        dset.attrs["unitSI"] = 1.
Project: deepjets    Author: deepjets
def reconstruct(particles, events=-1,
                config='delphes_card_ATLAS_NoFastJet.tcl',
                objects='Calorimeter/towers',
                random_state=0):

    if not os.path.exists(config):
        internal_config = os.path.join(
            os.environ.get('DEEPJETS_DIR'),
            'config', 'delphes', config)
        if not os.path.isabs(config) and os.path.exists(internal_config):
            log.warning("{0} does not exist but using internal "
                        "config with the same name instead: {1}".format(
                            config, internal_config))
            config = internal_config
        else:
            raise IOError("Delphes config not found: {0}".format(config))
    delphes = DelphesWrapper(config, random_state, objects)

    kwargs = dict()

    if isinstance(particles, MCInput):
        reco_func = reconstruct_mc
    elif isinstance(particles, h5.Dataset):
        reco_func = reconstruct_hdf5
    else:
        reco_func = reconstruct_iterable
        kwargs['events'] = events

        if not inspect.isgenerator(particles) and not isinstance(particles, list):
            # handle case where input is just one event
            particles = [particles]

    for event in reco_func(delphes, particles, **kwargs):
        yield event
Project: h5sparse    Author: appier
def __getitem__(self, key):
        h5py_item = self.h5py_group[key]
        if isinstance(h5py_item, h5py.Group):
            if 'h5sparse_format' in h5py_item.attrs:
                # detect the sparse matrix
                return Dataset(h5py_item)
            else:
                return Group(h5py_item)
        elif isinstance(h5py_item, h5py.Dataset):
            return h5py_item
        else:
            raise ValueError("Unexpected item type.")
Project: h5sparse    Author: appier
def create_dataset(self, name, shape=None, dtype=None, data=None,
                       format='csr', indptr_dtype=np.int64, indices_dtype=np.int32,
                       **kwargs):
        """Create 4 datasets in a group to represent the sparse array."""
        if data is None:
            raise NotImplementedError("Only support create_dataset with "
                                      "existed data.")
        elif isinstance(data, Dataset):
            group = self.h5py_group.create_group(name)
            group.attrs['h5sparse_format'] = data.h5py_group.attrs['h5sparse_format']
            group.attrs['h5sparse_shape'] = data.h5py_group.attrs['h5sparse_shape']
            group.create_dataset('data', data=data.h5py_group['data'],
                                 dtype=dtype, **kwargs)
            group.create_dataset('indices', data=data.h5py_group['indices'],
                                 dtype=indices_dtype, **kwargs)
            group.create_dataset('indptr', data=data.h5py_group['indptr'],
                                 dtype=indptr_dtype, **kwargs)
        else:
            group = self.h5py_group.create_group(name)
            group.attrs['h5sparse_format'] = get_format_str(data)
            group.attrs['h5sparse_shape'] = data.shape
            group.create_dataset('data', data=data.data, dtype=dtype, **kwargs)
            group.create_dataset('indices', data=data.indices,
                                 dtype=indices_dtype, **kwargs)
            group.create_dataset('indptr', data=data.indptr,
                                 dtype=indptr_dtype, **kwargs)
        return Dataset(group)
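For orientation, a hedged sketch of the on-disk layout that create_dataset above produces for a CSR matrix, rebuilt here with plain h5py and scipy ('example.h5' is a hypothetical file name): one group holding data, indices, and indptr datasets plus the format/shape attributes.

import h5py
import numpy as np
import scipy.sparse as sp

m = sp.random(5, 4, density=0.3, format='csr')
with h5py.File('example.h5', 'w') as f:
    grp = f.create_group('X')
    grp.attrs['h5sparse_format'] = 'csr'
    grp.attrs['h5sparse_shape'] = m.shape
    grp.create_dataset('data', data=m.data)
    grp.create_dataset('indices', data=m.indices, dtype=np.int32)
    grp.create_dataset('indptr', data=m.indptr, dtype=np.int64)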
Project: evolution-strategies-starter    Author: openai
def initialize_from(self, filename, ob_stat=None):
        """
        Initializes weights from another policy, which must have the same architecture (variable names),
        but the weight arrays can be smaller than the current policy.
        """
        with h5py.File(filename, 'r') as f:
            f_var_names = []
            f.visititems(lambda name, obj: f_var_names.append(name) if isinstance(obj, h5py.Dataset) else None)
            assert set(v.name for v in self.all_variables) == set(f_var_names), 'Variable names do not match'

            init_vals = []
            for v in self.all_variables:
                shp = v.get_shape().as_list()
                f_shp = f[v.name].shape
                assert len(shp) == len(f_shp) and all(a >= b for a, b in zip(shp, f_shp)), \
                    'This policy must have more weights than the policy to load'
                init_val = v.eval()
                # ob_mean and ob_std are initialized with nan, so set them manually
                if 'ob_mean' in v.name:
                    init_val[:] = 0
                    init_mean = init_val
                elif 'ob_std' in v.name:
                    init_val[:] = 0.001
                    init_std = init_val
                # Fill in subarray from the loaded policy
                init_val[tuple([np.s_[:s] for s in f_shp])] = f[v.name]
                init_vals.append(init_val)
            self.set_all_vars(*init_vals)

        if ob_stat is not None:
            ob_stat.set_from_init(init_mean, init_std, init_count=1e5)
Project: klusta    Author: kwikteam
def _write_by_chunk(dset, arrs):
    # Note: arrs should be a generator for performance reasons.
    assert isinstance(dset, Dataset)
    # Start the data.
    offset = 0
    for arr in arrs:
        n = arr.shape[0]
        arr = arr[...]
        # Match the shape of the chunk array with the dset shape.
        assert arr.shape == (n,) + dset.shape[1:]
        dset[offset:offset + n, ...] = arr
        offset += arr.shape[0]
    # Check that the copy is complete.
    assert offset == dset.shape[0]
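A hedged usage sketch for _write_by_chunk above ('example.h5' is a hypothetical file name; the function is assumed to be in scope): the destination dataset is pre-allocated to the full length, and the chunks come from a generator so they are never all held in memory at once.

import h5py
import numpy as np

# Four (5, 3) chunks fill a pre-allocated (20, 3) dataset.
chunks = (np.full((5, 3), i, dtype=np.float32) for i in range(4))
with h5py.File('example.h5', 'w') as f:
    dset = f.create_dataset('data', shape=(20, 3), dtype=np.float32)
    _write_by_chunk(dset, chunks)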
Project: klusta    Author: kwikteam
def datasets(self, path='/'):
        """Return the list of datasets under a given node."""
        return [key for key in self.children(path)
                if isinstance(self._h5py_file[path + '/' + key],
                              h5py.Dataset)]

    # Miscellaneous properties
    #--------------------------------------------------------------------------
Project: klusta    Author: kwikteam
def _print_node_info(self, name, node):
        """Print node information."""
        info = ('/' + name).ljust(50)
        if isinstance(node, h5py.Group):
            pass
        elif isinstance(node, h5py.Dataset):
            info += str(node.shape).ljust(20)
            info += str(node.dtype).ljust(8)
        print(info)
Project: h5writer    Author: mhantke
def _write_to_f(self, name, data):
        if data is None:
            log_warning(logger, "Data %s is None! Skipping this item as we cannot write this data type." % name)
        elif name in self._f:
            log_warning(logger, "Dataset %s already exists! Overwriting with new data." % name)
        else:
            log_debug(logger, "Writing to dataset %s." % name)
            self._f[name] = data
Project: h5writer    Author: mhantke
def _create_dataset(self, data, name):
        data = numpy.asarray(data)
        try:
            h5py.h5t.py_create(data.dtype, logical=1)
        except TypeError:
            log_and_raise_error(logger, self._log_prefix + "Could not save dataset %s. Conversion to numpy array failed" % (name))
            return 1
        if data.nbytes == 0:
            log_and_raise_error(logger, self._log_prefix + "Could not save dataset %s. Dataset is empty" % (name))
            return 1
        maxshape = tuple([None]+list(data.shape))
        shape = tuple([self._chunksize]+list(data.shape))
        dtype = data.dtype
        if dtype.type is numpy.string_:
            dtype = h5py.new_vlen(str)
        nbytes_chunk = numpy.prod(shape) * dtype.itemsize
        if nbytes_chunk > CHUNKSIZE_MIN_IN_BYTES:
            chunksize = self._chunksize
            #log_debug(logger, self._log_prefix + "Do not increase chunksize (%i) for dataset %s (%i bytes for single data frame)" % (self._chunksize, name, nbytes_chunk))
        else:
            chunksize = int(numpy.ceil(float(CHUNKSIZE_MIN_IN_BYTES) / float(data.nbytes)))
            log_debug(logger, self._log_prefix + "Increase chunksize from %i to %i for dataset %s (only %i bytes for single data frame)" % (self._chunksize, chunksize, name, nbytes_chunk))
        chunksize = min([chunksize, CHUNKSIZE_MAX_IN_FRAMES])
        chunks = tuple([chunksize]+list(data.shape))
        ndim = data.ndim
        axes = "experiment_identifier"
        if ndim == 1: axes = axes + ":x"
        elif ndim == 2: axes = axes + ":y:x"
        elif ndim == 3: axes = axes + ":z:y:x"
        log_debug(logger, self._log_prefix + "Create dataset %s [shape=%s, chunks=%s, dtype=%s]" % (name, str(shape), str(chunks), str(dtype)))
        self._f.create_dataset(name, shape, maxshape=maxshape, dtype=dtype, chunks=chunks)
        self._f[name].attrs.modify("axes",[axes])
        return 0
Project: neurodriver    Author: neurokernel
def dataset_append(dataset, arr):
    """
    Append an array to an h5py dataset.

    Parameters
    ----------
    dataset : h5py.Dataset
        Dataset to extend. Must be resizable in its first dimension.
    arr : numpy.ndarray
        Array to append. All dimensions of `arr` other than the first 
        dimension must be the same as those of the dataset.        
    """

    assert isinstance(dataset, h5py.Dataset)
    assert isinstance(arr, np.ndarray)

    # Save leading dimension of stored array:
    maxshape = list(dataset.shape)
    old_ld_dim = maxshape[0]

    # Extend leading dimension of stored array to accommodate new array:
    maxshape[0] += arr.shape[0]
    dataset.resize(maxshape)

    # Compute slices to use when assigning `arr` to array extension:
    slices = [slice(old_ld_dim, None)]
    for s in maxshape[1:]:
        slices.append(slice(None, None))

    # Convert list of slices to tuple because __setitem__ can 
    # only handle "simple" indexes:
    slices = tuple(slices)
    dataset.__setitem__(slices, arr)
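A hedged usage sketch for dataset_append above ('example.h5' is a hypothetical file name; the function is assumed to be in scope): the dataset must be created with maxshape=(None, ...) so its first dimension can grow.

import h5py
import numpy as np

with h5py.File('example.h5', 'w') as f:
    dset = f.create_dataset('log', shape=(0, 4), maxshape=(None, 4), dtype=np.float64)
    dataset_append(dset, np.random.rand(10, 4))
    dataset_append(dset, np.random.rand(5, 4))
    print(dset.shape)  # (15, 4)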
Project: anndata    Author: theislab
def __getitem__(self, key):
        h5py_item = self.h5py_group[key]
        if isinstance(h5py_item, h5py.Group):
            if 'h5sparse_format' in h5py_item.attrs:
                # detect the sparse matrix
                return SparseDataset(h5py_item)
            else:
                return Group(h5py_item)
        elif isinstance(h5py_item, h5py.Dataset):
            return h5py_item
        else:
            raise ValueError("Unexpected item type.")
Project: h5pyd    Author: HDFGroup
def setUp(self):
        TestCase.setUp(self)
        filename = self.getFileName("dataset_testempty")
        print("filename:", filename)
        """
        self.f = h5py.File(filename, 'w')
        sid = h5py.h5s.create(h5py.h5s.NULL)
        tid = h5py.h5t.C_S1.copy()
        tid.set_size(10)
        dsid = h5py.h5d.create(self.f.id, b'x', tid, sid)
        self.dset = h5py.Dataset(dsid)
        """
Project: bark    Author: kylerbrown
def arf2bark(arf_file, root_parent, timezone, verbose):
    with arf.open_file(arf_file, 'r') as af:
        # root
        root_dirname = os.path.splitext(arf_file)[0]
        root_path = os.path.join(os.path.abspath(root_parent), root_dirname)
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items(): # entries and top-level datasets
            if isinstance(entry, h5py.Group): # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                timestamp = entry_attrs.pop('timestamp')
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)
                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items(): # entry-level datasets
                    if ds_name == 'trig_in': # accessing trig_in -> segfault
                        found_trigin = True # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset): # top-level datasets
                if tle is None:
                    path = os.path.join(root_path, 'top_level')
                    tle = bark.create_entry(path, 0, parents=False).path
                transfer_dset(ename, entry, tle, verbose)
        if found_trigin:
            print('Warning: found datasets named "trig_in". Jill-created ' +
                  '"trig_in" datasets segfault when read, so these datasets' +
                  ' were skipped. If you know the datasets are good, rename' +
                  ' them and try again.')
    return bark.Root(root_path)
Project: CRIkit2    Author: CoherentRamanNIST
def hdf_is_valid_dsets(pth, filename, dset_list):
    """
    Validate file and datasets exist. Return boolean as to whether valid

    """
    # Join path and filename in an os-independant way
    pfname = _os.path.normpath(_os.path.join(pth, filename))


    isvalid = False
    fileexists = False

    try:
        f = _h5py.File(pfname, 'r')
        print('File exists: \'{}\''.format(pfname))
        fileexists = True
    except OSError:
        print('File does not exist: \'{}\''.format(pfname))
        fileexists = False
    else:
        if isinstance(dset_list, list):  # List of dataset(s)
            try:
                for count in dset_list:
                    f[count]
            except:
                print('dataset: {} is invalid'.format(count))
            else:
                print('All datasets are valid')
                isvalid = True
        elif isinstance(dset_list, str):  # Single dataset string name
            try:
                f[dset_list]
            except:
                print('dataset {} is invalid'.format(dset_list))
            else:
                print('Dataset is valid')
                isvalid = True
        else:
            print('dset_list is unrecognized type')
    finally:
        if fileexists:
            f.close()

        return isvalid
Project: gcMapExplorer    Author: rjdkmr
def _readMap(self, resolution=None):
        """ Temporarily store h5py.Dataset object to GCMAP.matrix and update all attributes for given map.
        """

        if self.groupName not in self.hdf5:
            raise util.MapNotFoundError(' [{0}] dataset not found in [{1}] file...'.format(self.groupName, self.hdf5.filename))

        # determining finest resolution map
        self.binsizes = []
        for key in self.hdf5[self.groupName].keys():
            if 'bNoData' not in key:
                self.binsizes.append( self.hdf5[self.groupName][key].attrs['binsize'] )
        self.binsizes = sorted(self.binsizes)

        # At the start, always choose finest resolution
        self.finestResolution = util.binsizeToResolution(self.binsizes[0])
        if resolution is not None:
            resolutionList = list( map(util.binsizeToResolution, self.binsizes ) )
            if resolution in resolutionList:
                self.resolution = resolution
            else:
                raise util.ResolutionNotFoundError (' "{0}" resolution not found for "{1}" in file: "{2}".'.format(resolution, self.groupName, self.hdf5.filename ) )
        else:
            self.resolution = self.finestResolution

        self.dtype = self.hdf5[self.groupName][self.resolution].dtype

        for key in ['xlabel', 'ylabel']:
            self.__dict__[key] = self.hdf5[self.groupName].attrs[key]

        for key in ['minvalue', 'maxvalue', 'binsize']:
            self.__dict__[key] = self.hdf5[self.groupName][self.resolution].attrs[key]

        self.shape = (self.hdf5[self.groupName][self.resolution].attrs['xshape'], self.hdf5[self.groupName][self.resolution].attrs['yshape'])
        self.xticks = [0, self.shape[0]*self.binsize]
        self.yticks = [0, self.shape[1]*self.binsize]

        self.title = self.xlabel + '_vs_' + self.ylabel

        if self.resolution+'-bNoData' in self.hdf5[self.groupName]:
            self.bNoData = np.asarray( self.hdf5[self.groupName][self.resolution+'-bNoData'][:], dtype=np.bool )
        self.matrix = self.hdf5[self.groupName][self.resolution]
Project: wradlib    Author: wradlib
def read_generic_hdf5(fname):
    """Reads hdf5 files according to their structure

    In contrast to other file readers under :meth:`wradlib.io`, this function
    will *not* return a two item tuple with (data, metadata). Instead, this
    function returns ONE dictionary that contains all the file contents - both
    data and metadata. The keys of the output dictionary conform to the
    Group/Subgroup directory branches of the original file.

    Parameters
    ----------
    fname : string
        a hdf5 file path

    Returns
    -------
    output : dict
        a dictionary that contains both data and metadata according to the
        original hdf5 file structure

    Examples
    --------
    See :ref:`notebooks/fileio/wradlib_radar_formats.ipynb#Generic-HDF5`.
    """
    f = h5py.File(fname, "r")
    fcontent = {}

    def filldict(x, y):
        # create a new container
        tmp = {}
        # add attributes if present
        if len(y.attrs) > 0:
            tmp['attrs'] = dict(y.attrs)
        # add data if it is a dataset
        if isinstance(y, h5py.Dataset):
            tmp['data'] = np.array(y)
        # only add to the dictionary, if we have something meaningful to add
        if tmp != {}:
            fcontent[x] = tmp

    f.visititems(filldict)

    f.close()

    return fcontent
Project: wradlib    Author: wradlib
def read_OPERA_hdf5(fname):
    """Reads hdf5 files according to OPERA conventions

    Please refer to the OPERA data model documentation :cite:`OPERA-data-model`
    in order to understand how an hdf5 file is organized that conforms to the
    OPERA ODIM_H5 conventions.

    In contrast to other file readers under :meth:`wradlib.io`, this function
    will *not* return a two item tuple with (data, metadata). Instead, this
    function returns ONE dictionary that contains all the file contents - both
    data and metadata. The keys of the output dictionary conform to the
    Group/Subgroup directory branches of the original file.
    If the end member of a branch (or path) is "data", then the corresponding
    item of output dictionary is a numpy array with actual data.

    Any other end member (either *how*, *where*,
    and *what*) will contain the meta information applying to the corresponding
    level of the file hierarchy.

    Parameters
    ----------
    fname : string
        a hdf5 file path

    Returns
    -------
    output : dict
        a dictionary that contains both data and metadata according to the
        original hdf5 file structure
    """
    f = h5py.File(fname, "r")

    # now we browse through all Groups and Datasets and store the info in one
    # dictionary
    fcontent = {}

    def filldict(x, y):
        if isinstance(y, h5py.Group):
            if len(y.attrs) > 0:
                fcontent[x] = dict(y.attrs)
        elif isinstance(y, h5py.Dataset):
            fcontent[x] = np.array(y)

    f.visititems(filldict)

    f.close()

    return fcontent
Project: deepjets    Author: deepjets
def cluster(inputs,
            events=-1,
            skip_failed=True,
            eta_max=5.,
            jet_size=1.0, subjet_size=0.3,
            subjet_pt_min_fraction=0.05,
            subjet_dr_min=0.,
            trimmed_pt_min=-1., trimmed_pt_max=-1.,
            trimmed_mass_min=-1., trimmed_mass_max=-1.,
            shrink=False, shrink_mass=-1,
            compute_auxvars=False):
    """
    Cluster particles into jets. Inputs may be an MCInput, h5py Dataset,
    an array of particles (single event) or a generator that yields events
    of particles.

    The events and skip_failed arguments are only applied in the case that
    inputs is a generator function.
    """

    if jet_size <= 0:
        raise ValueError("jet_size must be greater than zero")

    if subjet_size <= 0 or subjet_size > 0.5 * jet_size:
        raise ValueError(
            "subjet_size must be in the range (0, 0.5 * jet_size]")

    kwargs = dict(
        eta_max=eta_max,
        jet_size=jet_size,
        subjet_size=subjet_size,
        subjet_pt_min_fraction=subjet_pt_min_fraction,
        subjet_dr_min=subjet_dr_min,
        trimmed_pt_min=trimmed_pt_min,
        trimmed_pt_max=trimmed_pt_max,
        trimmed_mass_min=trimmed_mass_min,
        trimmed_mass_max=trimmed_mass_max,
        shrink=shrink,
        shrink_mass=shrink_mass,
        compute_auxvars=compute_auxvars)

    if isinstance(inputs, MCInput):
        cluster_func = cluster_mc
    elif isinstance(inputs, h5.Dataset):
        cluster_func = cluster_hdf5
    else:
        cluster_func = cluster_iterable
        kwargs['events'] = events
        kwargs['skip_failed'] = skip_failed

        if not inspect.isgenerator(inputs) and not isinstance(inputs, list):
            # handle case where input is just one event
            inputs = [inputs]

    for event in cluster_func(inputs, **kwargs):
        yield event
Project: hic2cool    Author: 4dn-dcic
def rlencode(array, chunksize=None):
    """
    Run length encoding.
    Based on http://stackoverflow.com/a/32681075, which is based on the rle
    function from R.

    TAKEN FROM COOLER

    Parameters
    ----------
    array : 1D array_like
        Input array to encode. May be an h5py.Dataset, in which case it is
        read in chunks.
    chunksize : int, optional
        Number of elements to read per chunk; defaults to the full length.

    Returns
    -------
    start positions, run lengths, run values

    """
    where = np.flatnonzero
    if not isinstance(array, h5py.Dataset):
        array = np.asarray(array)
    n = len(array)
    if n == 0:
        return (np.array([], dtype=int),
                np.array([], dtype=int),
                np.array([], dtype=array.dtype))

    if chunksize is None:
        chunksize = n

    starts, values = [], []
    last_val = np.nan
    for i in range(0, n, chunksize):
        x = array[i:i+chunksize]
        locs = where(x[1:] != x[:-1]) + 1
        if x[0] != last_val:
            locs = np.r_[0, locs]
        starts.append(i + locs)
        values.append(x[locs])
        last_val = x[-1]
    starts = np.concatenate(starts)
    lengths = np.diff(np.r_[starts, n])
    values = np.concatenate(values)

    return starts, lengths, values
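A hedged usage sketch for rlencode above; it accepts in-memory arrays as well as h5py datasets (read chunk by chunk when chunksize is given).

import numpy as np

starts, lengths, values = rlencode(np.array([1, 1, 1, 2, 2, 3, 1, 1]))
print(starts)   # [0 3 5 6]
print(lengths)  # [3 2 1 2]
print(values)   # [1 2 3 1]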