Python h5py module: File() example source code

We extracted the following code examples from open-source Python projects to illustrate how to use h5py.File().
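Before diving into the project examples, here is a minimal sketch of the basic h5py.File pattern most of them build on (illustrative only; the filename is a placeholder):

import h5py
import numpy as np

# 'w' creates or truncates the file; the context manager closes it on exit
with h5py.File('example.h5', 'w') as f:
    f.create_dataset('data', data=np.arange(10))

# 'r' opens the file read-only; slicing a dataset returns a numpy array
with h5py.File('example.h5', 'r') as f:
    arr = f['data'][:]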

Project: spyking-circus    Author: spyking-circus
def allocate(self, shape, data_dtype=None):

        if data_dtype is None:
            data_dtype = self.data_dtype

        if self._parallel_write:
            self.my_file = h5py.File(self.file_name, mode='w', driver='mpio', comm=comm)
            self.my_file.create_dataset(self.h5_key, dtype=data_dtype, shape=shape)
        else:
            self.my_file = h5py.File(self.file_name, mode='w')
            if self.is_master:
                if self.compression != '':
                    self.my_file.create_dataset(self.h5_key, dtype=data_dtype, shape=shape, compression=self.compression, chunks=True)
                else:
                    self.my_file.create_dataset(self.h5_key, dtype=data_dtype, shape=shape, chunks=True)

        self.my_file.close()
        self._read_from_header()
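The parallel branch above uses h5py's MPI ("mpio") driver. As a minimal standalone sketch of that pattern, assuming h5py was built against parallel HDF5 and mpi4py is installed (run under mpiexec):

from mpi4py import MPI
import h5py

comm = MPI.COMM_WORLD
# every rank must open the file collectively with the mpio driver
with h5py.File('parallel.h5', 'w', driver='mpio', comm=comm) as f:
    dset = f.create_dataset('data', shape=(comm.size, 100), dtype='f4')
    dset[comm.rank] = comm.rank  # each rank writes its own row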
Project: spyking-circus    Author: spyking-circus
def test_patch_for_similarities(params, extension):

    file_out_suff  = params.get('data', 'file_out_suff')
    template_file  = file_out_suff + '.templates%s.hdf5' %extension
    if os.path.exists(template_file):
        try:
            myfile = h5py.File(template_file, 'r', libver='latest')
            version = myfile.get('version')[0].decode('ascii')
            myfile.close()
        except Exception:
            version = None
    else:
        raise Exception('No templates found! Check suffix?')

    if version is not None and StrictVersion(version) >= StrictVersion('0.6.0'):
        return True
    else:
        print_and_log(["Version is below 0.6.0"], 'debug', logger)
        return False
Project: spyking-circus    Author: spyking-circus
def test_validating(self):
        #mpi_launch('fitting', self.file_name, 2, 0, 'False')


        a, b            = os.path.splitext(os.path.basename(self.file_name))
        file_name, ext  = os.path.splitext(self.file_name)
        file_out        = os.path.join(os.path.abspath(file_name), a)
        result_name     = os.path.join(file_name, 'injected')
        spikes          = {}
        result          = h5py.File(os.path.join(result_name, '%s.result.hdf5' %a), 'r')
        for key in result.get('spiketimes').keys():
            spikes[key] = result.get('spiketimes/%s' %key)[:]

        juxta_file = file_out + '.juxta.dat'

        f = numpy.memmap(juxta_file, shape=(self.length,1), dtype=self.parser.get('validating', 'juxta_dtype'), mode='w+')
        f[spikes['temp_9']] = 100
        del f

        mpi_launch('validating', self.file_name, 2, 0, 'False')
Project: cellranger    Author: 10XGenomics
def report(self, summary_json_paths, barcode_summary_h5_path, recovered_cells, cell_bc_seqs):
        assert len(cell_bc_seqs) == len(self.matrices)

        barcode_summary_h5 = h5.File(barcode_summary_h5_path, 'r')

        d = {}

        d.update(self._report_genome_agnostic_metrics(
            summary_json_paths, barcode_summary_h5, recovered_cells, cell_bc_seqs))

        # Compute genome-specific metrics
        for i, (genome, matrix) in enumerate(self.matrices.iteritems()):
            for key, value in matrix.report(genome,
                                            barcode_summary_h5,
                                            recovered_cells,
                                            cell_bc_seqs=cell_bc_seqs[i],
                                        ).iteritems():
                key = '_'.join([genome, key])
                d[key] = value
        return d
Project: cellranger    Author: 10XGenomics
def write_data_frame(fn, df):
    ''' Write the pandas dataframe object to an HDF5 file.  Each column is written as a single 1D dataset at the top
    level of the HDF5 file, using the native pandas datatype'''

    # Always write a fresh file -- the 'w' argument to h5py.File is supposed to truncate an existing file, but it doesn't appear to work correctly
    if os.path.exists(fn):
        os.remove(fn)

    f = h5py.File(fn, "w")

    # To preserve column order, write columns to an attribute
    column_names = np.array(list(df.columns))
    f.attrs.create("column_names", column_names)

    for col in df.columns:
        write_data_column(f, df[col])

    f.close()
Project: cellranger    Author: 10XGenomics
def read_data_frame(fn, query_cols=[]):
    ''' Load a pandas DataFrame from an HDF5 file. If a column list is specified, only load the matching columns '''

    with h5py.File(fn, 'r') as f:

        column_names = f.attrs.get("column_names")
        column_names = get_column_intersection(column_names, query_cols)

        df = p.DataFrame()

        # Add the columns progressively to save memory
        for name in column_names:
            ds = f[name]
            if has_levels(ds):
                indices = ds[:]
                uniques = get_levels(ds)
                # This method of constructing of Categorical avoids copying the indices array
                # which saves memory for big datasets
                df[name] = p.Categorical(indices, categories=uniques, ordered=False, fastpath=True)
            else:
                df[name] = p.Series(ds[:])

        return df
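A hypothetical round trip with the two helpers above (column names and values are made up for illustration):

import pandas as pd

df = pd.DataFrame({'chrom': ['chr1', 'chr2'], 'count': [10, 20]})
write_data_frame('frame.h5', df)                       # helper defined above
subset = read_data_frame('frame.h5', query_cols=['count'])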
Project: cellranger    Author: 10XGenomics
def read_data_frame_indexed_no_concat(fn, tabix_queries, query_cols = [], coords = True):
    ''' Read rows from the HDF5 data frame that match each tabix query in the
    queries list.  A tabix query is in the form ('chr1', 100, 200). query_cols
    is a list of columns you want to return. If coords is True, then it will
    return coordinates regardless of query_cols. If coords is False, it will
    only return the columns specified in query_cols. Returns a list of pandas
    DataFrames, one for each query. '''

    f = h5py.File(fn, 'r')

    # read the index
    tabix_index = read_tabix_index(f)

    dfs = []
    for q in tabix_queries:
        r = _read_data_frame_indexed_sub(f, tabix_index, q, query_cols = query_cols, coords = coords)
        dfs.append(r)

    f.close()

    # Return the union of the queries
    return dfs
Project: Fast5-to-Fastq    Author: rrwick
def check_filters(fast5_file, min_length, min_mean_qual, min_qual_window, window_size):
    try:
        hdf5_file = h5py.File(fast5_file, 'r')
        names = get_hdf5_names(hdf5_file)
        basecall_location = get_best_fastq_hdf5_location(hdf5_file, names)
        if basecall_location:
            fastq_str = hdf5_file[basecall_location].value
            try:
                parts = fastq_str.split(b'\n')
                seq, quals = parts[1], parts[3]
            except IndexError:
                fastq_str, seq, quals = '', '', ''
            if not fastq_str or not seq:
                return False, 0
            if min_mean_qual and get_mean_qscore(quals) < min_mean_qual:
                return False, 0
            if min_length and len(seq) < min_length:
                return False, 0
            if min_qual_window and get_min_window_qscore(quals, window_size) < min_qual_window:
                return False, 0
            return True, len(seq)
    except (IOError, RuntimeError):
        pass
    return False, 0
Project: Fast5-to-Fastq    Author: rrwick
def min_window_qual_and_length(fast5_file, window_size):
    try:
        hdf5_file = h5py.File(fast5_file, 'r')
        names = get_hdf5_names(hdf5_file)
        basecall_location = get_best_fastq_hdf5_location(hdf5_file, names)
        if basecall_location:
            fastq_str = hdf5_file[basecall_location].value
            try:
                parts = fastq_str.split(b'\n')
                seq, quals = parts[1], parts[3]
                return get_min_window_qscore(quals, window_size), len(seq), fast5_file
            except IndexError:
                pass
    except (IOError, RuntimeError):
        pass
    return 0.0, 0, fast5_file
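Note: the Dataset.value attribute used in the two Fast5-to-Fastq snippets above (and in several NeoAnalysis snippets below) was deprecated in h5py 2.x and removed in h5py 3.0. With a current h5py the equivalent full read is:

fastq_str = hdf5_file[basecall_location][()]  # modern replacement for .value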
Project: pointnet    Author: charlesq34
def save_h5_data_label_normal(h5_filename, data, label, normal, 
        data_dtype='float32', label_dtype='uint8', normal_dtype='float32'):
    h5_fout = h5py.File(h5_filename, 'w')
    h5_fout.create_dataset(
            'data', data=data,
            compression='gzip', compression_opts=4,
            dtype=data_dtype)
    h5_fout.create_dataset(
            'normal', data=normal,
            compression='gzip', compression_opts=4,
            dtype=normal_dtype)
    h5_fout.create_dataset(
            'label', data=label,
            compression='gzip', compression_opts=1,
            dtype=label_dtype)
    h5_fout.close()


# Write numpy array data and label to h5_filename
Project: genomedisco    Author: kundajelab
def main():
    parser = generate_parser()
    args = parser.parse_args()
    infile1 = h5py.File(args.input1, 'r')
    infile2 = h5py.File(args.input2, 'r')
    resolutions = numpy.intersect1d(infile1['resolutions'][...], infile2['resolutions'][...])
    chroms = numpy.intersect1d(infile1['chromosomes'][...], infile2['chromosomes'][...])
    results = {}
    data1 = load_data(infile1, chroms, resolutions)
    data2 = load_data(infile2, chroms, resolutions)
    infile1.close()
    infile2.close()
    results[(args.input1.split('/')[-1].strip('.quasar'), args.input2.split('/')[-1].strip('.quasar'))] = correlate_samples(data1, data2)
    for resolution in data1.keys():
        for chromo in chroms:
            plt.scatter(data1[resolution][chromo][1].flatten(),data2[resolution][chromo][1].flatten(),alpha=0.1,color='red')
            plt.savefig(args.output+'.res'+str(resolution)+'.chr'+chromo+'.pdf')
            plt.show()
Project: genomedisco    Author: kundajelab
def fill_hdf5_with_sparse_by_chunk(mym1,mym2,fname,chunksize):
    start1=0
    end1=0
    n=mym1.shape[0]

    f=h5py.File(fname,'w')
    m1hdf5=f.create_dataset('m1',shape=(n,n),dtype='float')
    m2hdf5=f.create_dataset('m2',shape=(n,n),dtype='float')

    while end1<n:
        end1=np.min([n,(start1+chunksize)])
        print 'start1: '+str(start1)

        if (end1-start1)==1:
            m1hdf5[start1,:]=mym1[start1,:].toarray()
            m2hdf5[start1,:]=mym2[start1,:].toarray()
        else:
            m1hdf5[start1:end1,:]=mym1[start1:end1,:].toarray()
            m2hdf5[start1:end1,:]=mym2[start1:end1,:].toarray()
        start1=end1
    print 'sum of 1'
    print m1hdf5[:,:].sum()
    print m2hdf5[:,:].sum()
    f.close()
Project: NeoAnalysis    Author: neoanalysis
def __init__(self, data=None, info=None, dtype=None, file=None, copy=False, **kwargs):
        object.__init__(self)
        #self._infoOwned = False
        self._isHDF = False

        if file is not None:
            self._data = None
            self.readFile(file, **kwargs)
            if kwargs.get("readAllData", True) and self._data is None:
                raise Exception("File read failed: %s" % file)
        else:
            self._info = info
            if (hasattr(data, 'implements') and data.implements('MetaArray')):
                self._info = data._info
                self._data = data.asarray()
            elif isinstance(data, tuple):  ## create empty array with specified shape
                self._data = np.empty(data, dtype=dtype)
            else:
                self._data = np.array(data, dtype=dtype, copy=copy)

        ## run sanity checks on info structure
        self.checkInfo()
Project: NeoAnalysis    Author: neoanalysis
def transpose(self, *args):
        if len(args) == 1 and hasattr(args[0], '__iter__'):
            order = args[0]
        else:
            order = args

        order = [self._interpretAxis(ax) for ax in order]
        infoOrder = order  + list(range(len(order), len(self._info)))
        info = [self._info[i] for i in infoOrder]
        order = order + list(range(len(order), self.ndim))

        try:
            if self._isHDF:
                return MetaArray(np.array(self._data).transpose(order), info=info)
            else:
                return MetaArray(self._data.transpose(order), info=info)
        except:
            print(order)
            raise

    #### File I/O Routines
Project: NeoAnalysis    Author: neoanalysis
def export(self, fileName=None):
        if not HAVE_HDF5:
            raise RuntimeError("This exporter requires the h5py package, "
                               "but it was not importable.")

        if not isinstance(self.item, PlotItem):
            raise Exception("Must have a PlotItem selected for HDF5 export.")

        if fileName is None:
            self.fileSaveDialog(filter=["*.h5", "*.hdf", "*.hd5"])
            return
        dsname = self.params['Name']
        fd = h5py.File(fileName, 'a') # forces append to file... 'w' doesn't seem to "delete/overwrite"
        data = []

        appendAllX = self.params['columnMode'] == '(x,y) per plot'
        for i,c in enumerate(self.item.curves):
            d = c.getData()
            if appendAllX or i == 0:
                data.append(d[0])
            data.append(d[1])

        fdata = numpy.array(data).astype('double')
        dset = fd.create_dataset(dsname, data=fdata)
        fd.close()
Project: NeoAnalysis    Author: neoanalysis
def __load_page_data(self):
        self.__clearRows()
        if hasattr(self,"selectChan"):
            with hp.File(self.file_name,"r") as f:
                sampling_rate = f["analogs"][self.selectChan]["sampling_rate"].value
                start_time = f["analogs"][self.selectChan]["start_time"].value
                start_point = sampling_rate*self.row_num*self.current_page
                end_point = sampling_rate*self.row_num*(self.current_page+1)
                self.page_data = f["analogs"][self.selectChan]["data"][start_point:end_point]
                self.sigma = np.median(np.abs(self.page_data)/0.6745)
                Thr = self.thresholds[self.selectChan] * self.sigma
            self.sampling_rate = sampling_rate
            self.row_wins_rois = [0]*self.row_num
            for i in range(self.row_num):
                start_point = i*sampling_rate
                end_point = (i+1)*sampling_rate
                if self.page_data[start_point:end_point].size:
                    ys = self.page_data[start_point:end_point]
                    xs = np.arange(ys.size)
                    line = MultiLine(np.array([xs]),np.array([ys]),"w")
                    self.row_wins[i].addItem(line)

                self.row_wins_rois[i] = pg.InfiniteLine(pos=Thr,angle=0,movable=False)
                self.row_wins_rois[i].setZValue(10)
                self.row_wins[i].addItem(self.row_wins_rois[i])
Project: NeoAnalysis    Author: neoanalysis
def __load_waveforms(self,selectChan,file_name):
        spk_startswith = "spike_{0}".format(selectChan)
        with hp.File(file_name,"r") as f:
            times = list()
            waveforms = list()
            for chn_unit in f["spikes"].keys():
                if chn_unit.startswith(spk_startswith):
                    tep_time = f["spikes"][chn_unit]["times"].value
                    waveform = f["spikes"][chn_unit]["waveforms"].value
                    times.append(tep_time)
                    waveforms.append(waveform)
            if times:
                times = np.hstack(times)
                waveforms = np.vstack(waveforms)
                sort_index = np.argsort(times)
                waveforms = waveforms[sort_index]
                return waveforms
            else:
                return None
Project: NeoAnalysis    Author: neoanalysis
def h5_io(filename, spike_to_load, analog_to_load):
    spikes = dict()
    analogs = dict()
    events = dict()
    comments = dict()
    with hp.File(filename,'r') as f:
        for key in f.keys():
            if key=='events':
                events['times'] = f[key]['times'].value
                events['labels'] = f[key]['labels'].value
            elif key=='comments':
                comments['times'] = f[key]['times'].value
                comments['labels'] = f[key]['labels'].value
            elif key=='spikes':
                for tem_key in f[key].keys():
                    if tem_key in spike_to_load:
                        spikes[tem_key] = f[key][tem_key]['times'].value
            elif key=='analogs':
                for tem_key in f[key].keys():
                    if tem_key in analog_to_load:
                        analogs[tem_key] = dict()
                        analogs[tem_key]['data'] = f[key][tem_key]['data'].value
                        analogs[tem_key]['sampling_rate'] = f[key][tem_key]['sampling_rate'].value
                        analogs[tem_key]['start_time'] = f[key][tem_key]['start_time'].value
    return events,comments,spikes,analogs
Project: spikefuel    Author: duguyue100
def gen_tracking_db(database, tracking_stats):
    """Generate TrackingDataset structure.

    Parameters
    ----------
    database : h5py.File
        HDF5 file object
    tracking_stats : dictionary
        the dictionary that contains TrackingDataset's stats

    Returns
    -------
    database : h5py.File
        HDF5 file object with multiple groups
    """
    primary_list = tracking_stats["primary_list"]

    for pc in primary_list:
        if pc not in database:
            database.create_group(pc)
            print "[MESSAGE] Primary group %s is created" % (pc)

    print "[MESSAGE] TrackingDataset HDF5 structure is generated."
Project: spikefuel    Author: duguyue100
def gen_caltech256_db(database, caltech256_stats):
    """Generate Caltech-256 structure.

    Parameters
    ----------
    database : h5py.File
        HDF5 file object
    caltech256_stats : dictionary
        the dictionary that contains Caltech-256's stats

    Returns
    -------
    database : h5py.File
        HDF5 file object with multiple groups
    """
    caltech256_list = caltech256_stats["caltech256_list"]

    for class_name in caltech256_list:
        if class_name not in database:
            database.create_group(class_name)
            print "[MESSAGE] Class %s is created" % (class_name)

    print "[MESSAGE] Caltech-256 HDF5 structure is generated."
Project: spikefuel    Author: duguyue100
def gen_ucf50_db(database, ucf50_stats):
    """Generate UCF50 structure.

    Parameters
    ----------
    database : h5py.File
        HDF5 file object
    ucf50_stats : dictionary
        the dictionary that contains UCF50's stats

    Returns
    -------
    database : h5py.File
        HDF5 file object with multiple groups
    """
    ucf50_list = ucf50_stats["ucf50_list"]

    for category in ucf50_list:
        if category not in database:
            database.create_group(category)
            print "[MESSAGE] Category %s is created" % (category)

    print "[MESSAGE] UCF-50 HDF5 structure is generated."
Project: tfutils    Author: neuroailab
def time_hdf5():
    data_path = create_hdf5(BATCH_SIZE * NSTEPS)

    f = h5py.File(data_path, 'r')
    durs = []
    for step in tqdm.trange(NSTEPS, desc='running hdf5'):
        start_time = time.time()
        arr = f['data'][BATCH_SIZE * step: BATCH_SIZE * (step+1)]
        read_time = time.time()
        arr = copy.deepcopy(arr)
        copy_time = time.time()
        durs.append(['hdf5 read', step, read_time - start_time])
        durs.append(['hdf5 copy', step, copy_time - read_time])
    f.close()
    os.remove(data_path)
    durs = pandas.DataFrame(durs, columns=['kind', 'stepno', 'dur'])
    return durs
Project: evaluation_tools    Author: JSALT-Rosetta
def mean_variance_normalisation(h5f, mvn_h5f, vad=None):
    """Do mean variance normalization. Optionally use a vad.

    Parameters:
    ----------
    h5f: str. h5features file name
    mvn_h5f: str, h5features output name
    """
    dset = list(h5py.File(h5f, 'r').keys())[0]
    if vad is not None:
        raise NotImplementedError
    else:
        data = h5py.File(h5f, 'r')[dset]['features'][:]
        features = data
    epsilon = np.finfo(data.dtype).eps
    mean = np.mean(data)
    std = np.std(data)
    mvn_features = (features - mean) / (std + epsilon)
    shutil.copy(h5f, mvn_h5f)
    h5py.File(mvn_h5f, 'r+')[dset]['features'][:] = mvn_features
Project: evaluation_tools    Author: JSALT-Rosetta
def h5features_feats2stackedfeats(fb_h5f, stackedfb_h5f, nframes=7):
    """Create stacked features version of h5features file

    Parameters:
    ----------
    fb_h5f: str. h5features file name
    stackedfb_h5f: str, h5features output name
    """
    dset_name = list(h5py.File(fb_h5f, 'r').keys())[0]
    files = h5py.File(fb_h5f, 'r')[dset_name]['items']
    def aux(f):
        return stack_fbanks(h5features.read(fb_h5f, from_item=f)[1][f],
                            nframes=nframes)
    def time_f(f):
        return h5features.read(fb_h5f, from_item=f)[0][f]
    h5features_compute(files, stackedfb_h5f, featfunc=aux,
                      timefunc=time_f)
Project: compresso    Author: VCG
def load_data(name='ac3', N=-1, prefix=None, gold=False):
        '''Load data
        '''

        if not 'mri' in name:
            if gold: filename = '~/compresso/data/' + name + '/gold/' + name + '_gold.h5'
            else: filename = '~/compresso/data/' + name + '/rhoana/' + name + '_rhoana.h5'

            with h5py.File(os.path.expanduser(filename), 'r') as hf:
                output = np.array(hf['main'], dtype=np.uint64)
        else:
            filename = '~/compresso/data/MRI/' + name + '.h5'

            with h5py.File(os.path.expanduser(filename), 'r') as hf:
                output = np.array(hf['main'], dtype=np.uint64)

        if (not N == -1):
            output = output[0:N,:,:]

        return output
Project: mtcnn    Author: daikankan
def write_hdf5(file, data, label_class, label_bbox, label_landmarks):
  # transform to np array
  data_arr = np.array(data, dtype = np.float32)
  # print data_arr.shape
  # if no swapaxes, transpose to num * channel * width * height ???
  # data_arr = data_arr.transpose(0, 3, 2, 1)
  label_class_arr = np.array(label_class, dtype = np.float32)
  label_bbox_arr = np.array(label_bbox, dtype = np.float32)
  label_landmarks_arr = np.array(label_landmarks, dtype = np.float32)
  with h5py.File(file, 'w') as f:
    f['data'] = data_arr
    f['label_class'] = label_class_arr
    f['label_bbox'] = label_bbox_arr
    f['label_landmarks'] = label_landmarks_arr

# list_file format:
# image_path | label_class | label_boundingbox(4) | label_landmarks(10)
Project: higlass-server    Author: hms-dbmi
def main():
    parser = argparse.ArgumentParser(description="""

    python add_attr_to_hdf5.py file.hdf5 attr_name attr_value

    Add an attribute to an HDF5 file.
""")

    parser.add_argument('filepath')
    parser.add_argument('attr_name')
    parser.add_argument('attr_value')
    #parser.add_argument('-o', '--options', default='yo',
    #                    help="Some option", type='str')
    #parser.add_argument('-u', '--useless', action='store_true', 
    #                    help='Another useless option')

    args = parser.parse_args()

    with h5py.File(args.filepath, 'a') as f:
        f.attrs[args.attr_name] = args.attr_value
Project: mpnum    Author: dseuss
def dump(self, target):
        """Serializes MPArray to :code:`h5py.Group`. Recover using
        :func:`~load`.

        :param target: :code:`h5py.Group` the instance should be saved to or
            path to h5 file (it's then serialized to /)

        """
        if isinstance(target, str):
            import h5py
            with h5py.File(target, 'w') as outfile:
                return self.dump(outfile)

        for prop in ('ranks', 'shape'):
            # these are only saved for convenience
            target.attrs[prop] = str(getattr(self, prop))

        # these are actually used in MPArray.load
        target.attrs['len'] = len(self)
        target.attrs['canonical_form'] = self.canonical_form

        for site, lten in enumerate(self._lt):
            target[str(site)] = lten
Project: mpnum    Author: dseuss
def test_dump_and_load(tmpdir, dtype):
    mpa = factory.random_mpa(5, [(4,), (2, 3), (1,), (4,), (4, 3)],
                             (4, 7, 1, 3), dtype=dtype)
    mpa.canonicalize(left=1, right=3)

    with h5.File(str(tmpdir / 'dump_load_test.h5'), 'w') as buf:
        newgroup = buf.create_group('mpa')
        mpa.dump(newgroup)
    with h5.File(str(tmpdir / 'dump_load_test.h5'), 'r') as buf:
        mpa_loaded = mp.MPArray.load(buf['mpa'])
    assert_mpa_identical(mpa, mpa_loaded)

    mpa.dump(str(tmpdir / 'dump_load_test_str.h5'))
    mpa_loaded = mp.MPArray.load(str(tmpdir / 'dump_load_test_str.h5'))
    assert_mpa_identical(mpa, mpa_loaded)


###############################################################################
#                            Algebraic operations                             #
###############################################################################
Project: monogreedy    Author: jinjunqi
def average_models(best, L=6, model_dir='', model_name='ra.h5'):
    print '... merging'
    print '{} {:d}-{:d}'.format(model_dir, best-L/2, best+L/2)
    params = {}
    side_info = {}
    attrs = {}
    for i in xrange(max(best-L/2, 0), best+L/2):
        with h5py.File(osp.join(model_dir, model_name+'.'+str(i)), 'r') as f:
            for k, v in f.attrs.items():
                attrs[k] = v
            for p in f.keys():
                if '#' not in p:
                    side_info[p] = f[p][...]
                elif p in params:
                    params[p] += np.array(f[p]).astype('float32') / L
                else:
                    params[p] = np.array(f[p]).astype('float32') / L
    with h5py.File(osp.join(model_dir, model_name+'.merge'), 'w') as f:
        for p in params.keys():
            f[p] = params[p]
        for s in side_info.keys():
            f[s] = side_info[s]
        for k, v in attrs.items():
            f.attrs[k] = v
Project: crema    Author: bmcfee
def save_h5(filename, **kwargs):
    '''Save data to an hdf5 file.

    Parameters
    ----------
    filename : str
        Path to the file

    kwargs
        key-value pairs of data

    See Also
    --------
    load_h5
    '''
    with h5py.File(filename, 'w') as hf:
        hf.update(kwargs)
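The docstring points to a companion load_h5 that is not included in this listing; a minimal sketch of what such a loader could look like (hypothetical, not the project's actual code):

def load_h5(filename):
    '''Read every top-level dataset of an hdf5 file into a dict.'''
    data = {}
    with h5py.File(filename, 'r') as hf:
        for key in hf.keys():
            data[key] = hf[key][()]  # read the full dataset into memory
    return data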
Project: SNPmatch    Author: Gregor-Mendel-Institute
def save_as_hdf5_acc(g, outHDF5):
    NumAcc = len(g.accessions)
    log.info("Writing into HDF5 file acc wise")
    h5file = h5py.File(outHDF5, 'w')
    NumSNPs = len(g.snps)
    h5file.create_dataset('accessions', data=g.accessions, shape=(NumAcc,))
    h5file.create_dataset('positions', data=g.positions, shape=(NumSNPs,),dtype='i4')
    h5file['positions'].attrs['chrs'] = g.chrs
    h5file['positions'].attrs['chr_regions'] = g.chr_regions
    h5file.create_dataset('snps', shape=(NumSNPs, NumAcc), dtype='int8', compression="gzip", chunks=((NumSNPs, 1)))
    for i in range(NumAcc):
        h5file['snps'][:,i] = np.array(g.snps)[:,i]
        if (i+1) % 10 == 0:
            log.info("written SNP info for %s accessions", i+1)
    h5file['snps'].attrs['data_format'] = g.data_format
    h5file['snps'].attrs['num_snps'] = NumSNPs
    h5file['snps'].attrs['num_accessions'] = NumAcc
    h5file.close()
Project: PleioPred    Author: yiminghu
def get_1000G_snps(sumstats, out_file):
    sf = np.loadtxt(sumstats,dtype=str,skiprows=1)
    h5f = h5py.File('ref/Misc/1000G_SNP_info.h5','r')
    rf = h5f['snp_chr'][:]
    h5f.close()
    ind1 = np.in1d(sf[:,1],rf[:,2])
    ind2 = np.in1d(rf[:,2],sf[:,1])
    sf1 = sf[ind1]
    rf1 = rf[ind2]
    ### check order ###
    if sum(sf1[:,1]==rf1[:,2])==len(rf1[:,2]):
        print 'Good!'
    else:
        print 'Shit happens, sorting sf1 to have the same order as rf1'
        O1 = np.argsort(sf1[:,1])
        O2 = np.argsort(rf1[:,2])
        O3 = np.argsort(O2)
        sf1 = sf1[O1][O3]
    out = ['hg19chrc snpid a1 a2 bp or p'+'\n']
    for i in range(len(sf1[:,1])):
        out.append(sf1[:,0][i]+' '+sf1[:,1][i]+' '+sf1[:,2][i]+' '+sf1[:,3][i]+' '+rf1[:,1][i]+' '+sf1[:,5][i]+' '+sf1[:,6][i]+'\n')
    ff = open(out_file,"w")
    ff.writelines(out)
    ff.close()
Project: text2image    Author: emansim
def load_weights(params, path, num_conv):
    print 'Loading gan weights from ' + path
    with h5py.File(path, 'r') as hdf5:
        params['skipthought2image'] = theano.shared(np.copy(hdf5['skipthought2image']))
        params['skipthought2image-bias'] = theano.shared(np.copy(hdf5['skipthought2image-bias']))

        for i in xrange(num_conv):
            params['W_conv{}'.format(i)] = theano.shared(np.copy(hdf5['W_conv{}'.format(i)]))
            params['b_conv{}'.format(i)] = theano.shared(np.copy(hdf5['b_conv{}'.format(i)]))

            # Flip w,h axes
            params['W_conv{}'.format(i)] = params['W_conv{}'.format(i)][:,:,::-1,::-1]

            w = np.abs(np.copy(hdf5['W_conv{}'.format(i)]))
            print 'W_conv{}'.format(i), np.min(w), np.mean(w), np.max(w)
            b = np.abs(np.copy(hdf5['b_conv{}'.format(i)]))
            print 'b_conv{}'.format(i), np.min(b), np.mean(b), np.max(b)

    return params
Project: allennlp    Author: allenai
def _load_sentences_embeddings(self):
        # load the test sentences and the expected LM embeddings
        with open(os.path.join(FIXTURES, 'sentences.json')) as fin:
            sentences = json.load(fin)

        # the expected embeddings
        expected_lm_embeddings = []
        for k in range(len(sentences)):
            embed_fname = os.path.join(
                    FIXTURES, 'lm_embeddings_{}.hdf5'.format(k)
            )
            expected_lm_embeddings.append([])
            with h5py.File(embed_fname, 'r') as fin:
                for i in range(10):
                    sent_embeds = fin['%s' % i][...]
                    sent_embeds_concat = numpy.concatenate(
                            (sent_embeds[0, :, :], sent_embeds[1, :, :]),
                            axis=-1
                    )
                    expected_lm_embeddings[-1].append(sent_embeds_concat)

        return sentences, expected_lm_embeddings
Project: allennlp    Author: allenai
def test_read_hdf5_format_file(self):
        vocab = Vocabulary()
        vocab.add_token_to_namespace("word")
        vocab.add_token_to_namespace("word2")
        embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
        embeddings = numpy.random.rand(vocab.get_vocab_size(), 5)
        with h5py.File(embeddings_filename, 'w') as fout:
            _ = fout.create_dataset(
                    'embedding', embeddings.shape, dtype='float32', data=embeddings
            )

        params = Params({
                'pretrained_file': embeddings_filename,
                'embedding_dim': 5,
                })
        embedding_layer = Embedding.from_params(vocab, params)
        assert numpy.allclose(embedding_layer.weight.data.numpy(), embeddings)
Project: allennlp    Author: allenai
def test_read_hdf5_raises_on_invalid_shape(self):
        vocab = Vocabulary()
        vocab.add_token_to_namespace("word")
        embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
        embeddings = numpy.random.rand(vocab.get_vocab_size(), 10)
        with h5py.File(embeddings_filename, 'w') as fout:
            _ = fout.create_dataset(
                    'embedding', embeddings.shape, dtype='float32', data=embeddings
            )

        params = Params({
                'pretrained_file': embeddings_filename,
                'embedding_dim': 5,
                })
        with pytest.raises(ConfigurationError):
            _ = Embedding.from_params(vocab, params)
Project: allennlp    Author: allenai
def _read_pretrained_hdf5_format_embedding_file(embeddings_filename: str, # pylint: disable=invalid-name
                                                embedding_dim: int,
                                                vocab: Vocabulary,
                                                namespace: str = "tokens") -> torch.FloatTensor:
    """
    Reads from a hdf5 formatted file.  The embedding matrix is assumed to
    be keyed by 'embedding' and of size ``(num_tokens, embedding_dim)``.
    """
    with h5py.File(embeddings_filename, 'r') as fin:
        embeddings = fin['embedding'][...]

    if list(embeddings.shape) != [vocab.get_vocab_size(namespace), embedding_dim]:
        raise ConfigurationError(
                "Read shape {0} embeddings from the file, but expected {1}".format(
                        list(embeddings.shape), [vocab.get_vocab_size(namespace), embedding_dim]))

    return torch.FloatTensor(embeddings)
Project: Learning-to-navigate-without-a-map    Author: ToniRV
def load_grid8(return_imsize=True):
    """Load grid 8x8.

    Parameters
    ----------
    return_imsize : bool
        return a tuple with grid size if True

    Returns
    -------
    db : h5py.File
        a HDF5 file object
    imsize : tuple
        (optional) grid size
    """
    file_path = os.path.join(rlvision.RLVISION_DATA,
                             "HDF5", "gridworld_8.hdf5")
    if not os.path.isfile(file_path):
        raise ValueError("The dataset %s is not existed!" % (file_path))

    if return_imsize is True:
        return h5py.File(file_path, mode="r"), (8, 8)
    else:
        return h5py.File(file_path, mode="r")
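Hypothetical usage of the loader above; note that it returns an open, read-only h5py.File which the caller is responsible for closing:

db, imsize = load_grid8()
try:
    print(imsize)  # (8, 8)
finally:
    db.close()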
Project: keras-molecules    Author: maxhodak
def encoder(args, model):
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split = False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if args.save_h5:
        h5f = h5py.File(args.save_h5, 'w')
        h5f.create_dataset('charset', data = charset)
        h5f.create_dataset('latent_vectors', data = x_latent)
        h5f.close()
    else:
        np.savetxt(sys.stdout, x_latent, delimiter = '\t')
Project: keras-molecules    Author: maxhodak
def main():
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size = args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter = '\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data = charset)
            h5f.create_dataset('latent_vectors', data = x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)
Project: NuGridPy    Author: NuGrid
def fetch_data_one(self,dataitem,cycle):
        self.h5 = mrT.File(self.filename,'r')

        try:
            data = self.h5[self.cycle_header+str(cycle)]['SE_DATASET'][dataitem]
        except ValueError:
            try:
                data = self.h5[self.cycle_header+str(cycle)].attrs.get(dataitem, None)
            except TypeError:
                data = self.h5[self.cycle_header+str(cycle)][dataitem]

        try:
            while data.shape[0] < 2:
                data = data[0]
        except (IndexError, AttributeError):
            pass


        self.h5.close()
        return data
Project: inferno    Author: inferno-pytorch
def fromh5(path, datapath=None, dataslice=None, asnumpy=True, preptrain=None):
    """
    Opens a hdf5 file at path, loads in the dataset at datapath, and returns dataset
    as a numpy array.
    """
    # Check if path exists (thanks Lukas!)
    assert os.path.exists(path), "Path {} does not exist.".format(path)
    # Init file
    h5file = h5.File(path, 'r')
    # Init dataset
    h5dataset = h5file[datapath] if datapath is not None else list(h5file.values())[0]
    # Slice dataset
    h5dataset = h5dataset[dataslice] if dataslice is not None else h5dataset
    # Convert to numpy if required
    h5dataset = np.asarray(h5dataset) if asnumpy else h5dataset
    # Apply preptrain
    h5dataset = preptrain(h5dataset) if preptrain is not None else h5dataset
    # Close file
    h5file.close()
    # Return
    return h5dataset
Project: spyking-circus    Author: spyking-circus
def __check_valid_key__(self, key):
        file       = h5py.File(self.file_name, 'r')
        all_fields = []
        file.visit(all_fields.append)
        if key not in all_fields:
            print_and_log(['The key %s can not be found in the dataset! Keys found are:' %key, 
                         ", ".join(all_fields)], 'error', logger)
            sys.exit(1)
        file.close()
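File.visit, used above to collect every key, recursively passes the name of each group and dataset to the supplied callable, so the same check can be prototyped with any callable (minimal sketch):

with h5py.File('example.h5', 'r') as f:
    f.visit(print)  # print the path of every group and dataset in the file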
Project: spyking-circus    Author: spyking-circus
def _open(self, mode='r'):
        if mode in ['r+', 'w'] and self._parallel_write:
            self.my_file = h5py.File(self.file_name, mode=mode, driver='mpio', comm=comm)
        else:
            self.my_file = h5py.File(self.file_name, mode=mode)

        self.data = self.my_file.get(self.h5_key)
Project: spyking-circus    Author: spyking-circus
def set_streams(self, stream_mode):

        if stream_mode == 'single-file':

            sources     = []
            to_write    = []
            count       = 0
            params      = self.get_description()
            my_file     = h5py.File(self.file_name, 'r')
            all_matches = [re.findall('\d+', u) for u in my_file.keys()]
            all_streams = []
            for m in all_matches:
                if len(m) > 0:
                    all_streams += [int(m[0])]

            idx = numpy.argsort(all_streams)

            for i in xrange(len(all_streams)):
                params['h5_key']  = my_file.keys()[idx[i]]
                new_data          = type(self)(self.file_name, params)
                sources          += [new_data]
                to_write         += ['We found the datafile %s with t_start %d and duration %d' %(new_data.file_name, new_data.t_start, new_data.duration)]

            print_and_log(to_write, 'debug', logger)

            return sources

        elif stream_mode == 'multi-files':
            return H5File.set_streams(self, stream_mode)