The following 50 code examples, extracted from open-source Python projects, illustrate how to use h5py.File().
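Before the project examples, here is a minimal, self-contained sketch of the core h5py.File() pattern most of the snippets below build on: opening a file with a mode string ('w' to create/truncate, 'r' to read), creating datasets, attaching attributes, and using the file as a context manager so it is closed automatically. The file name example.h5 and the dataset/attribute names are placeholders chosen for illustration, not taken from any of the projects below.

import numpy as np
import h5py

# Write: mode 'w' creates the file (truncating it if it already exists).
with h5py.File('example.h5', 'w') as f:
    f.create_dataset('data', data=np.arange(10), compression='gzip')  # store an array under the key 'data'
    f.attrs['description'] = 'toy dataset'                            # metadata lives in .attrs

# Read: mode 'r' opens the file read-only; datasets slice like numpy arrays.
with h5py.File('example.h5', 'r') as f:
    data = f['data'][:]            # read the whole dataset into memory
    desc = f.attrs['description']

print(data, desc)

The project examples show the same calls in context: parallel (MPI-driver) writes, chunked and compressed datasets, reading columns into pandas, and so on.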

def allocate(self, shape, data_dtype=None):
    if data_dtype is None:
        data_dtype = self.data_dtype

    if self._parallel_write:
        self.my_file = h5py.File(self.file_name, mode='w', driver='mpio', comm=comm)
        self.my_file.create_dataset(self.h5_key, dtype=data_dtype, shape=shape)
    else:
        self.my_file = h5py.File(self.file_name, mode='w')
        if self.is_master:
            if self.compression != '':
                self.my_file.create_dataset(self.h5_key, dtype=data_dtype, shape=shape,
                                            compression=self.compression, chunks=True)
            else:
                self.my_file.create_dataset(self.h5_key, dtype=data_dtype, shape=shape, chunks=True)

    self.my_file.close()
    self._read_from_header()

def test_patch_for_similarities(params, extension):
    file_out_suff = params.get('data', 'file_out_suff')
    template_file = file_out_suff + '.templates%s.hdf5' % extension
    if os.path.exists(template_file):
        try:
            myfile = h5py.File(template_file, 'r', libver='latest')
            version = myfile.get('version')[0].decode('ascii')
            myfile.close()
        except Exception:
            version = None
    else:
        raise Exception('No templates found! Check suffix?')

    if version is not None:
        if (StrictVersion(version) >= StrictVersion('0.6.0')):
            return True
    else:
        print_and_log(["Version is below 0.6.0"], 'debug', logger)
        return False

def test_validating(self):
    #mpi_launch('fitting', self.file_name, 2, 0, 'False')

    a, b = os.path.splitext(os.path.basename(self.file_name))
    file_name, ext = os.path.splitext(self.file_name)
    file_out = os.path.join(os.path.abspath(file_name), a)
    result_name = os.path.join(file_name, 'injected')

    spikes = {}
    result = h5py.File(os.path.join(result_name, '%s.result.hdf5' % a))
    for key in result.get('spiketimes').keys():
        spikes[key] = result.get('spiketimes/%s' % key)[:]

    juxta_file = file_out + '.juxta.dat'

    f = numpy.memmap(juxta_file, shape=(self.length, 1),
                     dtype=self.parser.get('validating', 'juxta_dtype'), mode='w+')
    f[spikes['temp_9']] = 100
    del f

    mpi_launch('validating', self.file_name, 2, 0, 'False')

def report(self, summary_json_paths, barcode_summary_h5_path, recovered_cells, cell_bc_seqs):
    assert len(cell_bc_seqs) == len(self.matrices)
    barcode_summary_h5 = h5.File(barcode_summary_h5_path, 'r')

    d = {}
    d.update(self._report_genome_agnostic_metrics(
        summary_json_paths, barcode_summary_h5, recovered_cells, cell_bc_seqs))

    # Compute genome-specific metrics
    for i, (genome, matrix) in enumerate(self.matrices.iteritems()):
        for key, value in matrix.report(genome,
                                        barcode_summary_h5,
                                        recovered_cells,
                                        cell_bc_seqs=cell_bc_seqs[i],
                                        ).iteritems():
            key = '_'.join([genome, key])
            d[key] = value
    return d

def write_data_frame(fn, df):
    ''' Write the pandas dataframe object to an HDF5 file. Each column is written as a single 1D dataset
        at the top level of the HDF5 file, using the native pandas datatype '''

    # Always write a fresh file -- the 'w' argument to h5py.File is supposed to truncate an existing file,
    # but it doesn't appear to work correctly
    if os.path.exists(fn):
        os.remove(fn)

    f = h5py.File(fn, "w")

    # To preserve column order, write columns to an attribute
    column_names = np.array(list(df.columns))
    f.attrs.create("column_names", column_names)

    for col in df.columns:
        write_data_column(f, df[col])

    f.close()

def read_data_frame(fn, query_cols=[]):
    ''' Load a pandas DataFrame from an HDF5 file. If a column list is specified, only load the matching columns '''

    with h5py.File(fn, 'r') as f:
        column_names = f.attrs.get("column_names")
        column_names = get_column_intersection(column_names, query_cols)

        df = p.DataFrame()

        # Add the columns progressively to save memory
        for name in column_names:
            ds = f[name]
            if has_levels(ds):
                indices = ds[:]
                uniques = get_levels(ds)
                # This method of constructing of Categorical avoids copying the indices array
                # which saves memory for big datasets
                df[name] = p.Categorical(indices, categories=uniques, ordered=False, fastpath=True)
            else:
                df[name] = p.Series(ds[:])

        return df

def read_data_frame_indexed_no_concat(fn, tabix_queries, query_cols=[], coords=True):
    ''' Read rows from the HDF5 data frame that match each tabix query in the queries list.
    A tabix query is in the form ('chr1', 100, 200). query_cols is a list of columns you want to return.
    If coords is True, then it will return coordinates regardless of query_cols.
    If coords is False, it will only return the columns specified in query_cols.
    Returns a list of pandas DataFrames, one for each query. '''

    f = h5py.File(fn, 'r')

    # read the index
    tabix_index = read_tabix_index(f)

    dfs = []
    for q in tabix_queries:
        r = _read_data_frame_indexed_sub(f, tabix_index, q, query_cols=query_cols, coords=coords)
        dfs.append(r)

    f.close()

    # Return the union of the queries
    return dfs

def check_filters(fast5_file, min_length, min_mean_qual, min_qual_window, window_size):
    try:
        hdf5_file = h5py.File(fast5_file, 'r')
        names = get_hdf5_names(hdf5_file)
        basecall_location = get_best_fastq_hdf5_location(hdf5_file, names)
        if basecall_location:
            fastq_str = hdf5_file[basecall_location].value
            try:
                parts = fastq_str.split(b'\n')
                seq, quals = parts[1], parts[3]
            except IndexError:
                fastq_str, seq, quals = '', '', ''
            if not fastq_str or not seq:
                return False, 0
            if min_mean_qual and get_mean_qscore(quals) < min_mean_qual:
                return False, 0
            if min_length and len(seq) < min_length:
                return False, 0
            if min_qual_window and get_min_window_qscore(quals, window_size) < min_qual_window:
                return False, 0
            return True, len(seq)
    except (IOError, RuntimeError):
        pass
    return False, 0

def min_window_qual_and_length(fast5_file, window_size):
    try:
        hdf5_file = h5py.File(fast5_file, 'r')
        names = get_hdf5_names(hdf5_file)
        basecall_location = get_best_fastq_hdf5_location(hdf5_file, names)
        if basecall_location:
            fastq_str = hdf5_file[basecall_location].value
            try:
                parts = fastq_str.split(b'\n')
                seq, quals = parts[1], parts[3]
                return get_min_window_qscore(quals, window_size), len(seq), fast5_file
            except IndexError:
                pass
    except (IOError, RuntimeError):
        pass
    return 0.0, 0, fast5_file

def save_h5_data_label_normal(h5_filename, data, label, normal,
                              data_dtype='float32', label_dtype='uint8', normal_dtype='float32'):
    h5_fout = h5py.File(h5_filename)
    h5_fout.create_dataset(
        'data', data=data,
        compression='gzip', compression_opts=4,
        dtype=data_dtype)
    h5_fout.create_dataset(
        'normal', data=normal,
        compression='gzip', compression_opts=4,
        dtype=normal_dtype)
    h5_fout.create_dataset(
        'label', data=label,
        compression='gzip', compression_opts=1,
        dtype=label_dtype)
    h5_fout.close()


# Write numpy array data and label to h5_filename

def main():
    parser = generate_parser()
    args = parser.parse_args()
    infile1 = h5py.File(args.input1, 'r')
    infile2 = h5py.File(args.input2, 'r')
    resolutions = numpy.intersect1d(infile1['resolutions'][...], infile2['resolutions'][...])
    # intersect the chromosome lists from both input files
    chroms = numpy.intersect1d(infile1['chromosomes'][...], infile2['chromosomes'][...])
    data1 = load_data(infile1, chroms, resolutions)
    data2 = load_data(infile2, chroms, resolutions)
    infile1.close()
    infile2.close()
    results = {}
    results[(args.input1.split('/')[-1].strip('.quasar'),
             args.input2.split('/')[-1].strip('.quasar'))] = correlate_samples(data1, data2)

    for resolution in data1.keys():
        for chromo in chroms:
            plt.scatter(data1[resolution][chromo][1].flatten(),
                        data2[resolution][chromo][1].flatten(),
                        alpha=0.1, color='red')
            plt.show()
            plt.savefig(args.output + '.res' + str(resolution) + '.chr' + chromo + '.pdf')

def fill_hdf5_with_sparse_by_chunk(mym1, mym2, fname, chunksize):
    start1 = 0
    end1 = 0
    n = mym1.shape[0]

    f = h5py.File(fname, 'w')
    m1hdf5 = f.create_dataset('m1', shape=(n, n), dtype='float')
    m2hdf5 = f.create_dataset('m2', shape=(n, n), dtype='float')

    while end1 < n:
        end1 = np.min([n, (start1 + chunksize)])
        print 'start1: ' + str(start1)
        if (end1 - start1) == 1:
            m1hdf5[start1, :] = mym1[start1, :].toarray()
            m2hdf5[start1, :] = mym2[start1, :].toarray()
        else:
            m1hdf5[start1:end1, :] = mym1[start1:end1, :].toarray()
            m2hdf5[start1:end1, :] = mym2[start1:end1, :].toarray()
        start1 = end1

    print 'sum of 1'
    print m1hdf5[:, :].sum()
    print m2hdf5[:, :].sum()
    f.close()

def __init__(self, data=None, info=None, dtype=None, file=None, copy=False, **kwargs):
    object.__init__(self)
    #self._infoOwned = False
    self._isHDF = False

    if file is not None:
        self._data = None
        self.readFile(file, **kwargs)
        if kwargs.get("readAllData", True) and self._data is None:
            raise Exception("File read failed: %s" % file)
    else:
        self._info = info
        if (hasattr(data, 'implements') and data.implements('MetaArray')):
            self._info = data._info
            self._data = data.asarray()
        elif isinstance(data, tuple):  ## create empty array with specified shape
            self._data = np.empty(data, dtype=dtype)
        else:
            self._data = np.array(data, dtype=dtype, copy=copy)

    ## run sanity checks on info structure
    self.checkInfo()

def transpose(self, *args):
    if len(args) == 1 and hasattr(args[0], '__iter__'):
        order = args[0]
    else:
        order = args

    order = [self._interpretAxis(ax) for ax in order]
    infoOrder = order + list(range(len(order), len(self._info)))
    info = [self._info[i] for i in infoOrder]
    order = order + list(range(len(order), self.ndim))

    try:
        if self._isHDF:
            return MetaArray(np.array(self._data).transpose(order), info=info)
        else:
            return MetaArray(self._data.transpose(order), info=info)
    except:
        print(order)
        raise

#### File I/O Routines

def export(self, fileName=None):
    if not HAVE_HDF5:
        raise RuntimeError("This exporter requires the h5py package, "
                           "but it was not importable.")

    if not isinstance(self.item, PlotItem):
        raise Exception("Must have a PlotItem selected for HDF5 export.")

    if fileName is None:
        self.fileSaveDialog(filter=["*.h5", "*.hdf", "*.hd5"])
        return

    dsname = self.params['Name']
    fd = h5py.File(fileName, 'a')  # forces append to file... 'w' doesn't seem to "delete/overwrite"
    data = []

    appendAllX = self.params['columnMode'] == '(x,y) per plot'
    for i, c in enumerate(self.item.curves):
        d = c.getData()
        if appendAllX or i == 0:
            data.append(d[0])
        data.append(d[1])

    fdata = numpy.array(data).astype('double')
    dset = fd.create_dataset(dsname, data=fdata)
    fd.close()

def __load_page_data(self):
    self.__clearRows()
    if hasattr(self, "selectChan"):
        with hp.File(self.file_name, "r") as f:
            sampling_rate = f["analogs"][self.selectChan]["sampling_rate"].value
            start_time = f["analogs"][self.selectChan]["start_time"].value
            start_point = sampling_rate * self.row_num * self.current_page
            end_point = sampling_rate * self.row_num * (self.current_page + 1)
            self.page_data = f["analogs"][self.selectChan]["data"][start_point:end_point]
            self.sigma = np.median(np.abs(self.page_data) / 0.6745)
            Thr = self.thresholds[self.selectChan] * self.sigma
            self.sampling_rate = sampling_rate
            self.row_wins_rois = [0] * self.row_num
            for i in range(self.row_num):
                start_point = i * sampling_rate
                end_point = (i + 1) * sampling_rate
                if self.page_data[start_point:end_point].size:
                    ys = self.page_data[start_point:end_point]
                    xs = np.arange(ys.size)
                    line = MultiLine(np.array([xs]), np.array([ys]), "w")
                    self.row_wins[i].addItem(line)
                    self.row_wins_rois[i] = pg.InfiniteLine(pos=Thr, angle=0, movable=False)
                    self.row_wins_rois[i].setZValue(10)
                    self.row_wins[i].addItem(self.row_wins_rois[i])

def __load_waveforms(self, selectChan, file_name):
    spk_startswith = "spike_{0}".format(selectChan)
    with hp.File(file_name, "r") as f:
        times = list()
        waveforms = list()
        for chn_unit in f["spikes"].keys():
            if chn_unit.startswith(spk_startswith):
                tep_time = f["spikes"][chn_unit]["times"].value
                waveform = f["spikes"][chn_unit]["waveforms"].value
                times.append(tep_time)
                waveforms.append(waveform)
        if times:
            times = np.hstack(times)
            waveforms = np.vstack(waveforms)
            sort_index = np.argsort(times)
            waveforms = waveforms[sort_index]
            return waveforms
        else:
            return None

def h5_io(filename, spike_to_load, analog_to_load):
    spikes = dict()
    analogs = dict()
    events = dict()
    comments = dict()
    with hp.File(filename, 'r') as f:
        for key in f.keys():
            if key == 'events':
                events['times'] = f[key]['times'].value
                events['labels'] = f[key]['labels'].value
            elif key == 'comments':
                comments['times'] = f[key]['times'].value
                comments['labels'] = f[key]['labels'].value
            elif key == 'spikes':
                for tem_key in f[key].keys():
                    if tem_key in spike_to_load:
                        spikes[tem_key] = f[key][tem_key]['times'].value
            elif key == 'analogs':
                for tem_key in f[key].keys():
                    if tem_key in analog_to_load:
                        analogs[tem_key] = dict()
                        analogs[tem_key]['data'] = f[key][tem_key]['data'].value
                        analogs[tem_key]['sampling_rate'] = f[key][tem_key]['sampling_rate'].value
                        analogs[tem_key]['start_time'] = f[key][tem_key]['start_time'].value
    return events, comments, spikes, analogs

def gen_tracking_db(database, tracking_stats):
    """Generate TrackingDataset structure.

    Parameters
    ----------
    database : h5py.File
        HDF5 file object
    tracking_stats : dictionary
        the dictionary that contains TrackingDataset's stats

    Returns
    -------
    database : h5py.File
        HDF5 file object with multiple groups
    """
    primary_list = tracking_stats["primary_list"]

    for pc in primary_list:
        if pc not in database:
            database.create_group(pc)
            print "[MESSAGE] Primary group %s is created" % (pc)

    print "[MESSAGE] TrackingDataset HDF5 structure is generated."

def gen_caltech256_db(database, caltech256_stats):
    """Generate Caltech-256 structure.

    Parameters
    ----------
    database : h5py.File
        HDF5 file object
    caltech256_stats : dictionary
        the dictionary that contains Caltech-256's stats

    Returns
    -------
    database : h5py.File
        HDF5 file object with multiple groups
    """
    caltech256_list = caltech256_stats["caltech256_list"]

    for class_name in caltech256_list:
        if class_name not in database:
            database.create_group(class_name)
            print "[MESSAGE] Class %s is created" % (class_name)

    print "[MESSAGE] Caltech-256 HDF5 structure is generated."

def gen_ucf50_db(database, ucf50_stats):
    """Generate UCF50 structure.

    Parameters
    ----------
    database : h5py.File
        HDF5 file object
    ucf50_stats : dictionary
        the dictionary that contains UCF50's stats

    Returns
    -------
    database : h5py.File
        HDF5 file object with multiple groups
    """
    ucf50_list = ucf50_stats["ucf50_list"]

    for category in ucf50_list:
        if category not in database:
            database.create_group(category)
            print "[MESSAGE] Category %s is created" % (category)

    print "[MESSAGE] UCF-50 HDF5 structure is generated."

def time_hdf5():
    data_path = create_hdf5(BATCH_SIZE * NSTEPS)
    f = h5py.File(data_path)
    durs = []
    for step in tqdm.trange(NSTEPS, desc='running hdf5'):
        start_time = time.time()
        arr = f['data'][BATCH_SIZE * step: BATCH_SIZE * (step + 1)]
        read_time = time.time()
        arr = copy.deepcopy(arr)
        copy_time = time.time()
        durs.append(['hdf5 read', step, read_time - start_time])
        durs.append(['hdf5 copy', step, copy_time - read_time])
    f.close()
    os.remove(data_path)
    durs = pandas.DataFrame(durs, columns=['kind', 'stepno', 'dur'])
    return durs

def mean_variance_normalisation(h5f, mvn_h5f, vad=None):
    """Do mean variance normalization. Optionally use a vad.

    Parameters:
    ----------
    h5f: str. h5features file name
    mvn_h5f: str, h5features output name
    """
    dset = h5py.File(h5f).keys()[0]
    if vad is not None:
        raise NotImplementedError
    else:
        data = h5py.File(h5f)[dset]['features'][:]
        features = data
    epsilon = np.finfo(data.dtype).eps
    mean = np.mean(data)
    std = np.std(data)
    mvn_features = (features - mean) / (std + epsilon)
    shutil.copy(h5f, mvn_h5f)
    h5py.File(mvn_h5f)[dset]['features'][:] = mvn_features

def h5features_feats2stackedfeats(fb_h5f, stackedfb_h5f, nframes=7):
    """Create stacked features version of h5features file

    Parameters:
    ----------
    fb_h5f: str. h5features file name
    stackedfb_h5f: str, h5features output name
    """
    dset_name = h5py.File(fb_h5f).keys()[0]
    files = h5py.File(fb_h5f)[dset_name]['items']

    def aux(f):
        return stack_fbanks(h5features.read(fb_h5f, from_item=f)[1][f],
                            nframes=nframes)

    def time_f(f):
        return h5features.read(fb_h5f, from_item=f)[0][f]

    h5features_compute(files, stackedfb_h5f, featfunc=aux,
                       timefunc=time_f)

def load_data(name='ac3', N=-1, prefix=None, gold=False):
    '''Load data
    '''
    if not 'mri' in name:
        if gold:
            filename = '~/compresso/data/' + name + '/gold/' + name + '_gold.h5'
        else:
            filename = '~/compresso/data/' + name + '/rhoana/' + name + '_rhoana.h5'

        with h5py.File(os.path.expanduser(filename), 'r') as hf:
            output = np.array(hf['main'], dtype=np.uint64)
    else:
        filename = '~/compresso/data/MRI/' + name + '.h5'

        with h5py.File(os.path.expanduser(filename), 'r') as hf:
            output = np.array(hf['main'], dtype=np.uint64)

    if (not N == -1):
        output = output[0:N, :, :]

    return output

def write_hdf5(file, data, label_class, label_bbox, label_landmarks):
    # transform to np array
    data_arr = np.array(data, dtype=np.float32)
    # print data_arr.shape
    # if no swapaxes, transpose to num * channel * width * height ???
    # data_arr = data_arr.transpose(0, 3, 2, 1)
    label_class_arr = np.array(label_class, dtype=np.float32)
    label_bbox_arr = np.array(label_bbox, dtype=np.float32)
    label_landmarks_arr = np.array(label_landmarks, dtype=np.float32)
    with h5py.File(file, 'w') as f:
        f['data'] = data_arr
        f['label_class'] = label_class_arr
        f['label_bbox'] = label_bbox_arr
        f['label_landmarks'] = label_landmarks_arr

# list_file format:
#   image_path | label_class | label_boundingbox(4) | label_landmarks(10)

def main():
    parser = argparse.ArgumentParser(description="""
    python add_attr_to_hdf5.py file.hdf5 attr_name attr_value

    Add an attribute to an HDF5 file.
    """)
    parser.add_argument('filepath')
    parser.add_argument('attr_name')
    parser.add_argument('attr_value')
    #parser.add_argument('-o', '--options', default='yo',
    #                    help="Some option", type='str')
    #parser.add_argument('-u', '--useless', action='store_true',
    #                    help='Another useless option')
    args = parser.parse_args()

    with h5py.File(args.filepath) as f:
        f.attrs[args.attr_name] = args.attr_value

def dump(self, target):
    """Serializes MPArray to :code:`h5py.Group`. Recover using :func:`~load`.

    :param target: :code:`h5py.Group` the instance should be saved to or
        path to h5 file (it's then serialized to /)

    """
    if isinstance(target, str):
        import h5py
        with h5py.File(target, 'w') as outfile:
            return self.dump(outfile)

    for prop in ('ranks', 'shape'):
        # these are only saved for convenience
        target.attrs[prop] = str(getattr(self, prop))

    # these are actually used in MPArray.load
    target.attrs['len'] = len(self)
    target.attrs['canonical_form'] = self.canonical_form

    for site, lten in enumerate(self._lt):
        target[str(site)] = lten

def test_dump_and_load(tmpdir, dtype):
    mpa = factory.random_mpa(5, [(4,), (2, 3), (1,), (4,), (4, 3)], (4, 7, 1, 3), dtype=dtype)
    mpa.canonicalize(left=1, right=3)

    with h5.File(str(tmpdir / 'dump_load_test.h5'), 'w') as buf:
        newgroup = buf.create_group('mpa')
        mpa.dump(newgroup)

    with h5.File(str(tmpdir / 'dump_load_test.h5'), 'r') as buf:
        mpa_loaded = mp.MPArray.load(buf['mpa'])

    assert_mpa_identical(mpa, mpa_loaded)

    mpa.dump(str(tmpdir / 'dump_load_test_str.h5'))
    mpa_loaded = mp.MPArray.load(str(tmpdir / 'dump_load_test_str.h5'))
    assert_mpa_identical(mpa, mpa_loaded)


###############################################################################
#                          Algebraic operations                               #
###############################################################################

def average_models(best, L=6, model_dir='', model_name='ra.h5'):
    print '... merging'
    print '{} {:d}-{:d}'.format(model_dir, best - L / 2, best + L / 2)
    params = {}
    side_info = {}
    attrs = {}
    for i in xrange(max(best - L / 2, 0), best + L / 2):
        with h5py.File(osp.join(model_dir, model_name + '.' + str(i)), 'r') as f:
            for k, v in f.attrs.items():
                attrs[k] = v
            for p in f.keys():
                if '#' not in p:
                    side_info[p] = f[p][...]
                elif p in params:
                    params[p] += np.array(f[p]).astype('float32') / L
                else:
                    params[p] = np.array(f[p]).astype('float32') / L
    with h5py.File(osp.join(model_dir, model_name + '.merge'), 'w') as f:
        for p in params.keys():
            f[p] = params[p]
        for s in side_info.keys():
            f[s] = side_info[s]
        for k, v in attrs.items():
            f.attrs[k] = v

def save_h5(filename, **kwargs):
    '''Save data to an hdf5 file.

    Parameters
    ----------
    filename : str
        Path to the file

    kwargs
        key-value pairs of data

    See Also
    --------
    load_h5
    '''
    with h5py.File(filename, 'w') as hf:
        hf.update(kwargs)

def save_as_hdf5_acc(g, outHDF5):
    NumAcc = len(g.accessions)
    log.info("Writing into HDF5 file acc wise")
    h5file = h5py.File(outHDF5, 'w')
    NumSNPs = len(g.snps)
    h5file.create_dataset('accessions', data=g.accessions, shape=(NumAcc,))
    h5file.create_dataset('positions', data=g.positions, shape=(NumSNPs,), dtype='i4')
    h5file['positions'].attrs['chrs'] = g.chrs
    h5file['positions'].attrs['chr_regions'] = g.chr_regions
    h5file.create_dataset('snps', shape=(NumSNPs, NumAcc), dtype='int8',
                          compression="gzip", chunks=((NumSNPs, 1)))
    for i in range(NumAcc):
        h5file['snps'][:, i] = np.array(g.snps)[:, i]
        if (i + 1) % 10 == 0:
            log.info("written SNP info for %s accessions", i + 1)
    h5file['snps'].attrs['data_format'] = g.data_format
    h5file['snps'].attrs['num_snps'] = NumSNPs
    h5file['snps'].attrs['num_accessions'] = NumAcc
    h5file.close()

def get_1000G_snps(sumstats, out_file):
    sf = np.loadtxt(sumstats, dtype=str, skiprows=1)
    h5f = h5py.File('ref/Misc/1000G_SNP_info.h5', 'r')
    rf = h5f['snp_chr'][:]
    h5f.close()
    ind1 = np.in1d(sf[:, 1], rf[:, 2])
    ind2 = np.in1d(rf[:, 2], sf[:, 1])
    sf1 = sf[ind1]
    rf1 = rf[ind2]
    ### check order ###
    if sum(sf1[:, 1] == rf1[:, 2]) == len(rf1[:, 2]):
        print 'Good!'
    else:
        print 'Shit happens, sorting sf1 to have the same order as rf1'
        O1 = np.argsort(sf1[:, 1])
        O2 = np.argsort(rf1[:, 2])
        O3 = np.argsort(O2)
        sf1 = sf1[O1][O3]
    out = ['hg19chrc snpid a1 a2 bp or p' + '\n']
    for i in range(len(sf1[:, 1])):
        out.append(sf1[:, 0][i] + ' ' + sf1[:, 1][i] + ' ' + sf1[:, 2][i] + ' ' + sf1[:, 3][i] + ' ' +
                   rf1[:, 1][i] + ' ' + sf1[:, 5][i] + ' ' + sf1[:, 6][i] + '\n')
    ff = open(out_file, "w")
    ff.writelines(out)
    ff.close()

def load_weights(params, path, num_conv):
    print 'Loading gan weights from ' + path
    with h5py.File(path, 'r') as hdf5:
        params['skipthought2image'] = theano.shared(np.copy(hdf5['skipthought2image']))
        params['skipthought2image-bias'] = theano.shared(np.copy(hdf5['skipthought2image-bias']))

        for i in xrange(num_conv):
            params['W_conv{}'.format(i)] = theano.shared(np.copy(hdf5['W_conv{}'.format(i)]))
            params['b_conv{}'.format(i)] = theano.shared(np.copy(hdf5['b_conv{}'.format(i)]))

            # Flip w,h axes
            params['W_conv{}'.format(i)] = params['W_conv{}'.format(i)][:, :, ::-1, ::-1]

            w = np.abs(np.copy(hdf5['W_conv{}'.format(i)]))
            print 'W_conv{}'.format(i), np.min(w), np.mean(w), np.max(w)
            b = np.abs(np.copy(hdf5['b_conv{}'.format(i)]))
            print 'b_conv{}'.format(i), np.min(b), np.mean(b), np.max(b)

    return params

def _load_sentences_embeddings(self):
    # load the test sentences and the expected LM embeddings
    with open(os.path.join(FIXTURES, 'sentences.json')) as fin:
        sentences = json.load(fin)

    # the expected embeddings
    expected_lm_embeddings = []
    for k in range(len(sentences)):
        embed_fname = os.path.join(
            FIXTURES, 'lm_embeddings_{}.hdf5'.format(k)
        )
        expected_lm_embeddings.append([])
        with h5py.File(embed_fname, 'r') as fin:
            for i in range(10):
                sent_embeds = fin['%s' % i][...]
                sent_embeds_concat = numpy.concatenate(
                    (sent_embeds[0, :, :], sent_embeds[1, :, :]),
                    axis=-1
                )
                expected_lm_embeddings[-1].append(sent_embeds_concat)

    return sentences, expected_lm_embeddings

def test_read_hdf5_format_file(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word")
    vocab.add_token_to_namespace("word2")
    embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
    embeddings = numpy.random.rand(vocab.get_vocab_size(), 5)
    with h5py.File(embeddings_filename, 'w') as fout:
        _ = fout.create_dataset(
            'embedding', embeddings.shape, dtype='float32', data=embeddings
        )

    params = Params({
        'pretrained_file': embeddings_filename,
        'embedding_dim': 5,
    })
    embedding_layer = Embedding.from_params(vocab, params)
    assert numpy.allclose(embedding_layer.weight.data.numpy(), embeddings)

def test_read_hdf5_raises_on_invalid_shape(self):
    vocab = Vocabulary()
    vocab.add_token_to_namespace("word")
    embeddings_filename = self.TEST_DIR + "embeddings.hdf5"
    embeddings = numpy.random.rand(vocab.get_vocab_size(), 10)
    with h5py.File(embeddings_filename, 'w') as fout:
        _ = fout.create_dataset(
            'embedding', embeddings.shape, dtype='float32', data=embeddings
        )

    params = Params({
        'pretrained_file': embeddings_filename,
        'embedding_dim': 5,
    })
    with pytest.raises(ConfigurationError):
        _ = Embedding.from_params(vocab, params)

def _read_pretrained_hdf5_format_embedding_file(embeddings_filename: str, # pylint: disable=invalid-name
                                                embedding_dim: int,
                                                vocab: Vocabulary,
                                                namespace: str = "tokens") -> torch.FloatTensor:
    """
    Reads from a hdf5 formatted file. The embedding matrix is assumed to
    be keyed by 'embedding' and of size ``(num_tokens, embedding_dim)``.
    """
    with h5py.File(embeddings_filename, 'r') as fin:
        embeddings = fin['embedding'][...]

    if list(embeddings.shape) != [vocab.get_vocab_size(namespace), embedding_dim]:
        raise ConfigurationError(
                "Read shape {0} embeddings from the file, but expected {1}".format(
                        list(embeddings.shape), [vocab.get_vocab_size(namespace), embedding_dim]))

    return torch.FloatTensor(embeddings)

def load_grid8(return_imsize=True):
    """Load grid 8x8.

    Parameters
    ----------
    return_imsize : bool
        return a tuple with grid size if True

    Returns
    -------
    db : h5py.File
        a HDF5 file object
    imsize : tuple (optional)
        grid size
    """
    file_path = os.path.join(rlvision.RLVISION_DATA,
                             "HDF5", "gridworld_8.hdf5")
    if not os.path.isfile(file_path):
        raise ValueError("The dataset %s is not existed!" % (file_path))

    if return_imsize is True:
        return h5py.File(file_path, mode="r"), (8, 8)
    else:
        return h5py.File(file_path, mode="r")

def encoder(args, model):
    latent_dim = args.latent_dim
    data, charset = load_dataset(args.data, split=False)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if args.save_h5:
        h5f = h5py.File(args.save_h5, 'w')
        h5f.create_dataset('charset', data=charset)
        h5f.create_dataset('latent_vectors', data=x_latent)
        h5f.close()
    else:
        np.savetxt(sys.stdout, x_latent, delimiter='\t')

def main():
    args = get_arguments()
    model = MoleculeVAE()
    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter='\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data=charset)
            h5f.create_dataset('latent_vectors', data=x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)

def fetch_data_one(self, dataitem, cycle):
    self.h5 = mrT.File(self.filename, 'r')
    try:
        data = self.h5[self.cycle_header + str(cycle)]['SE_DATASET'][dataitem]
    except ValueError:
        try:
            data = self.h5[self.cycle_header + str(cycle)].attrs.get(dataitem, None)
        except TypeError:
            data = self.h5[self.cycle_header + str(cycle)][dataitem]
    try:
        while data.shape[0] < 2:
            data = data[0]
    except (IndexError, AttributeError):
        pass
    self.h5.close()
    return data

def fromh5(path, datapath=None, dataslice=None, asnumpy=True, preptrain=None):
    """
    Opens a hdf5 file at path, loads in the dataset at datapath, and returns dataset
    as a numpy array.
    """
    # Check if path exists (thanks Lukas!)
    assert os.path.exists(path), "Path {} does not exist.".format(path)
    # Init file
    h5file = h5.File(path)
    # Init dataset
    h5dataset = h5file[datapath] if datapath is not None else h5file.values()[0]
    # Slice dataset
    h5dataset = h5dataset[dataslice] if dataslice is not None else h5dataset
    # Convert to numpy if required
    h5dataset = np.asarray(h5dataset) if asnumpy else h5dataset
    # Apply preptrain
    h5dataset = preptrain(h5dataset) if preptrain is not None else h5dataset
    # Close file
    h5file.close()
    # Return
    return h5dataset

def __check_valid_key__(self, key):
    file = h5py.File(self.file_name)
    all_fields = []
    file.visit(all_fields.append)
    if not key in all_fields:
        print_and_log(['The key %s can not be found in the dataset! Keys found are:' % key,
                       ", ".join(all_fields)], 'error', logger)
        sys.exit(1)
    file.close()

def _open(self, mode='r'):
    if mode in ['r+', 'w'] and self._parallel_write:
        self.my_file = h5py.File(self.file_name, mode=mode, driver='mpio', comm=comm)
    else:
        self.my_file = h5py.File(self.file_name, mode=mode)

    self.data = self.my_file.get(self.h5_key)

def set_streams(self, stream_mode):
    if stream_mode == 'single-file':
        sources = []
        to_write = []
        count = 0
        params = self.get_description()
        my_file = h5py.File(self.file_name)
        all_matches = [re.findall('\d+', u) for u in my_file.keys()]
        all_streams = []
        for m in all_matches:
            if len(m) > 0:
                all_streams += [int(m[0])]

        idx = numpy.argsort(all_streams)

        for i in xrange(len(all_streams)):
            params['h5_key'] = my_file.keys()[idx[i]]
            new_data = type(self)(self.file_name, params)
            sources += [new_data]
            to_write += ['We found the datafile %s with t_start %d and duration %d'
                         % (new_data.file_name, new_data.t_start, new_data.duration)]

        print_and_log(to_write, 'debug', logger)
        return sources

    elif stream_mode == 'multi-files':
        return H5File.set_streams(stream_mode)