Python tables module: open_file() code examples

The following 50 code examples, extracted from open-source Python projects, illustrate how to use tables.open_file().
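First, a minimal self-contained sketch of the basic write/read cycle (the file name and array contents are illustrative):

import numpy as np
import tables

# 'w' creates or overwrites the file; open_file also accepts 'r', 'a' and 'r+'
with tables.open_file('example.h5', mode='w', title='demo') as f:
    f.create_array(f.root, 'values', np.arange(10), 'ten integers')

# Nodes are reachable through natural naming on f.root
with tables.open_file('example.h5', mode='r') as f:
    print(f.root.values.read())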

Project: cellranger    Author: 10XGenomics
def load_h5(filename):
        with tables.open_file(filename, 'r') as f:
            group = f.root._v_groups[cr_constants.ANALYSIS_H5_MATRIX_GROUP]
            matrix = cr_matrix.GeneBCMatrix.load(group)

            analysis = SingleGenomeAnalysis(matrix)
            group = f.root._v_groups[cr_constants.ANALYSIS_H5_PCA_GROUP]
            analysis._load_pca_h5(group)

            group = f.root._v_groups[cr_constants.ANALYSIS_H5_CLUSTERING_GROUP]
            analysis._load_clustering_h5(group)

            group = f.root._v_groups[cr_constants.ANALYSIS_H5_DIFFERENTIAL_EXPRESSION_GROUP]
            analysis._load_differential_expression_h5(group)

            group = f.root._v_groups[cr_constants.ANALYSIS_H5_TSNE_GROUP]
            analysis._load_tsne_h5(group)

        return analysis
Project: cellranger    Author: 10XGenomics
def main(args, outs):
    if args.skip or args.is_multi_genome:
        return

    tsne_dims = args.tsne_dims

    matrix = cr_matrix.GeneBCMatrix.load_h5(args.matrix_h5)
    pca = cr_pca.load_pca_from_h5(args.pca_h5)
    tsne = cr_tsne.run_tsne(pca.transformed_pca_matrix, input_pcs=args.input_pcs, perplexity=args.perplexity,
                     theta=args.theta, tsne_dims=tsne_dims, max_iter=args.max_iter, stop_lying_iter=args.stop_lying_iter,
                     mom_switch_iter=args.mom_switch_iter, random_state=args.random_seed)
    tsne_map = {tsne_dims: tsne}

    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(outs.tsne_h5, 'w', filters = filters) as f:
        cr_tsne.save_tsne_h5(tsne_map, f)

    cr_tsne.save_tsne_csv(tsne_map, matrix, outs.tsne_csv)
Project: cellranger    Author: 10XGenomics
def merge_barcode_summaries(input_files, output_file):
    # Each chunk produces a barcode summary containing ALL barcodes from ALL gem
    # groups, not just the ones counted in that chunk, so the datasets need to be
    # squashed (summed element-wise) rather than concatenated.
    with tables.open_file(output_file, mode = 'w') as fout:
        dsets = {}
        # init datasets using the first input
        if len(input_files) > 0:
            with tables.open_file(input_files[0], mode = 'r') as fin:
                for node in fin.walk_nodes('/', 'Array'):
                    dsets[node.name] = fout.create_carray('/', node.name, obj=node[:])
        # add data from the other inputs
        for input_file in input_files[1:]:
            with tables.open_file(input_file, mode = 'r') as fin:
                for (name, carray) in dsets.items():
                    if name == cr_constants.H5_BC_SEQUENCE_COL:
                        continue # don't modify the barcode sequences
                    carray[:] += fin.get_node('/', name)[:]
Project: pybot    Author: spillai
def __init__(self, filename, mode, batch_size=5): 
        """
        An iterable database that should theoretically allow 
        scalable reading/writing of datasets. 
           batch_size: length of list

        Notes: 
           meta_file should contain all the related meta data 
        including keys, their corresponding value lengths, 
        overall file size etc
        """
        fn = os.path.expanduser(filename)
        if mode == 'w' or mode == 'a': 
            print('{}::{} with batch size: {}'.format(
                'Writing' if mode == 'w' else 'Appending', 
                self.__class__.__name__, batch_size))
            self.h5f_ = tb.open_file(fn, mode=mode, title='%s' % fn)
            self.data_ = {}
        elif mode == 'r': 
            self.h5f_ = tb.open_file(fn, mode=mode, title='%s' % fn)
            print('{}::Loaded with fields: {}'.format(self.__class__.__name__, self.keys))
        else: 
            raise RuntimeError('Unknown mode %s' % mode)
Project: evaluation_tools    Author: JSALT-Rosetta
def af_h5_to_np(input_path, outpath):
    # Open read-only; the original opened 'r+' but never writes, and the
    # handle was never closed, so use a context manager instead
    with tables.open_file(input_path, mode='r') as h5:
        speaker_nodes = h5.root._f_list_nodes()
        for spk in speaker_nodes:
            file_nodes = spk._f_list_nodes()
            for fls in file_nodes:
                file_name = fls._v_name
                af_nodes = fls._f_list_nodes()
                af_list = []
                for fts in af_nodes:
                    features = fts[:]
                    mean = numpy.mean(features, 1)
                    normalised_feats = list(numpy.transpose(features) / mean)
                    af_list += normalised_feats
                numpy.save(outpath + file_name, numpy.array(af_list))
Project: pyrsss    Author: butala
def dump_stec_map(h5_fname, stec_map):
    """ ??? """
    h5file = open_file(h5_fname, mode='w', title='IRI simulated slant TEC')
    group = h5file.create_group('/', 'phase_arcs', 'Phase connected arcs')
    if hasattr(stec_map, 'xyz'):
        group._v_attrs.xyz = stec_map.xyz
    if hasattr(stec_map, 'llh'):
        group._v_attrs.llh = stec_map.llh
    for sat in sorted(stec_map):
        assert sat[0] == 'G'
        table = h5file.create_table(group, sat, STecTable, 'GPS prn={} data'.format(sat[1:]))
        row = table.row
        for dt, stec_info in stec_map[sat].items():
            row['dt'] = (dt - UNIX_EPOCH).total_seconds()
            row['stec'] = stec_info.stec
            row['az'] = stec_info.az
            row['el'] = stec_info.el
            row['satx'] = stec_info.satx
            row['saty'] = stec_info.saty
            row['satz'] = stec_info.satz
            row.append()
        table.flush()
    h5file.close()
    return h5_fname
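A minimal read-side sketch for the layout written above (the satellite name 'G01' is illustrative):

import tables

with tables.open_file(h5_fname, mode='r') as f:
    table = f.get_node(f.root.phase_arcs, 'G01')
    for row in table.iterrows():
        print(row['dt'], row['stec'], row['az'], row['el'])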
Project: wikilinks    Author: trovdimi
def sparse_save(matrix, filename, dtype=np.dtype(np.float64)):
    print("SAVE SPARSE")
    print(matrix.shape)

    atom = tb.Atom.from_dtype(dtype)

    f = tb.open_file(filename, 'w')

    print("saving data")
    filters = tb.Filters(complevel=5, complib='blosc')
    out = f.create_carray(f.root, 'data', atom, shape=matrix.data.shape, filters=filters)
    out[:] = matrix.data

    print("saving indices")
    out = f.create_carray(f.root, 'indices', tb.Int64Atom(), shape=matrix.indices.shape, filters=filters)
    out[:] = matrix.indices

    print("saving indptr")
    out = f.create_carray(f.root, 'indptr', tb.Int64Atom(), shape=matrix.indptr.shape, filters=filters)
    out[:] = matrix.indptr

    print("saving done")

    f.close()
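The shape is not stored, so a loader has to be told it; a counterpart sketch, assuming the matrix saved above was CSR (the function name is ours):

import scipy.sparse
import tables as tb

def sparse_load(filename, shape):
    with tb.open_file(filename, 'r') as f:
        return scipy.sparse.csr_matrix(
            (f.root.data[:], f.root.indices[:], f.root.indptr[:]), shape=shape)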
Project: uncover-ml    Author: GeoscienceAustralia
def export_crossval(crossval_output, config):
    outfile_scores = os.path.join(config.output_dir,
                                  config.name + "_scores.json")
    with open(outfile_scores, 'w') as f:
        json.dump(crossval_output.scores, f, sort_keys=True, indent=4)

    outfile_results = os.path.join(config.output_dir,
                                   config.name + "_results.hdf5")
    with hdf.open_file(outfile_results, 'w') as f:
        for fld, v in crossval_output.y_pred.items():
            label = "_".join(fld.split())
            f.create_array("/", label, obj=v.data)
            f.create_array("/", label + "_mask", obj=v.mask)
        f.create_array("/", "y_true", obj=crossval_output.y_true)

    create_scatter_plot(outfile_results, config)
Project: uncover-ml    Author: GeoscienceAustralia
def create_scatter_plot(outfile_results, config):
    true_vs_pred = os.path.join(config.output_dir,
                                config.name + "_results.csv")
    true_vs_pred_plot = os.path.join(config.output_dir,
                                     config.name + "_results.png")
    with hdf.open_file(outfile_results, 'r') as f:
        prediction = f.get_node("/", "Prediction").read()
        y_true = f.get_node("/", "y_true").read()
        np.savetxt(true_vs_pred, X=np.vstack([y_true, prediction]).T,
                   delimiter=',')
        plt.figure()
        plt.scatter(y_true, prediction)
        plt.title('true vs prediction')
        plt.xlabel('True')
        plt.ylabel('Prediction')
        plt.savefig(true_vs_pred_plot)
Project: kripodb    Author: 3D-e-Chem
def open_similarity_matrix(fn):
    """Open read-only similarity matrix file.

    Args:
        fn (str): Filename of similarity matrix

    Returns:
        SimilarityMatrix | FrozenSimilarityMatrix: A read-only similarity matrix object

    """
    # peek in file to detect format
    f = tables.open_file(fn, 'r')
    is_frozen = 'scores' in f.root
    f.close()
    if is_frozen:
        matrix = FrozenSimilarityMatrix(fn)
    else:
        matrix = SimilarityMatrix(fn, cache_labels=True)
    return matrix
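The peek-then-reopen pattern above generalizes into a small helper; a sketch (the helper name is ours):

import tables

def h5_has_node(fn, name):
    with tables.open_file(fn, 'r') as f:
        return name in f.root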
Project: transmutagen    Author: ergs
def save_file_origen(file, *, ORIGEN_data, lib, nucs, start_nuclide, time,
    phi, ORIGEN_time, n_fission_fragments=2.004):

    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
        filters=tables.Filters(complevel=1)) as h5file:

        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        table = h5file.get_node(h5file.root, lib + '/origen')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time ORIGEN'] = ORIGEN_time
        table.row['ORIGEN atom fraction'] = origen_data_to_array_weighted(ORIGEN_data, nucs, n_fission_fragments=n_fission_fragments)
        table.row['ORIGEN mass fraction'] = origen_data_to_array_materials(ORIGEN_data, nucs)
        table.row.append()
        table.flush()
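A minimal sketch of reading such rows back, assuming the same file and lib as above:

import tables

with tables.open_file(file, mode='r') as h5file:
    table = h5file.get_node(h5file.root, lib + '/origen')
    for row in table.iterrows():
        print(row['library'], row['time'], row['phi'])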
Project: transmutagen    Author: ergs
def save_file_cram_lambdify(file, *, CRAM_lambdify_res, lib, nucs, start_nuclide, time,
    phi, CRAM_lambdify_time, umfpack, n_fission_fragments=2.004):
    assert len(CRAM_lambdify_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
        filters=tables.Filters(complevel=1)) as h5file:

        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        nodename = '/cram-lambdify-umfpack' if umfpack else '/cram-lambdify-superlu'
        table = h5file.get_node(h5file.root, lib + nodename)
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM lambdify'] = CRAM_lambdify_time
        table.row['CRAM lambdify atom fraction'] = CRAM_lambdify_res
        CRAM_lambdify_res_normalized = CRAM_lambdify_res/np.sum(CRAM_lambdify_res)
        table.row['CRAM lambdify mass fraction'] = CRAM_lambdify_res_normalized
        table.row.append()
        table.flush()
Project: transmutagen    Author: ergs
def save_file_cram_py_solve(file, *, CRAM_py_solve_res, lib, nucs, start_nuclide, time,
    phi, CRAM_py_solve_time, n_fission_fragments=2.004):
    assert len(CRAM_py_solve_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
        filters=tables.Filters(complevel=1)) as h5file:

        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        table = h5file.get_node(h5file.root, lib + '/cram-py_solve')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM py_solve'] = CRAM_py_solve_time
        table.row['CRAM py_solve atom fraction'] = CRAM_py_solve_res
        CRAM_py_solve_res_normalized = CRAM_py_solve_res/np.sum(CRAM_py_solve_res)
        table.row['CRAM py_solve mass fraction'] = CRAM_py_solve_res_normalized
        table.row.append()
        table.flush()
Project: catalyst    Author: enigmampc
def write(self, frames):
        """
        Write the frames to the target HDF5 file, using the format used by
        ``pd.Panel.to_hdf``

        Parameters
        ----------
        frames : iter[(int, DataFrame)] or dict[int -> DataFrame]
            An iterable of (sid, DataFrame) pairs or a mapping from sid to
            the corresponding OHLCV pricing data.
        """
        with HDFStore(self._path, 'w',
                      complevel=self._complevel, complib=self._complib) \
                as store:
            panel = pd.Panel.from_dict(dict(frames))
            panel.to_hdf(store, 'updates')
        with tables.open_file(self._path, mode='r+') as h5file:
            h5file.set_node_attr('/', 'version', 0)
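Note that pd.Panel was removed in pandas 1.0, so the snippet above requires an older pandas. The version attribute it writes can be read back with get_node_attr; a sketch (path stands for self._path):

import tables

with tables.open_file(path, mode='r') as h5file:
    version = h5file.get_node_attr('/', 'version')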
Project: tu-dortmund-ice-cube    Author: wjam1995
def get_number_of_rows(files, verbose):
    # Swap out tqdm if not verbose
    def verbose_passthrough(a):
        return a

    fn = tqdm if verbose else verbose_passthrough

    n_wfs = 0
    for filename in fn(files):
        f = tables.open_file(filename)
        waveform = f.get_node('/waveforms')
        n_wfs += waveform.nrows
        f.close()
    return n_wfs
Project: recipe_zs2017_track2    Author: kamperh
def main():
    args = check_argv()

    print("Reading:", args.mat_fn)
    mat = tables.open_file(args.mat_fn)

    n_audio = mat.root.files_train[0].shape[0]
    print("No. audio files:", n_audio)

    filenames = []
    for i_audio in range(n_audio):
        filenames.append("".join([chr(i[0]) for i in mat.root.files_train[0][i_audio][0]]))
    audio_keys = [path.splitext(path.split(i)[-1])[0] for i in filenames]

    features_dict = {}
    for i_audio in range(n_audio):
        features = mat.root.F_train_iter[0][i_audio][0]
        features_dict[audio_keys[i_audio].replace("_", "-")] = features.T

    print("Writing:", args.npz_fn)
    np.savez(args.npz_fn, **features_dict)
    mat.close()
Project: oasis    Author: ngmarchant
def read_h5(self, h5_path, load_features=False):

        h5_file = tables.open_file(h5_path, mode = 'r')

        if load_features and hasattr(h5_file.root, "features"):
            self.features = h5_file.root.features[:,:]
            self.num_fts = h5_file.root.features.shape[1]
        if hasattr(h5_file.root, "labels"):
            self.labels = h5_file.root.labels[:]
            self.num_items = len(self.labels)
        if hasattr(h5_file.root, "scores"):
            self.scores = h5_file.root.scores[:]
            self.num_items = len(self.scores)
        if hasattr(h5_file.root, "probs"):
            self.probs = h5_file.root.probs[:]
            self.num_items = len(self.probs)
        if hasattr(h5_file.root, "preds"):
            self.preds = h5_file.root.preds[:]
            self.num_items = len(self.preds)

        h5_file.close()
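The write side is not part of this excerpt; a minimal counterpart sketch that produces only nodes read_h5() knows how to load (the function name is ours):

import numpy as np
import tables

def write_h5(h5_path, labels=None, scores=None):
    with tables.open_file(h5_path, mode='w') as f:
        if labels is not None:
            f.create_array(f.root, 'labels', np.asarray(labels))
        if scores is not None:
            f.create_array(f.root, 'scores', np.asarray(scores))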
Project: vsmlib    Author: undertherain
def load_from_hdf5(self, path):
        """load model in compressed sparse row format from hdf5 file

        hdf5 file should contain row_ptr, col_ind and data array

        Args:
            path: path to the embeddings folder
        """
        self.load_metadata(path)
        f = tables.open_file(os.path.join(path, 'cooccurrence_csr.h5p'), 'r')
        row_ptr = np.nan_to_num(f.root.row_ptr.read())
        col_ind = np.nan_to_num(f.root.col_ind.read())
        data = np.nan_to_num(f.root.data.read())
        dim = row_ptr.shape[0] - 1
        self.matrix = scipy.sparse.csr_matrix(
            (data, col_ind, row_ptr), shape=(dim, dim), dtype=np.float32)
        f.close()
        self.vocabulary = Vocabulary_cooccurrence()
        self.vocabulary.load(path)
        self.name += os.path.basename(os.path.normpath(path))
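A matching write-side sketch (the function name is ours; the array names follow what load_from_hdf5() expects):

import os
import scipy.sparse
import tables

def save_to_hdf5(matrix, path):
    m = scipy.sparse.csr_matrix(matrix)
    with tables.open_file(os.path.join(path, 'cooccurrence_csr.h5p'), 'w') as f:
        f.create_array(f.root, 'row_ptr', m.indptr)
        f.create_array(f.root, 'col_ind', m.indices)
        f.create_array(f.root, 'data', m.data)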
Project: vsmlib    Author: undertherain
def load_with_alpha(self, path, power=0.6):
        # self.load_provenance(path)
        f = tables.open_file(os.path.join(path, 'vectors.h5p'), 'r')
#        left = np.nan_to_num(f.root.vectors.read())
        left = f.root.vectors.read()
        sigma = f.root.sigma.read()
        logger.info("loaded left singular vectors and sigma")
        sigma = np.power(sigma, power)
        self.matrix = np.dot(left, np.diag(sigma))
        logger.info("computed the product")
        self.metadata["pow_sigma"] = power
        self.metadata["size_dimensions"] = int(self.matrix.shape[1])
        f.close()
        self.vocabulary = Vocabulary_simple()
        self.vocabulary.load(path)
        self.name += os.path.basename(os.path.normpath(path)) + "_a" + str(power)
Project: msprime-abc    Author: DomNelson
def genotypes(self, nodes, h5file):
        """
        Returns the full genotypes associated with the provided nodes
        """
        with tables.open_file(h5file, 'r') as f:
            ind_IDs = f.root.inds[:]
            uID_idx = dict([(ID, i) for i, ID in enumerate(ind_IDs)])

            genotypes = {}
            for node in nodes:
                ID = self.idx_haps[node]
                uID = np.abs(ID).astype(int)

                try:
                    file_idx = uID_idx[uID]
                except KeyError:
                    print("No genotype for", node)
                    continue

                chrom = signed_to_bool(np.sign(ID))
                genotypes[node] = f.root.haps[file_idx][chrom]
                assert f.root.inds[file_idx] == uID

        return genotypes
Project: batchup    Author: Britefury
def fetch_svhn_train_test(source_paths, target_path):
    train_path, test_path = source_paths

    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')

    # Load in the training data Matlab file
    print('Converting {} to HDF5...'.format(train_path))
    train_X_u8, train_y = _read_svhn_matlab(train_path)
    f_out.create_array(g_out, 'train_X_u8', train_X_u8)
    f_out.create_array(g_out, 'train_y', train_y)
    del train_X_u8
    del train_y

    # Load in the test data Matlab file
    print('Converting {} to HDF5...'.format(test_path))
    test_X_u8, test_y = _read_svhn_matlab(test_path)
    f_out.create_array(g_out, 'test_X_u8', test_X_u8)
    f_out.create_array(g_out, 'test_y', test_y)
    del test_X_u8
    del test_y

    f_out.close()

    return target_path
Project: batchup    Author: Britefury
def fetch_svhn_extra(source_paths, target_path):
    extra_path = source_paths[0]

    print('Converting {} to HDF5 (compressed)...'.format(extra_path))
    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(
        g_out, 'extra_X_u8', tables.UInt8Atom(), (0, 3, 32, 32),
        filters=filters)
    y_arr = f_out.create_earray(
        g_out, 'extra_y', tables.Int32Atom(), (0,), filters=filters)

    # Load in the extra data Matlab file
    _insert_svhn_matlab_to_h5(X_u8_arr, y_arr, extra_path)

    f_out.close()

    return target_path
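Because extra_X_u8 is a compressed EArray, it can be read back lazily in slices instead of all at once; a sketch:

import tables

with tables.open_file(target_path, mode='r') as f:
    X = f.root.svhn.extra_X_u8
    for start in range(0, X.nrows, 1024):
        batch = X[start:start + 1024]  # decompressed chunk by chunk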
Project: batchup    Author: Britefury
def __init__(self, n_val=729):
        data_path = usps_data()

        if data_path is not None:
            f = tables.open_file(data_path, mode='r')

            train_X = f.root.usps.train_X
            train_y = f.root.usps.train_y
            test_X = f.root.usps.test_X
            test_y = f.root.usps.test_y

            if n_val == 0 or n_val is None:
                self.train_X, self.train_y = train_X, train_y
                self.val_X = np.zeros((0, 1, 16, 16), dtype=np.float32)
                self.val_y = np.zeros((0,), dtype=np.int32)
            else:
                self.train_X, self.val_X = train_X[:-n_val], train_X[-n_val:]
                self.train_y, self.val_y = train_y[:-n_val], train_y[-n_val:]
            self.test_X, self.test_y = test_X, test_y
Project: batchup    Author: Britefury
def _convert_mnist(dataset_name, target_path, train_X_path, train_y_path,
                   test_X_path, test_y_path):
    print('Converting {} to HDF5'.format(dataset_name))
    train_X_u8 = _read_mnist_images(train_X_path)
    train_y = _read_mnist_labels(train_y_path)
    test_X_u8 = _read_mnist_images(test_X_path)
    test_y = _read_mnist_labels(test_y_path)

    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'mnist', 'MNIST data')
    f_out.create_array(g_out, 'train_X_u8', train_X_u8)
    f_out.create_array(g_out, 'train_y', train_y)
    f_out.create_array(g_out, 'test_X_u8', test_X_u8)
    f_out.create_array(g_out, 'test_y', test_y)

    f_out.close()

    return target_path
Project: batchup    Author: Britefury
def __init__(self, h5_path, n_val=10000, val_lower=0.0, val_upper=1.0):
        f = tables.open_file(h5_path, mode='r')

        train_X_u8 = f.root.mnist.train_X_u8
        train_y = f.root.mnist.train_y
        self.test_X_u8 = f.root.mnist.test_X_u8
        self.test_y = f.root.mnist.test_y

        if n_val == 0 or n_val is None:
            self.train_X_u8 = train_X_u8
            self.train_y = train_y
            self.val_X_u8 = np.zeros((0, 1, 28, 28), dtype=np.uint8)
            self.val_y = np.zeros((0,), dtype=np.int32)
        else:
            self.train_X_u8 = train_X_u8[:-n_val]
            self.val_X_u8 = train_X_u8[-n_val:]
            self.train_y, self.val_y = train_y[:-n_val], train_y[-n_val:]

        self.train_X = ImageArrayUInt8ToFloat32(self.train_X_u8, val_lower,
                                                val_upper)
        self.val_X = ImageArrayUInt8ToFloat32(self.val_X_u8, val_lower,
                                              val_upper)
        self.test_X = ImageArrayUInt8ToFloat32(self.test_X_u8, val_lower,
                                               val_upper)
Project: multitables    Author: ghcollin
def __enter__(self):
        import tables
        if self.filename is None:
            self.filedir = tempfile.mkdtemp()
            self.filename = os.path.join(self.filedir, 'bench.h5')
        else:
            self.filedir = None

        h5_file = tables.open_file(self.filename, 'w')
        array_kw_args = {}
        if self.complevel > 0:
            array_kw_args['filters'] = tables.Filters(complib=self.complib, complevel=self.complevel)

        array_path = '/bench'
        #ary = h5_file.create_array(h5_file.root, array_path[1:],
        #                           np.arange(np.prod(file_shape), dtype=file_type).reshape(file_shape))
        ary = h5_file.create_earray(h5_file.root, array_path[1:], atom=tables.Atom.from_dtype(file_type),
                                    shape=file_shape, expectedrows=self.n_rows, **array_kw_args)
        for _ in range(0, self.n_rows, 2**10):
            ary.append(2**8*np.random.randn(2**10, *file_shape[1:]))
        print(ary.shape)

        h5_file.close()

        return self.filename, array_path
Project: multitables    Author: ghcollin
def setUp(self):
        self.test_dir = tempfile.mkdtemp()
        self.test_filename = os.path.join(self.test_dir, 'test.h5')
        test_file = tables.open_file(self.test_filename, 'w')

        self.test_array = np.arange(100*1000).reshape((1000, 10, 10))
        self.test_array_path = '/test_array'
        array = test_file.create_array(test_file.root, self.test_array_path[1:], self.test_array)

        self.test_table_ary = np.array([ (
            np.random.randint(256, size=np.prod(test_table_col_A_shape)).reshape(test_table_col_A_shape),
            np.random.rand(*test_table_col_B_shape)) for _ in range(100) ],
                                       dtype=tables.dtype_from_descr(TestTableRow))
        self.test_table_path = '/test_table'
        table = test_file.create_table(test_file.root, self.test_table_path[1:], TestTableRow)
        table.append(self.test_table_ary)

        test_file.close()
Project: PyDeepGP    Author: SheffieldML
def setUp(self):
        # Load data
        Y = np.loadtxt(os.path.join(base_path,self.datafile))
        m = deepgp.DeepGP([Y.shape[1],5,2],Y,kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(2,ARD=True)], num_inducing=2, back_constraint=False)
        if not os.path.exists(os.path.join(base_path,self.modelfile)):
            # Create the model file
            m.randomize()
            m._trigger_params_changed()
            m.save(os.path.join(base_path,self.modelfile))
            with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
                L = f.create_dataset("L", (1,), dtype=float)
                L[:] = m._log_marginal_likelihood

        # Load model parameters
        with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
            m.param_array[:] = f.root.param_array[:]
            L = float(f.root.L[:])
            m._trigger_params_changed()
        self.model = m
        self.L = L
Project: PyDeepGP    Author: SheffieldML
def setUp(self):
        # Load data
        Y = np.loadtxt(os.path.join(base_path,self.outputfile))
        X = np.loadtxt(os.path.join(base_path,self.inputfile))
        m = deepgp.DeepGP([Y.shape[1],5,X.shape[1]],Y, X=X,kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(X.shape[1],ARD=True)], num_inducing=2, back_constraint=False)
        if not os.path.exists(os.path.join(base_path,self.modelfile)):
            # Create the model file
            m.randomize()
            m._trigger_params_changed()
            m.save(os.path.join(base_path,self.modelfile))
            with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
                L = f.create_dataset("L", (1,), dtype=float)
                L[:] = m._log_marginal_likelihood

        # Load model parameters
        with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
            m.param_array[:] = f.root.param_array[:]
            L = float(f.root.L[:])
            m._trigger_params_changed()
        self.model = m
        self.L = L
Project: PyDeepGP    Author: SheffieldML
def setUp(self):
        # Load data
        Y = np.loadtxt(os.path.join(base_path,self.datafile))
        m = deepgp.DeepGP([Y.shape[1],5,2],Y,kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(2,ARD=True)], num_inducing=2, back_constraint=True, encoder_dims=[[3],[2]])
        if not os.path.exists(os.path.join(base_path,self.modelfile)):
            # Create the model file
            m.randomize()
            m._trigger_params_changed()
            m.save(os.path.join(base_path,self.modelfile))
            with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
                L = f.create_dataset("L", (1,), dtype=float)
                L[:] = m._log_marginal_likelihood

        # Load model parameters
        with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
            m.param_array[:] = f.root.param_array[:]
            L = float(f.root.L[:])
            m._trigger_params_changed()
        self.model = m
        self.L = L
Project: PyDeepGP    Author: SheffieldML
def setUp(self):
        # Load data
        Y = np.loadtxt(os.path.join(base_path,self.outputfile))
        X = np.loadtxt(os.path.join(base_path,self.inputfile))
        m = deepgp.DeepGP([Y.shape[1],5,X.shape[1]],Y, X=X,kernels=[GPy.kern.RBF(5,ARD=True), GPy.kern.RBF(X.shape[1],ARD=True)], num_inducing=2, back_constraint=True, encoder_dims=[[3]])
        if not os.path.exists(os.path.join(base_path,self.modelfile)):
            # Create the model file
            m.randomize()
            m._trigger_params_changed()
            m.save(os.path.join(base_path,self.modelfile))
            with h5py.File(os.path.join(base_path,self.modelfile),'r+') as f:
                L = f.create_dataset("L", (1,), dtype=float)
                L[:] = m._log_marginal_likelihood

        # Load model parameters
        with tables.open_file(os.path.join(base_path,self.modelfile),'r') as f:
            m.param_array[:] = f.root.param_array[:]
            L = float(f.root.L[:])
            m._trigger_params_changed()
        self.model = m
        self.L = L
Project: ProjectOfDataMining    Author: IljaNovo
def process(self, rows_slice):
        with Worker.hdf5_lock:
            with tables.open_file(self.hdf5_file, 'r+') as fileh:
                hdf5_array = fileh.get_node(self.path)
                X = hdf5_array[rows_slice, ...]

        eensy = np.finfo(np.float32).eps
        weensy = np.finfo(np.float32).tiny * 100
        tmp = self.random_state.randn(rows_slice.stop - rows_slice.start, self.N)
        X += (eensy * X + weensy) * tmp

        with Worker.hdf5_lock:
            with tables.open_file(self.hdf5_file, 'r+') as fileh:
                hdf5_array = fileh.get_node(self.path)
                hdf5_array[rows_slice, ...] = X

        del X
Project: ProjectOfDataMining    Author: IljaNovo
def cluster_labels_A(hdf5_file, c, lock, I, rows_slice):
    """One of the task to be performed by a pool of subprocesses, as the first
        step in identifying the cluster labels and indices of the cluster centers
        for Affinity Propagation clustering.
    """

    with Worker.hdf5_lock:
        with tables.open_file(hdf5_file, 'r+') as fileh:
            S = fileh.root.aff_prop_group.similarities
            s = S[rows_slice, ...]

    s = np.argmax(s[:, I], axis = 1)

    with lock:        
        c[rows_slice] = s[:]

    del s
Project: ProjectOfDataMining    Author: IljaNovo
def cluster_labels_B(hdf5_file, s_reduced, lock, I, ii, iix, rows_slice):
    """Second task to be performed by a pool of subprocesses before
        the cluster labels and cluster center indices can be identified.
    """

    with Worker.hdf5_lock:
        with tables.open_file(hdf5_file, 'r+') as fileh:
            S = fileh.root.aff_prop_group.similarities
            s = S[rows_slice, ...]

    s = s[:, ii]
    s = s[iix[rows_slice]]

    with lock:                
        s_reduced += s[:].sum(axis = 0)

    del s
Project: cellranger    Author: 10XGenomics
def combine_h5_files(in_files, out_file, groups):
    fins = [tables.open_file(filename, 'r') for filename in in_files]
    with tables.open_file(out_file, 'w') as fout:
        for group in groups:
            _combine_h5_group(fins, fout, group)
    for fin in fins:
        fin.close()
Project: cellranger    Author: 10XGenomics
def open_h5_for_writing(filename):
    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    return tables.open_file(filename, 'w', filters = filters)
Project: cellranger    Author: 10XGenomics
def load_pca_from_h5(filename):
    """ Load just the PCA info from an analysis h5 """
    with tables.open_file(filename, 'r') as f:
        group = f.root._v_groups[cr_constants.ANALYSIS_H5_PCA_GROUP]
        # Just take the first PCA object, assuming we never have multiple
        for _, pca in cr_io.load_h5_iter(group, PCA):
            return pca
Project: cellranger    Author: 10XGenomics
def load_pca_from_h5(filename):
        """ Load just the PCA info from an analysis h5 """
        with tables.open_file(filename, 'r') as f:
            group = f.root._v_groups[cr_constants.ANALYSIS_H5_PCA_GROUP]
            # Just take the first PCA object, assuming we never have multiple
            for _, pca in cr_io.load_h5_iter(group, PCA):
                return pca
Project: cellranger    Author: 10XGenomics
def load_clustering_from_h5(filename, clustering_key):
        """ Load a single clustering from an analysis h5 """
        with tables.open_file(filename, 'r') as f:
            group = getattr(f.root, cr_constants.ANALYSIS_H5_CLUSTERING_GROUP)
            for subgroup in group:
                if subgroup._v_name == '_' + clustering_key:
                    return cr_io.load_h5_namedtuple(subgroup, cr_clustering.CLUSTERING)
            raise ValueError("Could not find clustering key: %s in HDF5 file %s" % (clustering_key, filename))
Project: cellranger    Author: 10XGenomics
def load_bcs_from_matrix_h5(filename):
        """ Load just the barcodes from a matrix h5 """
        with tables.open_file(filename, 'r') as f:
            # Take the first group, assuming a single-genome matrix
            group = list(f.list_nodes(f.root))[0]
            return cr_matrix.GeneBCMatrix.load_bcs_from_h5_group(group)
Project: cellranger    Author: 10XGenomics
def load_graphclust_from_h5(filename):
    with tables.open_file(filename, 'r') as f:
        group = f.root._v_groups[cr_constants.ANALYSIS_H5_CLUSTERING_GROUP]

        # Take the first entry
        for key, clustering in cr_io.load_h5_iter(group, cr_clustering.CLUSTERING):
            clustering_type, _ = cr_clustering.parse_clustering_key(key)
            if clustering_type == cr_clustering.CLUSTER_TYPE_GRAPHCLUST:
                return clustering
Project: cellranger    Author: 10XGenomics
def concatenate_h5(input_files, output_file):
    with tables.open_file(output_file, mode = 'w') as fout:
        dsets = {}
        # init datasets using the first input
        if len(input_files) > 0:
            with tables.open_file(input_files[0], mode = 'r') as fin:
                for node in fin.walk_nodes('/', 'Array'):
                    atom = tables.Atom.from_dtype(np.dtype(node.dtype))
                    dsets[node.name] = fout.create_earray('/', node.name, atom, (0,))
        # copy the data
        for input_file in input_files:
            with tables.open_file(input_file, mode = 'r') as fin:
                for (name, earray) in dsets.items():
                    earray.append(fin.get_node('/', name)[:])
Project: cellranger    Author: 10XGenomics
def get_h5_filetype(filename):
    with tables.open_file(filename, mode = 'r') as f:
        try:
            filetype = f.get_node_attr('/', cr_constants.H5_FILETYPE_KEY)
        except AttributeError:
            filetype = None # older files lack this key
    return filetype
Project: cellranger    Author: 10XGenomics
def load_array_h5(filename, name):
    """ Load an array from the root of an h5 file """
    with tables.open_file(filename, 'r') as f:
        return getattr(f.root, name).read()
Project: cellranger    Author: 10XGenomics
def save_h5(self, filename, extra_attrs={}):
        self.tocsc()
        filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
        with tables.open_file(filename, 'w', filters = filters) as f:
            f.set_node_attr('/', cr_constants.H5_FILETYPE_KEY, MATRIX_H5_FILETYPE)
            # set optional top-level attributes
            for (k, v) in extra_attrs.items():
                f.set_node_attr('/', k, v)
            for genome, matrix in self.matrices.items():
                group = f.create_group(f.root, genome)
                matrix.save_h5(f, group)
Project: cellranger    Author: 10XGenomics
def merge(self, filename):
        self.tocoo()
        with tables.open_file(filename, 'r') as f:
            for group in f.list_nodes(f.root):
                genome = group._v_name
                if genome in self.matrices:
                    self.matrices[genome].merge(group)
                else:
                    self.matrices[genome] = GeneBCMatrix.load(group)
Project: cellranger    Author: 10XGenomics
def load_h5(filename):
        matrices = GeneBCMatrices()
        with tables.open_file(filename, 'r') as f:
            for group in f.list_nodes(f.root):
                genome = group._v_name
                matrices.matrices[genome] = GeneBCMatrix.load(group)
        return matrices
Project: cellranger    Author: 10XGenomics
def load_dims_from_h5(filename):
        dims = {}
        with tables.open_file(filename, 'r') as f:
            for group in f.list_nodes(f.root):
                genome = group._v_name
                dims[genome] = GeneBCMatrix.load_dims(group)
        return dims
Project: cellranger    Author: 10XGenomics
def load_chemistry_from_h5(filename):
        with tables.open_file(filename, 'r') as f:
            try:
                chemistry = f.get_node_attr('/', cr_constants.H5_CHEMISTRY_DESC_KEY)
            except AttributeError:
                chemistry = "Unknown"
        return chemistry
Project: cellranger    Author: 10XGenomics
def get_matrix_attrs(matrix_h5):
    attrs = {}
    with tables.open_file(matrix_h5, 'r') as f:
        for key in cr_constants.H5_METADATA_ATTRS:
            try:
                val = f.get_node_attr('/', key)
                attrs[key] = val
            except AttributeError:
                pass
    return attrs
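The try/except AttributeError pattern in the last few snippets generalizes into a small helper; a closing sketch (the helper name is ours):

import tables

def get_attr_or_default(filename, key, default=None):
    # Older files may lack the attribute; fall back instead of raising
    with tables.open_file(filename, mode='r') as f:
        try:
            return f.get_node_attr('/', key)
        except AttributeError:
            return default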