Python tables module: Filters() code examples

We extracted the following 26 code examples from open-source Python projects to illustrate how to use tables.Filters().
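Before the project code, here is a minimal, self-contained sketch of the two ways the examples below apply a tables.Filters instance: as a file-wide default passed to tables.open_file, and per node via the filters= argument of create_carray/create_earray. The file name and data are illustrative, not taken from any project below.

import numpy as np
import tables

# Filters bundles compression settings: complevel ranges from 0 (off) to 9
# (max), and complib selects the codec ('zlib', 'blosc', 'lzo', 'bzip2').
filters = tables.Filters(complevel=5, complib='blosc')

# Illustrative file name; nodes created without an explicit filters=
# argument inherit the file-wide default given here.
with tables.open_file('filters_demo.h5', mode='w', filters=filters) as f:
    data = np.arange(100, dtype=np.float64)
    f.create_carray(f.root, 'inherited', obj=data)

    # A per-node Filters instance overrides the file-wide default.
    f.create_carray(f.root, 'override', obj=data,
                    filters=tables.Filters(complevel=9, complib='zlib'))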

Project: cellranger    Author: 10XGenomics    | project source | file source
def main(args, outs):
    if args.skip or args.is_multi_genome:
        return

    tsne_dims = args.tsne_dims

    matrix = cr_matrix.GeneBCMatrix.load_h5(args.matrix_h5)
    pca = cr_pca.load_pca_from_h5(args.pca_h5)
    tsne = cr_tsne.run_tsne(pca.transformed_pca_matrix, input_pcs=args.input_pcs, perplexity=args.perplexity,
                     theta=args.theta, tsne_dims=tsne_dims, max_iter=args.max_iter, stop_lying_iter=args.stop_lying_iter,
                     mom_switch_iter=args.mom_switch_iter, random_state=args.random_seed)
    tsne_map = {tsne_dims: tsne}

    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(outs.tsne_h5, 'w', filters = filters) as f:
        cr_tsne.save_tsne_h5(tsne_map, f)

    cr_tsne.save_tsne_csv(tsne_map, matrix, outs.tsne_csv)
Project: wikilinks    Author: trovdimi    | project source | file source
def sparse_save(matrix, filename, dtype=np.dtype(np.float64)):
    print "SAVE SPARSE"
    print matrix.shape

    atom = tb.Atom.from_dtype(dtype)

    f = tb.open_file(filename, 'w')

    print "saving data"
    filters = tb.Filters(complevel=5, complib='blosc')
    out = f.create_carray(f.root, 'data', atom, shape=matrix.data.shape, filters=filters)
    out[:] = matrix.data

    print "saving indices"
    out = f.create_carray(f.root, 'indices', tb.Int64Atom(), shape=matrix.indices.shape, filters=filters)
    out[:] = matrix.indices

    print "saving indptr"
    out = f.create_carray(f.root, 'indptr', tb.Int64Atom(), shape=matrix.indptr.shape, filters=filters)
    out[:] = matrix.indptr

    print "saving done"

    f.close()
Project: transmutagen    Author: ergs    | project source | file source
def save_file_origen(file, *, ORIGEN_data, lib, nucs, start_nuclide, time,
    phi, ORIGEN_time, n_fission_fragments=2.004):

    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
        filters=tables.Filters(complevel=1)) as h5file:

        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        table = h5file.get_node(h5file.root, lib + '/origen')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time ORIGEN'] = ORIGEN_time
        table.row['ORIGEN atom fraction'] = origen_data_to_array_weighted(ORIGEN_data, nucs, n_fission_fragments=n_fission_fragments)
        table.row['ORIGEN mass fraction'] = origen_data_to_array_materials(ORIGEN_data, nucs)
        table.row.append()
        table.flush()
Project: transmutagen    Author: ergs    | project source | file source
def save_file_cram_lambdify(file, *, CRAM_lambdify_res, lib, nucs, start_nuclide, time,
    phi, CRAM_lambdify_time, umfpack, n_fission_fragments=2.004):
    assert len(CRAM_lambdify_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
        filters=tables.Filters(complevel=1)) as h5file:

        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        nodename = '/cram-lambdify-umfpack' if umfpack else '/cram-lambdify-superlu'
        table = h5file.get_node(h5file.root, lib + nodename)
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM lambdify'] = CRAM_lambdify_time
        table.row['CRAM lambdify atom fraction'] = CRAM_lambdify_res
        CRAM_lambdify_res_normalized = CRAM_lambdify_res/np.sum(CRAM_lambdify_res)
        table.row['CRAM lambdify mass fraction'] = CRAM_lambdify_res_normalized
        table.row.append()
        table.flush()
Project: transmutagen    Author: ergs    | project source | file source
def save_file_cram_py_solve(file, *, CRAM_py_solve_res, lib, nucs, start_nuclide, time,
    phi, CRAM_py_solve_time, n_fission_fragments=2.004):
    assert len(CRAM_py_solve_res) == len(nucs)
    with tables.open_file(file, mode="a", title="ORIGEN and CRAM data",
        filters=tables.Filters(complevel=1)) as h5file:

        if lib not in h5file.root:
            create_hdf5_table(file, lib, nucs)

        table = h5file.get_node(h5file.root, lib + '/cram-py_solve')
        table.row['initial vector'] = vec = initial_vector(start_nuclide, nucs)
        table.row['library'] = lib
        table.row['hash'] = hash_data(vec, lib, time, phi, n_fission_fragments)
        table.row['time'] = time
        table.row['phi'] = phi
        table.row['n_fission_fragments'] = n_fission_fragments
        table.row['execution time CRAM py_solve'] = CRAM_py_solve_time
        table.row['CRAM py_solve atom fraction'] = CRAM_py_solve_res
        CRAM_py_solve_res_normalized = CRAM_py_solve_res/np.sum(CRAM_py_solve_res)
        table.row['CRAM py_solve mass fraction'] = CRAM_py_solve_res_normalized
        table.row.append()
        table.flush()
Project: batchup    Author: Britefury    | project source | file source
def fetch_svhn_extra(source_paths, target_path):
    extra_path = source_paths[0]

    print('Converting {} to HDF5 (compressed)...'.format(extra_path))
    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(
        g_out, 'extra_X_u8', tables.UInt8Atom(), (0, 3, 32, 32),
        filters=filters)
    y_arr = f_out.create_earray(
        g_out, 'extra_y', tables.Int32Atom(), (0,), filters=filters)

    # Load in the extra data Matlab file
    _insert_svhn_matlab_to_h5(X_u8_arr, y_arr, extra_path)

    f_out.close()

    return target_path
Project: multitables    Author: ghcollin    | project source | file source
def __enter__(self):
        import tables
        if self.filename is None:
            self.filedir = tempfile.mkdtemp()
            self.filename = os.path.join(self.filedir, 'bench.h5')
        else:
            self.filedir = None

        h5_file = tables.open_file(self.filename, 'w')
        array_kw_args = {}
        if self.complevel > 0:
            array_kw_args['filters'] = tables.Filters(complib=self.complib, complevel=self.complevel)

        array_path = '/bench'
        #ary = h5_file.create_array(h5_file.root, array_path[1:],
        #                           np.arange(np.prod(file_shape), dtype=file_type).reshape(file_shape))
        ary = h5_file.create_earray(h5_file.root, array_path[1:], atom=tables.Atom.from_dtype(file_type),
                                    shape=file_shape, expectedrows=self.n_rows, **array_kw_args)
        for _ in range(0, self.n_rows, 2**10):
            ary.append(2**8*np.random.randn(2**10, *file_shape[1:]))
        print(ary.shape)

        h5_file.close()

        return self.filename, array_path
Project: PH5    Author: PIC-IRIS    | project source | file source
def create_empty_earray (filenode, groupnode, name, batom = None, expectedrows = None) :
    try :
        bfilter = tables.Filters (complevel=ZLIBCOMP, complib='zlib')
        if expectedrows == None :
            a = filenode.create_earray (groupnode, 
                                        name, 
                                        atom=batom, 
                                        shape=(0,), 
                                        filters=bfilter)
        else :
            a = filenode.create_earray (groupnode, 
                                        name, 
                                        atom=batom, 
                                        shape=(0,), 
                                        filters=bfilter, 
                                        expectedrows=expectedrows)            

    except Exception as e :
        raise HDF5InteractionError (5, e.message)

    return a
Project: zipline-chinese    Author: zhanghan1990    | project source | file source
def merge_all_files_into_pytables(file_dir, file_out):
    """
    process each file into pytables
    """
    start = None
    start = datetime.datetime.now()
    out_h5 = tables.openFile(file_out,
                             mode="w",
                             title="bars",
                             filters=tables.Filters(complevel=9,
                                                    complib='zlib'))
    table = None
    for file_in in glob.glob(file_dir + "/*.gz"):
        gzip_file = gzip.open(file_in)
        expected_header = ["dt", "sid", "open", "high", "low", "close",
                           "volume"]
        csv_reader = csv.DictReader(gzip_file)
        header = csv_reader.fieldnames
        if header != expected_header:
            logging.warn("expected header %s\n" % (expected_header))
            logging.warn("header_found %s" % (header))
            return

        for current_date, rows in parse_csv(csv_reader):
            table = out_h5.createTable("/TD", "date_" + current_date,
                                       OHLCTableDescription,
                                       expectedrows=len(rows),
                                       createparents=True)
            table.append(rows)
            table.flush()
        if table is not None:
            table.flush()
    end = datetime.datetime.now()
    diff = (end - start).seconds
    logging.debug("finished  it took %d." % (diff))
Project: cellranger    Author: 10XGenomics    | project source | file source
def open_h5_for_writing(filename):
    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    return tables.open_file(filename, 'w', filters = filters)
Project: cellranger    Author: 10XGenomics    | project source | file source
def save_h5(self, filename, extra_attrs={}):
        self.tocsc()
        filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
        with tables.open_file(filename, 'w', filters = filters) as f:
            f.set_node_attr('/', cr_constants.H5_FILETYPE_KEY, MATRIX_H5_FILETYPE)
            # set optional top-level attributes
            for (k,v) in extra_attrs.iteritems():
                f.set_node_attr('/', k, v)
            for genome, matrix in self.matrices.iteritems():
                group = f.create_group(f.root, genome)
                matrix.save_h5(f, group)
Project: cellranger    Author: 10XGenomics    | project source | file source
def main(args, outs):
    if args.skip or args.is_multi_genome:
        return

    matrix = cr_matrix.GeneBCMatrix.load_h5(args.matrix_h5)
    pca = cr_pca.run_pca(matrix, pca_genes=args.num_genes, pca_bcs=args.num_bcs,
                 n_pca_components=args.num_pcs, random_state=args.random_seed)
    pca_key = args.num_pcs if args.num_pcs is not None else cr_constants.PCA_N_COMPONENTS_DEFAULT
    pca_map = {pca_key: pca}

    filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(outs.pca_h5, 'w', filters = filters) as f:
        cr_pca.save_pca_h5(pca_map, f)

    cr_pca.save_pca_csv(pca_map, matrix, outs.pca_csv)
Project: pybot    Author: spillai    | project source | file source
def append(self, key, item): 
        if key not in self.data_: 
            filters = tb.Filters(complevel=5, complib='blosc')
            if isinstance(item, np.ndarray): 
                atom = tb.Atom.from_type(item.dtype.name, item.shape[1:])
            else:
                atom = tb.VLStringAtom()
            self.data_[key] = self.h5f_.create_vlarray(self.h5f_.root, key, 
                                                       atom, filters=filters)
            print('Creating VLArray, and appending to key {}'.format(key))
            print self.data_[key]

        self.data_[key].append(self.pack(item))
Project: transmutagen    Author: ergs    | project source | file source
def create_hdf5_table(file, lib, nucs):
    nucs_size = len(nucs)
    desc_common = [
        ('hash', np.int64),
        ('library', 'S8'),
        ('initial vector', np.float64, (nucs_size, 1)),
        ('time', np.float64),
        ('phi', np.float64),
        ('n_fission_fragments', np.float64),
    ]
    desc_origen = [
        ('execution time ORIGEN', np.float64),
        ('ORIGEN atom fraction', np.float64, (nucs_size, 1)),
        ('ORIGEN mass fraction', np.float64, (nucs_size, 1)),
        ]
    desc_cram_lambdify = [
        ('execution time CRAM lambdify', np.float64),
        ('CRAM lambdify atom fraction', np.float64, (nucs_size, 1)),
        ('CRAM lambdify mass fraction', np.float64, (nucs_size, 1)),
        ]
    desc_cram_py_solve = [
        ('execution time CRAM py_solve', np.float64),
        ('CRAM py_solve atom fraction', np.float64, (nucs_size, 1)),
        ('CRAM py_solve mass fraction', np.float64, (nucs_size, 1)),
        ]

    h5file = tables.open_file(file, mode="a", title="CRAM/ORIGEN test run data", filters=tables.Filters(complevel=1))
    h5file.create_group('/', lib, '%s data' % lib)
    h5file.create_table('/' + lib, 'origen', np.dtype(desc_common + desc_origen))
    h5file.create_table('/' + lib, 'cram-lambdify-umfpack', np.dtype(desc_common + desc_cram_lambdify))
    h5file.create_table('/' + lib, 'cram-lambdify-superlu', np.dtype(desc_common + desc_cram_lambdify))
    h5file.create_table('/' + lib, 'cram-py_solve', np.dtype(desc_common + desc_cram_py_solve))
    h5file.create_array('/' + lib, 'nucs', np.array(nucs, 'S6'))
Project: groundfailure    Author: usgs    | project source | file source
def __init__(self, grid2dfile, filename, name=None):
        """
        Convert grid2d file into a temporary hdf5 file for reducing memory
        load.

        Args:
            grid2dfile: grid2d file object to save
            filename (str): Path to where file should be saved (recommended
                it be a temporary dir).
            name (str): Name of layer, if None, will use filename minus the
                extension, or if a multihazard grid2d object, each layer will
                have its own name.
        """
        filename1, file_ext = os.path.splitext(filename)
        if file_ext != '.hdf5':
            filename = filename1 + '.hdf5'
            print('Changed extension from %s to .hdf5' % (file_ext,))
        filters = tables.Filters(complevel=5, complib='blosc')
        with tables.open_file(filename, mode='w') as self.tempfile:
            self.gdict = grid2dfile.getGeoDict()
            if type(grid2dfile) == ShakeGrid:
                for layer in grid2dfile.getLayerNames():
                    filldat = grid2dfile.getLayer(layer).getData()
                    self.tempfile.create_carray(self.tempfile.root, name=layer,
                                                obj=filldat, filters=filters)
                self.shakedict = grid2dfile.getShakeDict()
                self.edict = grid2dfile.getEventDict()
            else:
                if name is None:
                    name = os.path.basename(filename1)
                filldat = grid2dfile.getData()
                self.tempfile.create_carray(self.tempfile.root, name=name,
                                            obj=filldat, filters=filters)
            self.filename = os.path.abspath(filename)
Project: msprime-abc    Author: DomNelson    | project source | file source
def write_haplotypes(self, h5file):
        """
        Returns an array of genotypes and corresponding chromosome IDs
        """
        filters = tables.Filters(complevel=5, complib='blosc')

        with tables.open_file(h5file, 'w') as h5:
            with open(self.output, 'r') as f:
                line = next(f)
                ind_ID, haplotypes = parse_simuPOP_genotype(line)

        ## Create an extendable array in the h5 output file with
        ## the same shape as the haplotypes
                h5.create_earray(h5.root, 'haps',
                         atom=tables.IntAtom(shape=(2, haplotypes.shape[2])),
                         shape=(0,), filters=filters)
                h5.create_earray(h5.root, 'inds', atom=tables.IntAtom(),
                         shape=(0,), filters=filters)

                h5.root.haps.append(haplotypes)
                h5.root.inds.append(ind_ID)

                for line in f:
                    ind_ID, haplotypes = parse_simuPOP_genotype(line)
                    h5.root.haps.append(haplotypes)
                    h5.root.inds.append(ind_ID)
Project: neural_mt    Author: chrishokamp    | project source | file source
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
Project: MXNMT    Author: magic282    | project source | file source
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
Project: NMT-Coverage    Author: tuzhaopeng    | project source | file source
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
Project: PH5    Author: PIC-IRIS    | project source | file source
def newdataearray (self, name, data, batom = None, rows = None) :
        #   Use zlib, standard for HDF5
        bfilter = tables.Filters (complevel=ZLIBCOMP, complib='zlib', shuffle=True)
        #
        a = create_data_earray (self.ph5, 
                                self.current_g_das, 
                                name, 
                                data, 
                                batom, 
                                rows=rows)

        return a
Project: dl4mt-multi-src    Author: nyu-dl    | project source | file source
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
Project: cellranger    Author: 10XGenomics    | project source | file source
def open(filename, mode, start=None, length=None):
        assert mode == 'r' or mode == 'w'

        mc = MoleculeCounter()

        if mode == 'w':
            assert start is None
            assert length is None
            filters = tables.Filters(complevel = cr_constants.H5_COMPRESSION_LEVEL)
            mc.h5 = tables.open_file(filename, mode = 'w', title = '10X', filters = filters)
            mc.h5.set_node_attr('/', FILE_VERSION_KEY, CURR_FILE_VERSION)
            mc.h5.set_node_attr('/', cr_constants.H5_FILETYPE_KEY, MOLECULE_H5_FILETYPE)
            mc.h5.create_group('/', METRICS_GROUP_NAME)

            for name, col_type in MOLECULE_INFO_COLUMNS.iteritems():
                atom = tables.Atom.from_dtype(np.dtype(col_type))
                # Create an (array, element_buffer) tuple
                # where element_buffer is a len=1 numpy array
                # designed to avoid excess allocations
                mc.columns[name] = (mc.h5.create_earray(mc.h5.root, name, atom, (0,)),
                                      np.array([0], dtype=np.dtype(col_type)))

        elif mode == 'r':
            mc.h5 = tables.open_file(filename, mode = 'r')
            try:
                mc.file_version = mc.h5.get_node_attr('/', FILE_VERSION_KEY)
            except AttributeError:
                mc.file_version = 1 # V1 doesn't have version field

            for node in mc.h5.walk_nodes('/', 'Array'):
                if node.name in MOLECULE_INFO_COLUMNS:
                    if start is None:
                        assert length is None
                        mc.columns[node.name] = (node, None)
                    else:
                        assert length is not None
                        mc.columns[node.name] = (node[start:(start+length)], None)
                elif node.name in MOLECULE_REF_COLUMNS:
                    mc.ref_columns[node.name] = node
                else:
                    raise AttributeError("Illegal column: %s" % node.name)

        return mc
Project: oasis    Author: ngmarchant    | project source | file source
def repeat_expt(smplr, n_expts, n_labels, output_file = None):
    """
    Parameters
    ----------
    smplr : sub-class of PassiveSampler
        sampler must have a sample_distinct method, reset method and ...

    n_expts : int
        number of expts to run

    n_labels : int
        number of labels to query from the oracle in each expt
    """

    FILTERS = tables.Filters(complib='zlib', complevel=5)

    max_iter = smplr._max_iter
    n_class = smplr._n_class
    if max_iter < n_labels:
        raise ValueError("Cannot query {} labels. Sampler ".format(n_labels) +
                         "instance supports only {} iterations".format(max_iter))

    if output_file is None:
        # Use current date/time as filename
        output_file = 'expt_' + time.strftime("%d-%m-%Y_%H:%M:%S") + '.h5'
    logging.info("Writing output to {}".format(output_file))

    f = tables.open_file(output_file, mode='w', filters=FILTERS)
    float_atom = tables.Float64Atom()
    bool_atom = tables.BoolAtom()
    int_atom = tables.Int64Atom()

    array_F = f.create_carray(f.root, 'F_measure', float_atom, (n_expts, n_labels, n_class))
    array_s = f.create_carray(f.root, 'n_iterations', int_atom, (n_expts, 1))
    array_t = f.create_carray(f.root, 'CPU_time', float_atom, (n_expts, 1))

    logging.info("Starting {} experiments".format(n_expts))
    for i in range(n_expts):
        if i%np.ceil(n_expts/10).astype(int) == 0:
            logging.info("Completed {} of {} experiments".format(i, n_expts))
        ti = time.process_time()
        smplr.reset()
        smplr.sample_distinct(n_labels)
        tf = time.process_time()
        if hasattr(smplr, 'queried_oracle_'):
            array_F[i,:,:] = smplr.estimate_[smplr.queried_oracle_]
        else:
            array_F[i,:,:] = smplr.estimate_
        array_s[i] = smplr.t_
        array_t[i] = tf - ti
    f.close()

    logging.info("Completed all experiments")
Project: ProjectOfDataMining    Author: IljaNovo    | project source | file source
def check_HDF5_arrays(hdf5_file, N, convergence_iter):
    """Check that the HDF5 data structure of file handle 'hdf5_file' 
        has all the required nodes organizing the various two-dimensional 
        arrays required for Affinity Propagation clustering 
        ('Responsibility' matrix, 'Availability', etc.).

    Parameters
    ----------
    hdf5_file : string or file handle
        Name of the Hierarchical Data Format under consideration.

    N : int
        The number of samples in the data-set that will undergo Affinity Propagation
        clustering.

    convergence_iter : int
        Number of iterations with no change in the number of estimated clusters 
        that stops the convergence.
    """

    Worker.hdf5_lock.acquire()

    with tables.open_file(hdf5_file, 'r+') as fileh:
        if not hasattr(fileh.root, 'aff_prop_group'):
            fileh.create_group(fileh.root, "aff_prop_group")

        atom = tables.Float32Atom()
        filters = None
        #filters = tables.Filters(5, 'blosc')

        for feature in ('availabilities', 'responsibilities',
                            'similarities', 'temporaries'):
            if not hasattr(fileh.root.aff_prop_group, feature):
                fileh.create_carray(fileh.root.aff_prop_group, feature, 
                         atom, (N, N), "Matrix of {0} for affinity "
                         "propagation clustering".format(feature), 
                         filters = filters)

        if not hasattr(fileh.root.aff_prop_group, 'parallel_updates'):
            fileh.create_carray(fileh.root.aff_prop_group,
                     'parallel_updates', atom, (N, convergence_iter), 
                     "Matrix of parallel updates for affinity propagation "
                     "clustering", filters = filters)

    Worker.hdf5_lock.release()
Project: DAMR    Author: V2AI    | project source | file source
def create_song_file(h5filename,title='H5 Song File',force=False,complevel=1):
    """
    Create a new HDF5 file for a new song.
    If force=False, refuse to overwrite an existing file
    Raise a ValueError if it's the case.
    Other optional param is the H5 file.
    Setups the groups, each containing a table 'songs' with one row:
    - metadata
    - analysis
    DETAIL
    - we set the compression level to 1 by default, it uses the ZLIB library
      to disable compression, set it to 0
    """
    # check if file exists
    if not force:
        if os.path.exists(h5filename):
            raise ValueError('file exists, can not create HDF5 song file')
    # create the H5 file
    h5 = tables.openFile(h5filename, mode='w', title='H5 Song File')
    # set filter level
    h5.filters = tables.Filters(complevel=complevel,complib='zlib')
    # setup the groups and tables
        # group metadata
    group = h5.createGroup("/",'metadata','metadata about the song')
    table = h5.createTable(group,'songs',DESC.SongMetaData,'table of metadata for one song')
    r = table.row
    r.append() # filled with default values 0 or '' (depending on type)
    table.flush()
        # group analysis
    group = h5.createGroup("/",'analysis','Echo Nest analysis of the song')
    table = h5.createTable(group,'songs',DESC.SongAnalysis,'table of Echo Nest analysis for one song')
    r = table.row
    r.append() # filled with default values 0 or '' (depending on type)
    table.flush()
        # group musicbrainz
    group = h5.createGroup("/",'musicbrainz','data about the song coming from MusicBrainz')
    table = h5.createTable(group,'songs',DESC.SongMusicBrainz,'table of data coming from MusicBrainz')
    r = table.row
    r.append() # filled with default values 0 or '' (depending on type)
    table.flush()
    # create arrays
    create_all_arrays(h5,expectedrows=3)
    # close it, done
    h5.close()
Project: DAMR    Author: V2AI    | project source | file source
def create_aggregate_file(h5filename,title='H5 Aggregate File',force=False,expectedrows=1000,complevel=1,
                          summaryfile=False):
    """
    Create a new HDF5 file for all songs.
    It will contains everything that are in regular song files.
    Tables created empty.
    If force=False, refuse to overwrite an existing file
    Raise a ValueError if it's the case.
    If summaryfile=True, creates a sumary file, i.e. no arrays
    Other optional param is the H5 file.
    DETAILS
    - if you create a very large file, try to approximate correctly
      the number of data points (songs), it speeds things up with arrays (by
      setting the chunking correctly).
    - we set the compression level to 1 by default, it uses the ZLIB library
      to disable compression, set it to 0

    Setups the groups, each containing a table 'songs' with one row:
    - metadata
    - analysis
    """
    # check if file exists
    if not force:
        if os.path.exists(h5filename):
            raise ValueError('file exists, can not create HDF5 song file')
    # summary file? change title
    if summaryfile:
        title = 'H5 Summary File'
    # create the H5 file
    h5 = tables.openFile(h5filename, mode='w', title='H5 Song File')
    # set filter level
    h5.filters = tables.Filters(complevel=complevel,complib='zlib')
    # setup the groups and tables
        # group metadata
    group = h5.createGroup("/",'metadata','metadata about the song')
    table = h5.createTable(group,'songs',DESC.SongMetaData,'table of metadata for one song',
                           expectedrows=expectedrows)
        # group analysis
    group = h5.createGroup("/",'analysis','Echo Nest analysis of the song')
    table = h5.createTable(group,'songs',DESC.SongAnalysis,'table of Echo Nest analysis for one song',
                           expectedrows=expectedrows)
        # group musicbrainz
    group = h5.createGroup("/",'musicbrainz','data about the song coming from MusicBrainz')
    table = h5.createTable(group,'songs',DESC.SongMusicBrainz,'table of data coming from MusicBrainz',
                           expectedrows=expectedrows)
    # create arrays
    if not summaryfile:
        create_all_arrays(h5,expectedrows=expectedrows)
    # close it, done
    h5.close()
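The docstring above recommends estimating the number of rows up front so PyTables can pick a good chunk shape. Here is a minimal standalone sketch of passing expectedrows when creating an extendable array; the file name and sizes are illustrative, not from the DAMR project.

import numpy as np
import tables

with tables.open_file('aggregate_demo.h5', mode='w') as h5:
    # Telling PyTables roughly how many rows will arrive lets it choose a
    # sensible chunk shape, which speeds up both appends and later reads.
    arr = h5.create_earray(h5.root, 'values',
                           atom=tables.Float64Atom(), shape=(0,),
                           expectedrows=1000000,
                           filters=tables.Filters(complevel=1, complib='zlib'))
    arr.append(np.random.randn(10000))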