Python tables module: openFile() code examples

We extracted the following 22 code examples from open-source Python projects to illustrate how to use tables.openFile().
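Note that openFile() is the PyTables 2.x spelling; PyTables 3.x renamed it to open_file() and eventually removed the old camelCase aliases, which is why several examples below check tables.__version__. A minimal sketch of the basic open/read/close pattern, assuming a file data.h5 that contains an array node /mydata:

import tables

h5f = tables.openFile("data.h5", mode="r")  # tables.open_file() on PyTables 3.x
data = h5f.root.mydata.read()               # read the whole array node into memory
h5f.close()                                 # always close the handle when done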

Project: feature_engineering | Author: webeng
def get_tfidf_model(self):
        with open(self.data_path + 'tfidf.pkl', 'rb') as pkl_file:
            vectorizer = cPickle.load(pkl_file)

        vectorizer.tokenizer = self.tokenize

        with tables.openFile(self.data_path + 'tfidf_keys.hdf', 'r') as f:
            keys = f.root.keys.read()

        with tables.openFile(self.data_path + 'tfidf_values.hdf', 'r') as f:
            values = f.root.values.read()

        vectorizer.vocabulary_ = dict(zip(keys, values))

        with tables.openFile(self.data_path + 'tfidf_stop_words.hdf', 'r') as f:
            vectorizer.stop_words_ = set(f.root.stop_words.read())

        return vectorizer
Project: PH5 | Author: PIC-IRIS
def is_mini (ltable) :
    '''
       Check whether this table lives in an external (mini) file;
       if so, re-open that file in append ('a') mode.
    '''
    from tables import openFile
    from re import compile
    #   Das_t is always in an external file
    Das_tRE = compile ("(/Experiment_g/Receivers_g/Das_g_.*)/Das_t")
    ltablepath = ltable._v_pathname
    if Das_tRE.match (ltablepath) :
        ltablefile = ltable._v_file
        if ltablefile.mode != 'a' :
            filename = ltablefile.filename
            ltablefile.close ()
            mini = openFile (filename, 'a')
            ltable = mini.get_node (ltablepath)
            add_reference (ltablepath, ltable)

    return ltable
Project: DBAdapter | Author: ContinuumIO
def write_tables():
    import tables

    dtype = np.dtype("S7,f4,f4,f4,f4,i4")
    t0 = time()
    sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
                          for i in xrange(N)), dtype, count=N)
    t1 = time() - t0
    print "Created sarray with %d rows in %.3fs" % (N, t1)

    t0 = time()
    h5f = tables.openFile("market.h5", "w")
    table = h5f.createTable(h5f.root, "market", dtype)
    table.append(sarray)
    h5f.close()
    t1 = time() - t0
    print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
Project: DBAdapter | Author: ContinuumIO
def write_tables2():
    import tables

    dtype = np.dtype("S7,f4,f4,f4,f4,i4")

    t0 = time()
    h5f = tables.openFile("market.h5", "w")
    table = h5f.createTable(h5f.root, "market", dtype)
    count = 10000
    # append in chunks of `count` rows instead of materializing all N at once
    # (the original loop re-built and re-appended rows 0..j each iteration,
    # storing far more than N rows)
    for j in xrange(0, N, count):
        sarray = np.fromiter(((str(i), float(i), float(2*i), None, float(4*i), i)
                              for i in xrange(j, min(j + count, N))), dtype)
        table.append(sarray)
    h5f.close()
    t1 = time() - t0
    print "[PyTables] Stored %d rows in %.3fs" % (N, t1)
Project: zipline-chinese | Author: zhanghan1990
def merge_all_files_into_pytables(file_dir, file_out):
    """
    process each file into pytables
    """
    start = datetime.datetime.now()
    out_h5 = tables.openFile(file_out,
                             mode="w",
                             title="bars",
                             filters=tables.Filters(complevel=9,
                                                    complib='zlib'))
    table = None
    for file_in in glob.glob(file_dir + "/*.gz"):
        gzip_file = gzip.open(file_in)
        expected_header = ["dt", "sid", "open", "high", "low", "close",
                           "volume"]
        csv_reader = csv.DictReader(gzip_file)
        header = csv_reader.fieldnames
        if header != expected_header:
            logging.warn("expected header %s\n" % (expected_header))
            logging.warn("header_found %s" % (header))
            return

        for current_date, rows in parse_csv(csv_reader):
            table = out_h5.createTable("/TD", "date_" + current_date,
                                       OHLCTableDescription,
                                       expectedrows=len(rows),
                                       createparents=True)
            table.append(rows)
            table.flush()
        if table is not None:
            table.flush()
    end = datetime.datetime.now()
    diff = (end - start).seconds
    logging.debug("finished  it took %d." % (diff))
Project: Attentive_reader | Author: caglar
def get_length(path):
    if tables.__version__[0] == '2':
        target_table = tables.openFile(path, 'r')
        target_index = target_table.getNode('/indices')
    else:
        target_table = tables.open_file(path, 'r')
        target_index = target_table.get_node('/indices')

    return target_index.shape[0]
Project: Attentive_reader | Author: caglar
def synchronized_open_file(*args, **kwargs):
    if tables.__version__[0] == '2':
        tbf = tables.openFile(*args, **kwargs)
    else:
        tbf = tables.open_file(*args, **kwargs)
    return tbf
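The version check keys off tables.__version__ because PyTables 2.x only offers openFile() while 3.x prefers open_file(). An alternative sketch that feature-detects the function instead of parsing the version string:

import tables

# Prefer the PyTables 3.x name, fall back to the 2.x camelCase spelling.
_open_file = getattr(tables, 'open_file', None) or tables.openFile

def synchronized_open_file(*args, **kwargs):
    return _open_file(*args, **kwargs)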
Project: picosdk-python-examples | Author: picotech
def _f_open(self, args):
        if not self._opened:
            self._filename = args["filename"]
            self._title = args["title"]
            if self._title is None or not isinstance(self._title, basestring):
                self._title = strftime("PicoTape-%Y%m%d-%H%M%S")
            self._limit = args["limit"]
            self._overwrite = args["overwrite"]
            if self._filename is not None:
                self._fhandle = None
                error = "OK"
                try:
                    if not os.path.exists(os.path.dirname(self._filename)):
                        error = "Path to %s not found" % self._filename
                    elif not self._overwrite and os.path.exists(self._filename):
                        error = "File %s exists" % self._filename
                    else:
                        self._fhandle = tb.openFile(self._filename, title=self._title, mode="w")
                except Exception as ex:
                    self._fhandle = None
                    error = ex.message
                if self._fhandle is not None:
                    self._opened = True
                self._readq.put(error)
            else:
                self._memstore = True
                self._opened = True
                self._readq.put("OK")
            self._stats = args["stats"] and not self._memstore
Project: MusicGenreClassification | Author: mlachmish
def open_h5_file_read(h5filename):
    """
    Open an existing H5 in read mode.
    Same function as in hdf5_utils, here so we avoid one import
    """
    return tables.openFile(h5filename, mode='r')
Project: neural_mt | Author: chrishokamp
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
Project: MXNMT | Author: magic282
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
Project: NMT-Coverage | Author: tuzhaopeng
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
Project: dl4mt-multi-src | Author: nyu-dl
def safe_hdf(array, name):
    if os.path.isfile(name + '.hdf') and not args.overwrite:
        logger.warning("Not saving %s, already exists." % (name + '.hdf'))
    else:
        if os.path.isfile(name + '.hdf'):
            logger.info("Overwriting %s." % (name + '.hdf'))
        else:
            logger.info("Saving to %s." % (name + '.hdf'))
        with tables.openFile(name + '.hdf', 'w') as f:
            atom = tables.Atom.from_dtype(array.dtype)
            filters = tables.Filters(complib='blosc', complevel=5)
            ds = f.createCArray(f.root, name.replace('.', ''), atom,
                                array.shape, filters=filters)
            ds[:] = array
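The four identical safe_hdf() helpers above depend on two module-level names not shown in the extracts: a logger and an args object carrying an overwrite flag (typically the argparse result of each training script). A hypothetical call sketch with stand-ins for both:

import logging
import numpy as np
from argparse import Namespace

logger = logging.getLogger(__name__)
args = Namespace(overwrite=False)  # stand-in for the script's parsed CLI flags

vocab = np.arange(10, dtype=np.int64)
safe_hdf(vocab, 'vocab')           # writes vocab.hdf unless it already exists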
Project: DAMR | Author: V2AI
def open_h5_file_read(h5filename):
    """
    Open an existing H5 in read mode.
    """
    return tables.openFile(h5filename, mode='r')
Project: DAMR | Author: V2AI
def open_h5_file_append(h5filename):
    """
    Open an existing H5 in append mode.
    """
    return tables.openFile(h5filename, mode='a')
Project: DAMR | Author: V2AI
def open_h5_file_read(h5filename):
    """
    Open an existing H5 in read mode.
    Same function as in hdf5_utils, here so we avoid one import
    """
    return tables.openFile(h5filename, mode='r')
Project: DAMR | Author: V2AI
def process_filelist_train(filelist=None,testsongs=None,tmpfilename=None):
    """
    Main function, process all files in the list (as long as their track_id
    is not in testsongs)
    INPUT
       filelist     - a list of song files
       testsongs    - set of track ID that we should not use
       tmpfilename  - where to save our processed features
    """
    # sanity check
    for arg in locals().values():
        assert arg is not None, 'process_filelist_train, missing an argument, something still None'
    if os.path.isfile(tmpfilename):
        print 'ERROR: file',tmpfilename,'already exists.'
        return
    # dimension fixed (12-dimensional timbre vector)
    ndim = 12
    finaldim = 90
    # create outputfile
    output = tables.openFile(tmpfilename, mode='a')
    group = output.createGroup("/",'data','TMP FILE FOR ARTIST RECOGNITION')
    output.createEArray(group,'feats',tables.Float64Atom(shape=()),(0,finaldim),'',
                        expectedrows=len(filelist))
    output.createEArray(group,'artist_id',tables.StringAtom(18,shape=()),(0,),'',
                        expectedrows=len(filelist))
    # iterate over files
    cnt_f = 0
    for f in filelist:
        cnt_f += 1
        # verbose
        if cnt_f % 50000 == 0:
            print 'training... checking file #',cnt_f
        # check what file/song is this
        h5 = GETTERS.open_h5_file_read(f)
        artist_id = GETTERS.get_artist_id(h5)
        track_id = GETTERS.get_track_id(h5)
        if track_id in testsongs: # just in case, but should not be necessary
            print 'Found test track_id during training? weird.',track_id
            h5.close()
            continue
        # extract features, then close file
        processed_feats = compute_features(h5)
        h5.close()
        if processed_feats is None:
            continue
        # save features to tmp file
        output.root.data.artist_id.append( np.array( [artist_id] ) )
        output.root.data.feats.append( processed_feats )
    # we're done, close output
    output.close()
    return
Project: DAMR | Author: V2AI
def train(nthreads,maindir,output,testsongs,trainsongs=None):
    """
    Main function to do the training
    Do the main pass with the number of given threads.
    Then, reads the tmp files, creates the main output, deletes the tmp files.
    INPUT
      - nthreads     - number of threads to use
      - maindir      - dir of the MSD, where to find song files
      - output       - main model, contains everything to perform KNN
      - testsongs    - set of songs to ignore
      - trainsongs   - list of songs to use for training (FASTER)
    RETURN
       - nothing :)
    """
    # sanity checks
    if os.path.isfile(output):
        print 'ERROR: file',output,'already exists.'
        return
    # initial time
    t1 = time.time()
    # do main pass
    tmpfiles = process_filelist_train_main_pass(nthreads,maindir,testsongs,trainsongs=trainsongs)
    if tmpfiles is None:
        print 'Something went wrong, tmpfiles are None'
        return
    # intermediate time
    t2 = time.time()
    stimelen = str(datetime.timedelta(seconds=t2-t1))
    print 'Main pass done after',stimelen; sys.stdout.flush()
    # find approximate number of rows per tmpfiles
    h5 = tables.openFile(tmpfiles[0],'r')
    nrows = h5.root.data.artist_id.shape[0] * len(tmpfiles)
    h5.close()
    # create output
    output = tables.openFile(output, mode='a')
    group = output.createGroup("/",'data','KNN MODEL FILE FOR ARTIST RECOGNITION')
    output.createEArray(group,'feats',tables.Float64Atom(shape=()),(0,90),'feats',
                        expectedrows=nrows)
    output.createEArray(group,'artist_id',tables.StringAtom(18,shape=()),(0,),'artist_id',
                        expectedrows=nrows)
    # aggregate temp files
    for tmpf in tmpfiles:
        h5 = tables.openFile(tmpf)
        output.root.data.artist_id.append( h5.root.data.artist_id[:] )
        output.root.data.feats.append( h5.root.data.feats[:] )
        h5.close()
        # delete tmp file
        os.remove(tmpf)
    # close output
    output.close()
    # final time
    t3 = time.time()
    stimelen = str(datetime.timedelta(seconds=t3-t1))
    print 'Whole training done after',stimelen
    # done
    return
Project: DAMR | Author: V2AI
def process_filelist_test(filelist=None,model=None,tmpfilename=None,K=1):
    """
    Main function, process all files in the list
    INPUT
       filelist     - a list of song files
       model        - h5 file containing feats and artist_id for all train songs
       tmpfilename  - where to save our processed features
       K            - K-nn parameter (default=1)
    """
    # sanity check
    for arg in locals().values():
        assert arg is not None, 'process_filelist_test, missing an argument, something still None'
    if os.path.isfile(tmpfilename):
        print 'ERROR: file',tmpfilename,'already exists.'
        return
    if not os.path.isfile(model):
        print 'ERROR: model',model,'does not exist.'
        return
    # dimension fixed (12-dimensional timbre vector)
    ndim = 12
    finaldim = 90
    # create kdtree
    h5model = tables.openFile(model, mode='r')
    assert h5model.root.data.feats.shape[1]==finaldim,'inconsistency in final dim'
    kd = ANN.kdtree(h5model.root.data.feats)
    # create outputfile
    output = tables.openFile(tmpfilename, mode='a')
    group = output.createGroup("/",'data','TMP FILE FOR ARTIST RECOGNITION')
    output.createEArray(group,'artist_id_real',tables.StringAtom(18,shape=()),(0,),'',
                        expectedrows=len(filelist))
    output.createEArray(group,'artist_id_pred',tables.StringAtom(18,shape=()),(0,),'',
                        expectedrows=len(filelist))
    # iterate over files
    cnt_f = 0
    for f in filelist:
        cnt_f += 1
        # verbose
        if cnt_f % 50000 == 0:
            print 'testing... checking file #',cnt_f
        # check what file/song is this
        h5 = GETTERS.open_h5_file_read(f)
        artist_id = GETTERS.get_artist_id(h5)
        # extract features, then close file
        processed_feats = compute_features(h5)
        h5.close()
        if processed_feats is None:
            continue
        # do prediction
        artist_id_pred = do_prediction(processed_feats,kd,h5model,K)
        # save features to tmp file
        output.root.data.artist_id_real.append( np.array( [artist_id] ) )
        output.root.data.artist_id_pred.append( np.array( [artist_id_pred] ) )
    # we're done, close output
    output.close()
    return
Project: DAMR | Author: V2AI
def test(nthreads,model,testsongs,K):
    """
    Main function to do the testing
    Do the main pass with the number of given threads.
    Then, reads the tmp files, computes the accuracy, deletes the tmp files.
    INPUT
      - nthreads     - number of threads to use
      - model        - h5 files containing feats and artist_id for all train songs
      - testsongs    - set of songs to ignore
      - K            - K-nn parameter
    RETURN
       - nothing :)
    """
    # initial time
    t1 = time.time()
    # do main pass
    tmpfiles = process_filelist_test_main_pass(nthreads,model,testsongs,K)
    if tmpfiles is None:
        print 'Something went wrong, tmpfiles are None'
        return
    # intermediate time
    t2 = time.time()
    stimelen = str(datetime.timedelta(seconds=t2-t1))
    print 'Main pass done after',stimelen; sys.stdout.flush()
    # aggregate temp files
    artist_id_found = 0
    total_predictions = 0
    for tmpf in tmpfiles:
        h5 = tables.openFile(tmpf)
        for k in range( h5.root.data.artist_id_real.shape[0] ):
            total_predictions += 1
            if h5.root.data.artist_id_real[k] == h5.root.data.artist_id_pred[k]:
                artist_id_found += 1
        h5.close()
        # delete tmp file
        os.remove(tmpf)
    # final time
    t3 = time.time()
    stimelen = str(datetime.timedelta(seconds=t3-t1))
    print 'Whole testing done after',stimelen
    # results
    print 'We found the right artist_id',artist_id_found,'times out of',total_predictions,'predictions.'
    print 'i.e., accuracy is:',artist_id_found*1./total_predictions
    # done
    return
Project: DAMR | Author: V2AI
def test(nthreads,model,testsongs,npicks,winsize,finaldim,K,typecompress):
    """
    Main function to do the testing
    Do the main pass with the number of given threads.
    Then, reads the tmp files, computes the score, deletes the tmp files.
    INPUT
      - nthreads     - number of threads to use
      - model        - h5 files containing feats and year for all train songs
      - testsongs    - songs to test on
      - npicks       - number of samples to pick per song
      - winsize      - window size (in beats) of a sample
      - finaldim     - final dimension of the sample, something like 5?
      - K            - K-nn parameter
      - typecompress - feature type, one of: 'picks', 'corrcoeff', 'cov'
    RETURN
       - nothing
    """
    # initial time
    t1 = time.time()
    # do main pass
    tmpfiles = process_filelist_test_main_pass(nthreads,model,testsongs,
                                               npicks,winsize,finaldim,K,
                                               typecompress)

    if tmpfiles is None:
        print 'Something went wrong, tmpfiles are None'
        return
    # intermediate time
    t2 = time.time()
    stimelen = str(datetime.timedelta(seconds=t2-t1))
    print 'Main pass done after',stimelen; sys.stdout.flush()
    # aggregate temp files
    year_real = []
    year_pred = []
    for tmpf in tmpfiles:
        h5 = tables.openFile(tmpf)
        year_real.extend( h5.root.data.year_real[:] )
        year_pred.extend( h5.root.data.year_pred[:] )
        h5.close()
        # delete tmp file
        os.remove(tmpf)
    # result
    BENCHMARK.evaluate(year_real,year_pred,verbose=1)
    # final time
    t3 = time.time()
    stimelen = str(datetime.timedelta(seconds=t3-t1))
    print 'Whole testing done after',stimelen
    # done
    return
Project: DAMR | Author: V2AI
def create_aggregate_file(h5filename,title='H5 Aggregate File',force=False,expectedrows=1000,complevel=1,
                          summaryfile=False):
    """
    Create a new HDF5 file for all songs.
    It will contain everything that is in regular song files.
    Tables are created empty.
    If force=False, refuses to overwrite an existing file
    and raises a ValueError in that case.
    If summaryfile=True, creates a summary file, i.e. no arrays
    Other optional params configure the H5 file.
    DETAILS
    - if you create a very large file, try to approximate correctly
      the number of data points (songs); it speeds things up with arrays (by
      setting the chunking correctly).
    - we set the compression level to 1 by default, which uses the ZLIB
      library; to disable compression, set it to 0

    Sets up the groups, each containing a table 'songs' with one row:
    - metadata
    - analysis
    """
    # check if file exists
    if not force:
        if os.path.exists(h5filename):
            raise ValueError('file exists, can not create HDF5 song file')
    # summary file? change title
    if summaryfile:
        title = 'H5 Summary File'
    # create the H5 file
    h5 = tables.openFile(h5filename, mode='w', title=title)
    # set filter level
    h5.filters = tables.Filters(complevel=complevel,complib='zlib')
    # setup the groups and tables
        # group metadata
    group = h5.createGroup("/",'metadata','metadata about the song')
    table = h5.createTable(group,'songs',DESC.SongMetaData,'table of metadata for one song',
                           expectedrows=expectedrows)
        # group analysis
    group = h5.createGroup("/",'analysis','Echo Nest analysis of the song')
    table = h5.createTable(group,'songs',DESC.SongAnalysis,'table of Echo Nest analysis for one song',
                           expectedrows=expectedrows)
        # group musicbrainz
    group = h5.createGroup("/",'musicbrainz','data about the song coming from MusicBrainz')
    table = h5.createTable(group,'songs',DESC.SongMusicBrainz,'table of data coming from MusicBrainz',
                           expectedrows=expectedrows)
    # create arrays
    if not summaryfile:
        create_all_arrays(h5,expectedrows=expectedrows)
    # close it, done
    h5.close()
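A hypothetical call sketch; DESC and create_all_arrays come from the MSD hdf5_descriptors/hdf5_utils modules that this extract assumes are already imported:

# Summary file (tables only, no per-song arrays), sized for ~1M songs.
# Raises ValueError if msd_summary.h5 already exists and force is False.
create_aggregate_file('msd_summary.h5', expectedrows=1000000,
                      complevel=1, summaryfile=True)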