The following nine code examples, extracted from open-source Python projects, illustrate how to use reader.Reader().
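Across the nine examples, Reader follows the same lifecycle regardless of project: construct it around a data source, pull records from it one call at a time (via getNext(), feed(), readline(), or read_NT(), depending on the project), then close or discard it. The sketch below illustrates only that pattern; this Reader class, its getNext() method, and the in-memory source are hypothetical stand-ins, not the API of any project shown here.

import io

class Reader:
    """Hypothetical stand-in for the project-specific reader.Reader classes below."""

    def __init__(self, source):
        # each project supplies its own source: a file path, a console, a record file, ...
        self._stream = source

    def getNext(self):
        # hand back one record per call; None signals the source is exhausted
        line = self._stream.readline()
        return line.strip() if line else None

    def close(self):
        self._stream.close()

# an in-memory stream keeps the sketch runnable as-is (hypothetical data)
reader = Reader(io.StringIO("record 1\nrecord 2\n"))
record = reader.getNext()
while record is not None:  # mirrors the getNext()/close() loop of Example 1
    print(record)
    record = reader.getNext()
reader.close()

Example 1 below applies the same loop to parsed instances, bucketing each one by length before the reader is closed.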
Example 1:

def read_data(source_path, word_alphabet, pos_alphabet, type_alphabet, max_size=None, normalize_digits=True):
    logger = utils.get_logger("Reading Data")
    data = [[] for _ in _buckets]
    counter = 0
    reader = Reader(source_path, word_alphabet, pos_alphabet, type_alphabet)
    inst = reader.getNext(normalize_digits)
    while inst is not None and (not max_size or counter < max_size):
        counter += 1
        if counter % 10000 == 0:
            logger.info("reading data: %d" % counter)
        # place the instance in the smallest bucket that fits its length
        inst_size = inst.length()
        for bucket_id, bucket_size in enumerate(_buckets):
            if inst_size <= bucket_size:
                data[bucket_id].append([inst.word_ids, inst.pos_ids, inst.heads, inst.type_ids])
                break
        inst = reader.getNext(normalize_digits)
    reader.close()
    logger.info("Total number of data: %d" % counter)
    return data
Example 2:

def readline(self, n=None):
    from reader import Reader
    try:
        # this isn't quite right: it will clobber any prompt that's
        # been printed. Not sure how to get around this...
        return Reader(self.con).readline()
    except EOFError:
        return ''
Example 3:

def read_robinson(directory, suffix, read_what):
    read_what = readwhat.normalize(read_what)
    read_what_str = readwhat.read_what2string(read_what)
    sys.stderr.write("Now reading '%s' from directory %s with suffix %s\n" % (read_what_str, directory, suffix))
    rd = reader.Reader(directory, suffix)
    rd.read_NT(read_what)
    return rd
Example 4:

def model(self):
    X_reader = Reader(self.X_train_file, name='X',
                      image_size=self.image_size, batch_size=self.batch_size)
    Y_reader = Reader(self.Y_train_file, name='Y',
                      image_size=self.image_size, batch_size=self.batch_size)

    x = X_reader.feed()
    y = Y_reader.feed()

    cycle_loss = self.cycle_consistency_loss(self.G, self.F, x, y)

    # X -> Y
    fake_y = self.G(x)
    G_gan_loss = self.generator_loss(self.D_Y, fake_y, use_lsgan=self.use_lsgan)
    G_loss = G_gan_loss + cycle_loss
    D_Y_loss = self.discriminator_loss(self.D_Y, y, self.fake_y, use_lsgan=self.use_lsgan)

    # Y -> X
    fake_x = self.F(y)
    F_gan_loss = self.generator_loss(self.D_X, fake_x, use_lsgan=self.use_lsgan)
    F_loss = F_gan_loss + cycle_loss
    D_X_loss = self.discriminator_loss(self.D_X, x, self.fake_x, use_lsgan=self.use_lsgan)

    # summary
    tf.summary.histogram('D_Y/true', self.D_Y(y))
    tf.summary.histogram('D_Y/fake', self.D_Y(self.G(x)))
    tf.summary.histogram('D_X/true', self.D_X(x))
    tf.summary.histogram('D_X/fake', self.D_X(self.F(y)))

    tf.summary.scalar('loss/G', G_gan_loss)
    tf.summary.scalar('loss/D_Y', D_Y_loss)
    tf.summary.scalar('loss/F', F_gan_loss)
    tf.summary.scalar('loss/D_X', D_X_loss)
    tf.summary.scalar('loss/cycle', cycle_loss)

    tf.summary.image('X/generated', utils.batch_convert2int(self.G(x)))
    tf.summary.image('X/reconstruction', utils.batch_convert2int(self.F(self.G(x))))
    tf.summary.image('Y/generated', utils.batch_convert2int(self.F(y)))
    tf.summary.image('Y/reconstruction', utils.batch_convert2int(self.G(self.F(y))))

    return G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x
Example 5:

def eva_a_phi(phi):
    na, nnh, nh, nw = phi

    # choose a dataset to train (mscoco, flickr8k, flickr30k)
    dataset = 'mscoco'
    data_dir = osp.join(DATA_ROOT, dataset)

    from model.ra import Model

    # settings
    mb = 64      # mini-batch size
    lr = 0.0002  # learning rate
    # nh = 512   # size of LSTM's hidden size
    # nnh = 512  # hidden size of attention mlp
    # nw = 512   # size of word embedding vector
    # na = 512   # size of the region features after dimensionality reduction
    name = 'ra'  # model name, just setting it to 'ra' is ok. 'ra'='region attention'
    vocab_freq = 'freq5'  # use the vocabulary that filtered out words whose frequencies are less than 5

    print('... loading data {}'.format(dataset))
    train_set = Reader(batch_size=mb, data_split='train', vocab_freq=vocab_freq, stage='train',
                       data_dir=data_dir, feature_file='features_30res.h5', topic_switch='off')  # change 0, 1000, 82783
    valid_set = Reader(batch_size=1, data_split='val', vocab_freq=vocab_freq, stage='val',
                       data_dir=data_dir, feature_file='features_30res.h5', caption_switch='off',
                       topic_switch='off')  # change 0, 10, 5000

    npatch, nimg = train_set.features.shape[1:]
    nout = len(train_set.vocab)
    save_dir = '{}-nnh{}-nh{}-nw{}-na{}-mb{}-V{}'.format(dataset.lower(), nnh, nh, nw, na, mb, nout)
    save_dir = osp.join(SAVE_ROOT, save_dir)

    model_file, m = find_last_snapshot(save_dir, resume_training=False)
    os.system('cp model/ra.py {}/'.format(save_dir))
    logger = Logger(save_dir)
    logger.info('... building')
    model = Model(name=name, nimg=nimg, nnh=nnh, nh=nh, na=na, nw=nw, nout=nout,
                  npatch=npatch, model_file=model_file)

    # start training
    bs = BeamSearch([model], beam_size=1, num_cadidates=100, max_length=20)
    best = train(model, bs, train_set, valid_set, save_dir, lr, display=100, starting=m, endding=20,
                 validation=2000, life=10, logger=logger)  # change dis1,100; va 2,2000; life 0,10;
    average_models(best=best, L=6, model_dir=save_dir, model_name=name+'.h5')  # L 1, 6

    # evaluation
    np.save('data_dir', data_dir)
    np.save('save_dir', save_dir)
    os.system('python valid_time.py')
    scores = np.load('scores.npy')
    running_time = np.load('running_time.npy')

    print('cider:', scores[-1], 'B1-4,C:', scores, 'running time:', running_time)

    return scores, running_time
Example 6:

def main():
    if len(sys.argv) == 3:
        config = Config(sys.argv[1], sys.argv[2])
    else:
        assert False

    phase = config.items['phase']

    from reader import Reader
    train_set = Reader(phase='train', batch_size=config.items['batch_size'], do_shuffle=True)
    valid_set = Reader(phase='val', batch_size=10, do_shuffle=False)
    test_set = Reader(phase='test', batch_size=10, do_shuffle=False)

    glog.info('generating model...')
    from model import Model
    # with tf.device('/cpu:0'):
    # with tf.device('/gpu:%d'%config.items['gpu']):
    model = Model(config.items['lr'])

    # try:
    #     config.items['starting'] = int(config.items['model'].split('_')[-1])
    # except:
    config.items['starting'] = 0

    # snapshot path
    mkdir_safe(config.items['snap_path'])

    sess_config = tf.ConfigProto(allow_soft_placement=True, device_count={'GPU': 4})
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        tf.global_variables_initializer().run()
        if 'model' in config.items.keys():
            model.saver.restore(sess, config.items['model'])
            glog.info('loading model: %s...' % config.items['model'])
        if phase == 'ctc':
            glog.info('ctc training...')
            train_valid(sess, model, train_set, valid_set, test_set, config)
        # elif phase == 'extract_feature':
        #     pass
        # elif phase == 'get_prediction':
        #     from reader import Reader
        #     train_set = Reader(phase='train', batch_size=config.items['batch_size'], do_shuffle=False, resample=False, distortion=False)
        #     glog.info('feature extracting...')
        #     get_prediction(model, train_set, config)
        # elif phase == 'top_k_prediction':
        #     from reader import Reader
        #     train_set = Reader(phase='test', batch_size=config.items['batch_size'], do_shuffle=False, resample=False, distortion=False)
        #     glog.info('feature extracting...')
        #     get_top_k_prediction(model, train_set, config)
    glog.info('end')
Example 7:

def main():
    if len(sys.argv) == 3:
        config = Config(sys.argv[1], sys.argv[2])
    else:
        assert False

    from utils import mkdir_safe, log_self
    log_self(__file__)

    glog.info('generating model...')
    from model_after import Model
    model = Model(learning_rate=config.items['lr'], config=config)

    # load model
    if 'model' in config.items.keys():
        glog.info('loading model: %s...' % config.items['model'])
        model.load_model(config.items['model'])
    elif 'model_old' in config.items.keys():
        glog.info('loading model from old: %s...' % config.items['model_old'])
        model.load_old_model(config.items['model_old'])

    from reader import Reader
    train_set = Reader(phase='train', config=config, do_shuffle=True, resample=True)
    valid_set = Reader(phase='dev', config=config, do_shuffle=True, resample=False,
                       feature_mean=train_set.feature_mean, feature_std=train_set.feature_std)
    test_set = Reader(phase='test', config=config, do_shuffle=True, resample=False,
                      feature_mean=train_set.feature_mean, feature_std=train_set.feature_std)

    try:
        config.items['starting'] = int(config.items['model'].split('_')[-1])
    except:
        config.items['starting'] = 0

    if 'predict' in config.items.keys():
        prob_predict(model, train_set, config, epoch=config.items['starting'])
        prob_predict(model, valid_set, config, epoch=config.items['starting'])
        prob_predict(model, test_set, config, epoch=config.items['starting'])
        return

    # snapshot path
    mkdir_safe(config.items['snap_path'])
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_dev'))
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_test'))

    glog.info('training...')
    train_valid(model, train_set, valid_set, test_set, config)
    glog.info('end')
Example 8:

def main():
    if len(sys.argv) == 3:
        config = Config(sys.argv[1], sys.argv[2])
    else:
        assert False

    phase = config.items['phase']

    glog.info('generating model...')
    from model_me import Model
    with tf.device('/cpu:0'):
        # with tf.device('/gpu:%d'%config.items['gpu']):
        model = Model()

    # try:
    #     config.items['starting'] = int(config.items['model'].split('_')[-1])
    # except:
    config.items['starting'] = 0

    # snapshot path
    mkdir_safe(config.items['snap_path'])
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_dev'))
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_test'))

    sess_config = tf.ConfigProto(device_count={'GPU': 0})
    # sess_config = tf.ConfigProto(allow_soft_placement=True)
    # sess_config.gpu_options.allow_growth = True

    from reader import Reader
    train_set = Reader(phase='train', batch_size=config.items['batch_size'],
                       do_shuffle=True, resample=True, distortion=True)
    valid_set = None  # Reader(phase='dev', batch_size=1, do_shuffle=False, resample=False, distortion=False)
    test_set = None   # Reader(phase='test', batch_size=1, do_shuffle=False, resample=False, distortion=False)

    with tf.Session(config=sess_config) as sess:
        tf.global_variables_initializer().run()
        if 'model' in config.items.keys():
            model.assign_from_pkl(config.items['model'])
            pdb.set_trace()
            glog.info('loading model: %s...' % config.items['model'])
        if phase == 'ctc':
            # model.make_functions()
            glog.info('ctc training...')
            train_valid(sess, model, train_set, valid_set, test_set, config)
        elif phase == 'extract_feature':
            pass
        elif phase == 'get_prediction':
            from reader import Reader
            train_set = Reader(phase='train', batch_size=config.items['batch_size'],
                               do_shuffle=False, resample=False, distortion=False)
            glog.info('feature extracting...')
            get_prediction(model, train_set, config)
        elif phase == 'top_k_prediction':
            from reader import Reader
            train_set = Reader(phase='test', batch_size=config.items['batch_size'],
                               do_shuffle=False, resample=False, distortion=False)
            glog.info('feature extracting...')
            get_top_k_prediction(model, train_set, config)
    glog.info('end')