我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用chainer.training()。
def __call__(self, trainer):
    """Log end-of-epoch stats, then sample text without disturbing training.

    The RNN state is saved before generation and restored afterwards so
    the sampling pass does not corrupt the training hidden state.
    """
    elapsed = time.time() - self.time_epoch
    current_epoch = trainer.updater.epoch
    loss = trainer.observation["main/loss"].data
    logger.info("epoch: %s, duration: %ds, loss: %.6g.",
                current_epoch, elapsed, loss)

    # Snapshot the RNN state before sampling.
    model = trainer.updater.get_optimizer("main").target
    saved_state = model.predictor.get_state()

    # Generate a text sample from a fresh seed.
    seed = generate_seed(self.text)
    generate_text(model, seed)

    # Restore the training-time RNN state and restart the epoch timer.
    model.predictor.set_state(saved_state)
    self.time_epoch = time.time()
def main():
    """Train a CNN classifier on MNIST for 5 epochs and report metrics."""
    model = L.Classifier(CNN())
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # MNIST with channel dimension (ndim=3) for the convolutional net.
    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, batch_size=100)
    test_iter = chainer.iterators.SerialIterator(
        test, batch_size=100, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (5, 'epoch'), out='result')
    trainer.extend(extensions.Evaluator(test_iter, model))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def gan_sampling(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128):
    """Build a trainer extension that saves a rows x cols grid of GAN samples.

    Args:
        gen: generator network.
        eval_folder: output directory (created on first use).
        gpu: device id; negative means CPU.
        rows, cols: grid layout of the sample image.
        latent_len: dimensionality of the latent vector.
    """
    @chainer.training.make_extension()
    def samples_generation(trainer):
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)
        # Draw one latent vector per grid cell.
        z = np.random.normal(size=(rows * cols, latent_len)).astype("f")
        if gpu >= 0:
            z = cuda.to_gpu(z)
        z = Variable(z, volatile=True)
        imgs = gen(z, test=True)
        save_images_grid(
            imgs,
            path=eval_folder + "/iter_" + str(trainer.updater.iteration) + ".jpg",
            grid_w=rows, grid_h=cols)

    return samples_generation
def gan_sampling_tags(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128,
                      attr_len=38, threshold=0.25):
    """Build a trainer extension that saves tag-conditioned GAN samples.

    Bug fix: ``@chainer.training.make_extension()`` previously decorated the
    inner helper ``get_fake_tag`` instead of the returned extension callable
    ``samples_generation`` (compare ``gan_sampling``), so the returned
    function carried no extension attributes (e.g. its default trigger).

    Args:
        gen: generator network conditioned on a latent vector + tag vector.
        eval_folder: output directory (created on first use).
        gpu: device id; negative means CPU.
        rows, cols: grid layout of the sample image.
        latent_len: dimensionality of the latent vector.
        attr_len: length of the attribute/tag vector.
        threshold: probability cutoff for the +/-1 attributes in slots 13-26.
    """
    def get_fake_tag():
        # Random attribute vector: one-hot over slots 0-12, one-hot over
        # slots 27+, and thresholded +/-1 values for slots 13-26.
        prob2 = np.random.rand(attr_len)
        tags = np.zeros((attr_len)).astype("f")
        tags[:] = -1.0
        tags[np.argmax(prob2[0:13])] = 1.0
        tags[27 + np.argmax(prob2[27:])] = 1.0
        prob2[prob2 < threshold] = -1.0
        prob2[prob2 >= threshold] = 1.0
        for i in range(13, 27):
            tags[i] = prob2[i]
        return tags

    def get_fake_tag_batch():
        # One random tag vector per grid cell, on the generator's device.
        xp = gen.xp
        batch = rows * cols
        tags = xp.zeros((batch, attr_len)).astype("f")
        for i in range(batch):
            tags[i] = xp.asarray(get_fake_tag())
        return tags

    @chainer.training.make_extension()
    def samples_generation(trainer):
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)
        z = np.random.normal(size=(rows * cols, latent_len)).astype("f")
        if gpu >= 0:
            z = cuda.to_gpu(z)
        tags = get_fake_tag_batch()
        z = Variable(z, volatile=True)
        tags = Variable(tags, volatile=True)
        imgs = gen(F.concat([z, tags]), test=True)
        save_images_grid(
            imgs,
            path=eval_folder + "/iter_" + str(trainer.updater.iteration) + ".jpg",
            grid_w=rows, grid_h=cols)

    return samples_generation
def ae_reconstruction(enc, dec, eval_folder, gpu, data_iter, batch_size=32,
                      img_chan=3, img_size=64):
    """Build a trainer extension that saves autoencoder reconstructions.

    Writes two image grids per invocation: the reconstructed batch
    (``*.rec.jpg``) and the corresponding real inputs (``*.real.jpg``).
    """
    @chainer.training.make_extension()
    def sample_reconstruction(trainer):
        xp = enc.xp
        batch = data_iter.next()
        # Stack the batch into a device array of shape (B, C, H, W).
        d_real = xp.zeros((batch_size, img_chan, img_size, img_size)).astype("f")
        for idx in range(batch_size):
            d_real[idx, :] = xp.asarray(batch[idx])
        x = Variable(d_real, volatile=True)
        imgs = dec(enc(x, test=True), test=True)
        iteration = str(trainer.updater.iteration)
        save_images_grid(imgs,
                         path=eval_folder + "/iter_" + iteration + ".rec.jpg",
                         grid_w=batch_size // 8, grid_h=8)
        save_images_grid(d_real,
                         path=eval_folder + "/iter_" + iteration + ".real.jpg",
                         grid_w=batch_size // 8, grid_h=8)

    return sample_reconstruction
def main(gpu_id=-1, bs=32, epoch=20, out='./result', resume=''):
    """Train ShallowConv on MNIST, logging per-layer parameter statistics.

    Bug fix: the original called ``chainer.cuda.get_device_from_id(gpu_id)``
    without ``.use()``, so the device was never made current before
    ``model.to_gpu()`` (which copies to the *current* device).

    Args:
        gpu_id: GPU device id; negative runs on CPU.
        bs: mini-batch size.
        epoch: number of training epochs.
        out: trainer output directory.
        resume: path of a trainer snapshot to resume from ('' = fresh run).
    """
    net = ShallowConv()
    model = L.Classifier(net)
    if gpu_id >= 0:
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    test_iter = chainer.iterators.SerialIterator(
        test, bs, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport(log_name='parameter_statistics'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())
    if resume:
        chainer.serializers.load_npz(resume, trainer)
    trainer.run()
def main(gpu_id=-1, bs=32, epoch=20, out='./not_layer_result', resume=''):
    """Train ShallowConv on MNIST (non-layerwise statistics variant).

    Bug fix: the original called ``chainer.cuda.get_device_from_id(gpu_id)``
    without ``.use()``, so the device was never made current before
    ``model.to_gpu()`` (which copies to the *current* device).

    Args:
        gpu_id: GPU device id; negative runs on CPU.
        bs: mini-batch size.
        epoch: number of training epochs.
        out: trainer output directory.
        resume: path of a trainer snapshot to resume from ('' = fresh run).
    """
    net = ShallowConv()
    model = L.Classifier(net)
    if gpu_id >= 0:
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    test_iter = chainer.iterators.SerialIterator(
        test, bs, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())
    if resume:
        chainer.serializers.load_npz(resume, trainer)
    trainer.run()
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, elu_dim=64, dep_dim=100, dropout_ratio=0.5):
    """Initialize the parser.

    When ``word_dim`` is None the model is loaded for inference from the
    hyper-parameters stored in ``tagger_defs.txt``; otherwise the given
    hyper-parameters are recorded and dumped for training.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # Inference: restore hyper-parameters saved at training time.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # Training: record hyper-parameters and vocabulary sizes.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.elu_dim = elu_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)

    # Word embedding plus 4 suffix + 4 prefix affix embeddings per token.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(LSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        lstm_f=L.NStepLSTM(nlayers, self.in_dim,
                           self.hidden_dim, self.dropout_ratio),
        lstm_b=L.NStepLSTM(nlayers, self.in_dim,
                           self.hidden_dim, self.dropout_ratio),
        linear_cat1=L.Linear(2 * self.hidden_dim, self.elu_dim),
        linear_cat2=L.Linear(self.elu_dim, len(self.targets)),
        linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine=Biaffine(self.dep_dim)
    )
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    """Initialize the biaffine parser.

    ``word_dim is None`` selects inference mode (hyper-parameters restored
    from ``tagger_defs.txt``); otherwise training mode records and dumps
    the given hyper-parameters.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # Inference: restore saved hyper-parameters.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path, length=True)
    else:
        # Training: record hyper-parameters and vocabulary sizes.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)

    # Word embedding plus 4 suffix + 4 prefix affix embeddings per token.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(FastBiaffineLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        # NOTE(review): LSTM dropout is hard-coded to 0.32, not dropout_ratio.
        lstm_f=FixedLengthNStepLSTM(self.nlayers, self.in_dim,
                                    self.hidden_dim, 0.32),
        lstm_b=FixedLengthNStepLSTM(self.nlayers, self.in_dim,
                                    self.hidden_dim, 0.32),
        arc_dep=Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, model_path, word_dim=None, char_dim=None, nlayers=2,
             hidden_dim=128, relu_dim=64, dep_dim=100, dropout_ratio=0.5):
    """Initialize the Japanese LSTM parser with peephole (Dyer) LSTMs.

    ``word_dim is None`` selects supertagger/inference mode (restore saved
    hyper-parameters); otherwise training mode records and dumps them.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # training
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.char_dim = char_dim
        p.hidden_dim = hidden_dim
        p.relu_dim = relu_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_chars = len(read_model_defs(model_path + "/chars.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)

    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(PeepHoleJaLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        # Character embeddings are fixed at 50 dims; a 3-wide convolution
        # over them produces the char_dim character features.
        emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50),
                                  stride=1, pad=(1, 0)),
        lstm_f1=DyerLSTM(self.in_dim, self.hidden_dim),
        lstm_f2=DyerLSTM(self.hidden_dim, self.hidden_dim),
        lstm_b1=DyerLSTM(self.in_dim, self.hidden_dim),
        lstm_b2=DyerLSTM(self.hidden_dim, self.hidden_dim),
        linear_cat1=L.Linear(2 * self.hidden_dim, self.relu_dim),
        linear_cat2=L.Linear(self.relu_dim, len(self.targets)),
        linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine=Biaffine(self.dep_dim)
    )
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    """Initialize the biaffine parser (standard NStepLSTM variant).

    ``word_dim is None`` selects inference mode (hyper-parameters restored
    from ``tagger_defs.txt``); otherwise training mode records and dumps
    the given hyper-parameters.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # Inference: restore saved hyper-parameters.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # Training: record hyper-parameters and vocabulary sizes.
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)

    # Word embedding plus 4 suffix + 4 prefix affix embeddings per token.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(FastBiaffineLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        # NOTE(review): LSTM dropout is hard-coded to 0.32, not dropout_ratio.
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets))
    )
def __init__(self, model_path, word_dim=None, caps_dim=None, suffix_dim=None):
    """Initialize the embedding tagger.

    ``word_dim is None`` selects supertagger/inference mode, loading the
    embedding sizes from ``tagger_defs.txt``; otherwise the given sizes
    are used for training.
    """
    self.model_path = model_path
    if word_dim is None:
        # use as supertagger
        with open(os.path.join(model_path, "tagger_defs.txt")) as defs_file:
            defs = json.load(defs_file)
        self.word_dim = defs["word_dim"]
        self.caps_dim = defs["caps_dim"]
        self.suffix_dim = defs["suffix_dim"]
    else:
        # training
        self.word_dim = word_dim
        self.caps_dim = caps_dim
        self.suffix_dim = suffix_dim

    self.words = read_model_defs(os.path.join(model_path, "words.txt"))
    self.suffixes = read_model_defs(os.path.join(model_path, "suffixes.txt"))
    self.caps = read_model_defs(os.path.join(model_path, "caps.txt"))
    self.targets = read_model_defs(os.path.join(model_path, "target.txt"))
    # self.unk_word = self.words["*UNKNOWN*"]
    self.unk_suffix = self.suffixes["UNK"]

    # 7-token context window; each token contributes word, caps and
    # suffix embeddings.
    in_dim = 7 * (self.word_dim + self.caps_dim + self.suffix_dim)
    super(EmbeddingTagger, self).__init__(
        emb_word=L.EmbedID(len(self.words), self.word_dim),
        emb_caps=L.EmbedID(len(self.caps), self.caps_dim),
        emb_suffix=L.EmbedID(len(self.suffixes), self.suffix_dim),
        linear=L.Linear(in_dim, len(self.targets)),
    )
def create_traindata(self, outdir):
    """Read JaCCG trees and write frequency-filtered training data to outdir.

    Two passes over the corpus: the first builds the frequency
    dictionaries via ``_traverse``; the second emits context-window
    samples for categories whose frequency is >= ``cat_freq_cut``.
    """
    trees = JaCCGReader(self.filepath).readall()
    # First pass: construct the frequency dictionaries only.
    for tree in trees:
        self._traverse(tree)
    # Second pass: build training samples, keeping only categories whose
    # frequency is at least cat_freq_cut.
    for tree in trees:
        tokens = get_leaves(tree)
        words = [token.word for token in tokens]
        self.sents.append(" ".join(words))
        cats = [token.cat.without_semantics for token in tokens]
        samples = get_context_by_window(words, CONTEXT, lpad=LPAD, rpad=RPAD)
        assert len(samples) == len(cats)
        for cat, sample in zip(cats, samples):
            if self.cats[cat] >= self.cat_freq_cut:
                self.samples[" ".join(sample)] = cat

    # Drop rare categories/words below their frequency cutoffs.
    self.cats = {k: v for (k, v) in self.cats.items()
                 if v >= self.cat_freq_cut}
    self.words = {k: v for (k, v) in self.words.items()
                  if v >= self.word_freq_cut}

    with open(outdir + "/unary_rules.txt", "w") as f:
        self._write(self.unary_rules, f, comment_out_value=True)
    with open(outdir + "/seen_rules.txt", "w") as f:
        self._write(self.seen_rules, f, comment_out_value=True)
    with open(outdir + "/target.txt", "w") as f:
        self._write(self.cats, f, comment_out_value=False)
    with open(outdir + "/words.txt", "w") as f:
        self._write(self.words, f, comment_out_value=False)
    with open(outdir + "/chars.txt", "w") as f:
        self._write(self.chars, f, comment_out_value=False)
    with open(outdir + "/traindata.json", "w") as f:
        json.dump(self.samples, f)
    with open(outdir + "/trainsents.txt", "w") as f:
        # NOTE(review): bytes + str concatenation — Python 2 idiom; this
        # line would raise TypeError on Python 3.
        for sent in self.sents:
            f.write(sent.encode("utf-8") + "\n")
def __init__(self, model_path, word_dim=None, char_dim=None):
    """Initialize the Japanese CCG embedding tagger.

    ``word_dim is None`` selects supertagger/inference mode (embedding
    sizes loaded from ``tagger_defs.txt``); otherwise the given sizes are
    written to that file for training.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger
        with open(defs_file) as f:
            defs = json.load(f)
        self.word_dim = defs["word_dim"]
        self.char_dim = defs["char_dim"]
    else:
        # training: persist the chosen hyper-parameters
        self.word_dim = word_dim
        self.char_dim = char_dim
        with open(defs_file, "w") as f:
            json.dump({"model": self.__class__.__name__,
                       "word_dim": self.word_dim,
                       "char_dim": self.char_dim}, f)

    self.extractor = FeatureExtractor(model_path)
    self.targets = read_model_defs(model_path + "/target.txt")
    self.train = True

    hidden_dim = 1000
    in_dim = WINDOW_SIZE * (self.word_dim + self.char_dim)
    super(JaCCGEmbeddingTagger, self).__init__(
        emb_word=L.EmbedID(len(self.extractor.words), self.word_dim),
        emb_char=L.EmbedID(len(self.extractor.chars),
                           self.char_dim, ignore_label=IGNORE),
        linear1=L.Linear(in_dim, hidden_dim),
        linear2=L.Linear(hidden_dim, len(self.targets)),
    )
def __call__(self, trainer):
    """Decide whether the extension should fire on this iteration.

    Accumulates the watched observation into a running summary; whenever
    the wrapped interval trigger fires, the summary mean is compared with
    the best value seen so far.  Note the polarity: the trigger returns
    ``True`` only when the value did NOT improve (an improvement updates
    ``_best_value`` and returns ``False``).

    Args:
        trainer (~chainer.training.Trainer): Trainer object that this
            trigger is associated with; its ``observation`` is inspected.

    Returns:
        bool: ``True`` if the corresponding extension should be invoked
        in this iteration.
    """
    observation = trainer.observation
    if self._key in observation:
        self._summary.add({self._key: observation[self._key]})

    if not self._interval_trigger(trainer):
        return False

    stats = self._summary.compute_mean()
    value = float(stats[self._key])  # copy to CPU
    self._init_summary()

    if self._best_value is None or self._compare(self._best_value, value):
        self._best_value = value
        return False
    return True
def get_example(self, i):
    """Return the i-th (image, label) pair after preprocessing.

    Preprocessing: random or center crop to ``self.crop_size``, optional
    horizontal flip (random mode only), mean subtraction over the cropped
    region, then scaling to [0, 1].  The array from ``self.base`` is
    modified in place.
    """
    crop_size = self.crop_size
    image, label = self.base[i]
    _, h, w = image.shape

    if self.random:
        # Random crop + random horizontal flip.
        # NOTE(review): randint(0, h - crop_size - 1) raises when
        # h == crop_size and never picks the bottom/right-most crop —
        # confirm whether that is intended.
        top = random.randint(0, h - crop_size - 1)
        left = random.randint(0, w - crop_size - 1)
        if random.randint(0, 1):
            image = image[:, :, ::-1]
    else:
        # Deterministic center crop.
        top = (h - crop_size) // 2
        left = (w - crop_size) // 2

    bottom = top + crop_size
    right = left + crop_size
    image = image[:, top:bottom, left:right]
    image -= self.mean[:, top:bottom, left:right]
    image *= (1.0 / 255.0)  # Scale to [0, 1]
    return image, label

# chainermn.create_multi_node_evaluator can be also used with user customized
# evaluator classes that inherit chainer.training.extensions.Evaluator.
def get_stats(self):
    '''Return snapshot-taking statistics collected by this checkpointer.

    After or during training the checkpointer accumulates timing
    statistics for saved checkpoints (e.g. average, minimum and maximum
    time).  These numbers help identify slow nodes or disks and tune the
    snapshot interval.
    '''
    return self.stats.report()
def main():
    """Train an MLP classifier on MNIST for 20 epochs with snapshots."""
    unit = 1000
    batchsize = 100
    epoch = 20

    model = L.Classifier(MLP(unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out='result')
    trainer.extend(extensions.Evaluator(test_iter, model))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def fit(model, train, valid, device=-1, batchsize=4096, n_epoch=500,
        resume=None, alpha=1e-3):
    """Train ``model`` on ``train`` with periodic evaluation on ``valid``.

    Args:
        model: chainer model to optimize.
        train, valid: chainer datasets.
        device: GPU id; negative runs on CPU.
        batchsize: mini-batch size for both iterators.
        n_epoch: number of training epochs.
        resume: optional trainer snapshot path to resume from.
        alpha: Adam learning rate.
    """
    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu(device)
    optimizer = chainer.optimizers.Adam(alpha)
    optimizer.setup(model)

    # Setup iterators
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    valid_iter = chainer.iterators.SerialIterator(
        valid, batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (n_epoch, 'epoch'),
                               out='out_' + str(device))

    # Setup logging, printing & saving
    keys = ['loss', 'rmse', 'bias', 'kld0', 'kld1',
            'kldg', 'kldi', 'hypg', 'hypi', 'hypglv', 'hypilv']
    reports = ['epoch']
    reports += ['main/' + key for key in keys]
    reports += ['validation/main/rmse']
    trainer.extend(TestModeEvaluator(valid_iter, model, device=device))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=device))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(reports))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # If previous model detected, resume
    if resume:
        print("Loading from {}".format(resume))
        chainer.serializers.load_npz(resume, trainer)

    # Run the model
    trainer.run()
def pretrain_source_cnn(data, args, epochs=1000):
    """Pretrain the ADDA source-domain CNN classifier and return it."""
    print(":: pretraining source encoder")
    source_cnn = Loss(num_classes=10)
    if args.device >= 0:
        source_cnn.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(source_cnn)

    train_iterator, test_iterator = data2iterator(
        data, args.batchsize, multiprocess=False)
    # train_iterator = chainer.iterators.MultiprocessIterator(data, args.batchsize, n_processes=4)

    updater = chainer.training.StandardUpdater(
        iterator=train_iterator, optimizer=optimizer, device=args.device)
    trainer = chainer.training.Trainer(
        updater, (epochs, 'epoch'), out=args.output)

    # learning rate decay
    # trainer.extend(extensions.ExponentialShift("alpha", rate=0.9, init=args.learning_rate, target=args.learning_rate*10E-5))

    trainer.extend(extensions.Evaluator(
        test_iterator, source_cnn, device=args.device))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(10, "epoch"))
    trainer.extend(extensions.snapshot_object(
        optimizer.target, "source_model_epoch_{.updater.epoch}"),
        trigger=(epochs, "epoch"))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    trainer.run()
    return source_cnn
def train_target_cnn(source, target, source_cnn, target_cnn, args,
                     epochs=10000):
    """Adversarially train the ADDA target encoder against a discriminator."""
    print(":: training encoder with target domain")
    discriminator = Discriminator()

    if args.device >= 0:
        source_cnn.to_gpu()
        target_cnn.to_gpu()
        discriminator.to_gpu()

    # Only the target encoder is updated by the target optimizer.
    # target_optimizer = chainer.optimizers.Adam(alpha=1.0E-5, beta1=0.5)
    target_optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    # target_optimizer = chainer.optimizers.MomentumSGD(lr=1.0E-4, momentum=0.99)
    target_optimizer.setup(target_cnn.encoder)
    target_optimizer.add_hook(
        chainer.optimizer.WeightDecay(args.weight_decay))

    # discriminator_optimizer = chainer.optimizers.Adam(alpha=1.0E-5, beta1=0.5)
    discriminator_optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    # discriminator_optimizer = chainer.optimizers.MomentumSGD(lr=1.0E-4, momentum=0.99)
    discriminator_optimizer.setup(discriminator)
    discriminator_optimizer.add_hook(
        chainer.optimizer.WeightDecay(args.weight_decay))

    source_train_iterator, source_test_iterator = data2iterator(
        source, args.batchsize, multiprocess=False)
    target_train_iterator, target_test_iterator = data2iterator(
        target, args.batchsize, multiprocess=False)

    updater = ADDAUpdater(source_train_iterator, target_train_iterator,
                          source_cnn, target_optimizer,
                          discriminator_optimizer, args)
    trainer = chainer.training.Trainer(
        updater, (epochs, 'epoch'), out=args.output)

    trainer.extend(extensions.Evaluator(
        target_test_iterator, target_cnn, device=args.device))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(10, "epoch"))
    trainer.extend(extensions.snapshot_object(
        target_cnn, "target_model_epoch_{.updater.epoch}"),
        trigger=(epochs, "epoch"))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
    trainer.extend(extensions.PrintReport(
        ["epoch", "loss/discrim", "loss/encoder",
         "validation/main/loss", "validation/main/accuracy", "elapsed_time"]))

    trainer.run()
def check_train(self, gpu):
    """Smoke-test GraphCNN training for 10 epochs on a toy 4-node graph."""
    outdir = tempfile.mkdtemp()
    print("outdir: {}".format(outdir))

    n_classes = 2
    batch_size = 32
    devices = {'main': gpu}

    # Adjacency matrix of a tiny fixed 4-node graph.
    A = np.array([
        [0, 1, 1, 0],
        [1, 0, 0, 1],
        [1, 0, 0, 0],
        [0, 1, 0, 0],
    ]).astype(np.float32)
    model = graph_cnn.GraphCNN(A, n_out=n_classes)

    optimizer = optimizers.Adam(alpha=1e-4)
    optimizer.setup(model)

    train_dataset = EasyDataset(train=True, n_classes=n_classes)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_dataset, batch_size)
    updater = ParallelUpdater(train_iter, optimizer, devices=devices)
    trainer = chainer.training.Trainer(updater, (10, 'epoch'), out=outdir)
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/accuracy']))
    trainer.extend(extensions.ProgressBar())
    trainer.run()
def parse_args(generators, discriminators, updaters):
    """Parse command-line options for adversarial semantic segmentation.

    Args:
        generators: mapping of available generator names to factories.
        discriminators: mapping of available discriminator names.
        updaters: mapping of available updater names.

    Returns:
        argparse.Namespace: parsed command-line arguments.
    """
    parser = argparse.ArgumentParser(
        description='Semantic Segmentation using Adversarial Networks')
    parser.add_argument('--generator', choices=generators.keys(),
                        default='fcn32s',
                        help='Generator(segmentor) architecture')
    parser.add_argument('--discriminator', choices=discriminators.keys(),
                        default='largefov', help='Discriminator architecture')
    parser.add_argument('--updater', choices=updaters.keys(),
                        default='gan', help='Updater')
    parser.add_argument('--initgen_path', default='pretrained_model/vgg16.npz',
                        help='Pretrained model of generator')
    parser.add_argument('--initdis_path', default=None,
                        help='Pretrained model of discriminator')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--iteration', '-i', type=int, default=100000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='snapshot',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--evaluate_interval', type=int, default=1000,
                        help='Interval of evaluation')
    parser.add_argument('--snapshot_interval', type=int, default=10000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=10,
                        help='Interval of displaying log to console')
    return parser.parse_args()
def fit(self, X, y=None, **kwargs):
    """Fit the model, filtering keyword arguments for ``fit_core``.

    If hyper parameters are set to None, the instance's variable is used;
    this supports grid search via ``set_params``.  If the instance's
    variable is not set either, ``_default_hyperparam`` is used.

    Usage::

        model.fit(train_dataset)   # or
        model.fit(X, y)

    Args:
        train: training dataset, assumes chainer's dataset class.
        test: test dataset for evaluation, assumes chainer's dataset class.
        batchsize: batch size for both training and evaluation.
        iterator_class: iterator class used for this training; currently
            assumes SerialIterator or MultiProcessIterator.
        optimizer: optimizer instance to update parameters.
        epoch: training epoch.
        out: directory path to save the result.
        snapshot_frequency (int): snapshot frequency in epochs; a negative
            value means no snapshots are taken.
        dump_graph: save computational graph info or not, default False.
        log_report: enable LogReport or not.
        plot_report: enable PlotReport or not.
        print_report: enable PrintReport or not.
        progress_report: enable ProgressReport or not.
        resume: trainer snapshot path to resume training from.
    """
    # Keep only the kwargs that fit_core actually accepts (sklearn-style).
    kwargs = self.filter_sk_params(self.fit_core, kwargs)
    return self.fit_core(X, y, **kwargs)
def train_task(args, train_name, model, epoch_num, train_dataset,
               test_dataset_dict, batch_size):
    """Train ``model`` on one task, evaluating against every test dataset.

    Each entry in ``test_dataset_dict`` gets its own named Evaluator so
    per-task validation metrics are reported and plotted separately.
    """
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    train_iter = iterators.SerialIterator(train_dataset, batch_size)
    test_iter_dict = {
        name: iterators.SerialIterator(
            test_dataset, batch_size, repeat=False, shuffle=False)
        for name, test_dataset in test_dataset_dict.items()}

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch_num, 'epoch'), out=args.out)
    for name, test_iter in test_iter_dict.items():
        trainer.extend(extensions.Evaluator(test_iter, model), name)
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss']
        + [test + '/main/loss' for test in test_dataset_dict.keys()]
        + ['main/accuracy']
        + [test + '/main/accuracy' for test in test_dataset_dict.keys()]))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.PlotReport(
        [test + "/main/accuracy" for test in test_dataset_dict.keys()],
        file_name=train_name + ".png"))
    trainer.run()
def main():
    """Run the permuted-MNIST continual-learning experiment with/without EWC."""
    output_dim = 10
    parser = argparse.ArgumentParser(description='EWC MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=800,
                        help='Number of sweeps over the dataset to train')
    """
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    """
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--lam', '-l', type=float, default=15.,
                        help='lambda parameter for EWC loss')
    parser.add_argument('--num_samples', '-n', type=int, default=200,
                        help='number of samples to compute fisher')
    parser.add_argument('--hidden', '-hi', type=int, default=50,
                        help='number of hidden node')
    parser.add_argument('--skip_taskA', '-s', type=bool, default=False,
                        help='whether skip training taskA or not')
    args = parser.parse_args()

    model = EWC(MLP(args.hidden, output_dim), args.lam, args.num_samples)
    train, test = chainer.datasets.get_mnist()
    # Task B is a pixel-permuted copy of MNIST.
    train2, test2 = permutate_mnist([train, test])

    print("Train without EWC")
    train_tasks_continuosly(args, model, train, test, train2, test2,
                            enable_ewc=False)
    print("Train with EWC")
    train_tasks_continuosly(args, model, train, test, train2, test2,
                            enable_ewc=True)
def create_updater(train_iter, optimizer, devices):
    """Pick the best updater for the available devices.

    Multi-GPU with NCCL uses MultiprocessParallelUpdater; multi-GPU
    without NCCL falls back to ParallelUpdater (scaling the learning rate
    down by the device count); a single device gets a StandardUpdater.
    """
    if HAVE_NCCL and len(devices) > 1:
        return training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=devices)
    if len(devices) > 1:
        # Compensate for gradient accumulation across devices.
        optimizer.lr /= len(devices)
        return training.ParallelUpdater(
            train_iter, optimizer, devices=devices)
    return training.StandardUpdater(
        train_iter, optimizer, device=devices['main'])
def create_updater(train_iter, optimizer, device):
    """Build a StandardUpdater for a single device."""
    return training.StandardUpdater(train_iter, optimizer, device=device)
def main(config_file):
    """Train an MLP classifier on top of a pretrained feature extractor.

    Reads the JSON config, restores the feature extractor weights, trains
    the classifier (with optional test-set evaluation), and saves the
    result to the configured output file.
    """
    with open(config_file) as fp:
        conf = json.load(fp)
    fe_conf = conf['feature_extractor']
    cl_conf = conf['classifier']

    # Restore the pretrained feature extractor.
    fe_class = getattr(cnn_feature_extractors, fe_conf['model'])
    feature_extractor = fe_class(
        n_classes=fe_conf['n_classes'], n_base_units=fe_conf['n_base_units'])
    chainer.serializers.load_npz(fe_conf['out_file'], feature_extractor)

    model = classifiers.MLPClassifier(cl_conf['n_classes'], feature_extractor)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    device = cl_conf.get('device', -1)
    train_dataset = feature_dataset(
        os.path.join(cl_conf['dataset_path'], 'train'), model)
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, conf.get('batch_size', 1))
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(
        updater, (cl_conf['epoch'], 'epoch'), out='out_re')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Evaluate on the test split only when it exists on disk.
    test_dataset_path = os.path.join(cl_conf['dataset_path'], 'test')
    if os.path.exists(test_dataset_path):
        test_dataset = feature_dataset(test_dataset_path, model)
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, 10, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(test_iter, model, device=device))
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]))
    else:
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'main/accuracy']))

    trainer.run()
    chainer.serializers.save_npz(cl_conf['out_file'], model)
def main(config_file):
    """Train the CNN feature extractor described by the JSON config.

    Optionally resumes from ``<out_file>.to_resume``, trains with optional
    test-set evaluation, and saves the CPU-converted model.
    """
    with open(config_file) as fp:
        conf = json.load(fp)['feature_extractor']

    model_class = getattr(cnn_feature_extractors, conf['model'])
    model = model_class(conf['n_classes'], conf['n_base_units'])

    # Resume from a partially trained model when one is present.
    resume_file = conf['out_file'] + '.to_resume'
    if os.path.exists(resume_file):
        chainer.serializers.load_npz(resume_file, model)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    device = conf.get('device', -1)
    train_dataset = create_dataset(os.path.join(conf['dataset_path'], 'train'))
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, conf.get('batch_size', 10))
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(
        updater, (conf['epoch'], 'epoch'), out='out')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Evaluate on the test split only when it exists on disk.
    test_dataset_path = os.path.join(conf['dataset_path'], 'test')
    if os.path.exists(test_dataset_path):
        test_dataset = create_dataset(test_dataset_path)
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, 20, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(test_iter, model, device=device))
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]))
    else:
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'main/accuracy']))

    trainer.run()

    model = model.to_cpu()
    chainer.serializers.save_npz(conf['out_file'], model)
def evaluation(gen_g, gen_f, test_image_folder, image_size=256, side=2):
    """Trainer-extension factory for a CycleGAN-style pair of generators.

    Draws one batch from the trainer's 'test' iterator, runs gen_g on the
    source images and gen_f on the target images, and saves each result as a
    side x side tiled JPEG named by the current iteration.

    Fix: the [-1, 1] -> [0, 255] rescale previously read the raw generator
    output ``img`` instead of the reshaped grid (``img_c`` / ``img_t``),
    which scrambled the channel layout of both saved images.
    """
    def _save_grid(img, path):
        # img: (side*side, 3, H, W) float array in [-1, 1] -> tiled uint8 image.
        grid = img.reshape((side, side, 3, image_size, image_size))
        grid = grid.transpose(0, 1, 3, 4, 2)            # -> (side, side, H, W, 3)
        grid = (grid + 1) * 127.5                       # [-1, 1] -> [0, 255]
        grid = np.clip(grid, 0, 255).astype(np.uint8)
        # Tile rows/columns into one image; [:, :, ::-1] swaps RGB <-> BGR
        # as in the original code.
        grid = grid.transpose(0, 2, 1, 3, 4).reshape(
            (side * image_size, side * image_size, 3))[:, :, ::-1]
        Image.fromarray(grid).save(path)

    @chainer.training.make_extension()
    def _eval(trainer, it):
        xp = gen_g.xp
        batch = it.next()
        batchsize = len(batch)
        x = xp.zeros((batchsize, 3, image_size, image_size)).astype("f")
        t = xp.zeros((batchsize, 3, image_size, image_size)).astype("f")
        for i in range(batchsize):
            x[i, :] = xp.asarray(batch[i][0])
            t[i, :] = xp.asarray(batch[i][1])

        x = Variable(x)
        result = gen_g(x, test=True)
        _save_grid(result.data.get(),
                   test_image_folder + "/iter_" + str(trainer.updater.iteration) + "_G.jpg")

        t = Variable(t)
        result = gen_f(t, test=True)
        _save_grid(result.data.get(),
                   test_image_folder + "/iter_" + str(trainer.updater.iteration) + "_F.jpg")

    def evaluation(trainer):
        it = trainer.updater.get_iterator('test')
        _eval(trainer, it)

    return evaluation
def train_main(args): """ trains model specfied in args. main method for train subcommand. """ # load text with open(args.text_path) as f: text = f.read() logger.info("corpus length: %s.", len(text)) # data iterator data_iter = DataIterator(text, args.batch_size, args.seq_len) # load or build model if args.restore: logger.info("restoring model.") load_path = args.checkpoint_path if args.restore is True else args.restore model = load_model(load_path) else: net = Network(vocab_size=VOCAB_SIZE, embedding_size=args.embedding_size, rnn_size=args.rnn_size, num_layers=args.num_layers, drop_rate=args.drop_rate) model = L.Classifier(net) # make checkpoint directory log_dir = make_dirs(args.checkpoint_path) with open("{}.json".format(args.checkpoint_path), "w") as f: json.dump(model.predictor.args, f, indent=2) chainer.serializers.save_npz(args.checkpoint_path, model) logger.info("model saved: %s.", args.checkpoint_path) # optimizer optimizer = chainer.optimizers.Adam(alpha=args.learning_rate) optimizer.setup(model) # clip gradient norm optimizer.add_hook(chainer.optimizer.GradientClipping(args.clip_norm)) # trainer updater = BpttUpdater(data_iter, optimizer) trainer = chainer.training.Trainer(updater, (args.num_epochs, 'epoch'), out=log_dir) trainer.extend(extensions.snapshot_object(model, filename=os.path.basename(args.checkpoint_path))) trainer.extend(extensions.ProgressBar(update_interval=1)) trainer.extend(extensions.LogReport()) trainer.extend(extensions.PlotReport(y_keys=["main/loss"])) trainer.extend(LoggerExtension(text)) # training start model.predictor.reset_state() logger.info("start of training.") time_train = time.time() trainer.run() # training end duration_train = time.time() - time_train logger.info("end of training, duration: %ds.", duration_train) # generate text seed = generate_seed(text) generate_text(model, seed, 1024, 3) return model
def create_trainer(
        config,
        project_path,
        updater,
        model,
        eval_func,
        iterator_test,
        iterator_train_varidation,
        loss_names,
        converter=chainer.dataset.convert.concat_examples,
):
    # type: (TrainConfig, str, any, typing.Dict, any, any, any, any, any) -> any
    """Build a Trainer with snapshotting, graph dump, dual evaluators
    (test split and a validation slice of the train split), and loss logging.

    NOTE(review): the Trainer is constructed without a stop trigger, so it
    runs until interrupted externally — confirm this is intended.
    """
    def _make_evaluator(iterator):
        # Evaluator variant that calls eval_func directly instead of the model.
        return utility.chainer_utility.NoVariableEvaluator(
            iterator,
            target=model,
            converter=converter,
            eval_func=eval_func,
            device=config.gpu,
        )

    trainer = chainer.training.Trainer(updater, out=project_path)

    log_trigger = (config.log_iteration, 'iteration')
    save_trigger = (config.save_iteration, 'iteration')

    eval_test_name = 'eval/test'
    eval_train_name = 'eval/train'

    snapshot = extensions.snapshot_object(model['main'], '{.updater.iteration}.model')
    trainer.extend(snapshot, trigger=save_trigger)

    trainer.extend(extensions.dump_graph('main/' + loss_names[0], out_name='main.dot'))

    trainer.extend(_make_evaluator(iterator_test), name=eval_test_name, trigger=log_trigger)
    trainer.extend(_make_evaluator(iterator_train_varidation), name=eval_train_name, trigger=log_trigger)

    # Report every loss for the updater and for both evaluators.
    report_target = []
    for evaluator_name in ['', eval_test_name + '/', eval_train_name + '/']:
        for model_name in ['main/']:
            for loss_name in loss_names:
                report_target.append(evaluator_name + model_name + loss_name)

    trainer.extend(extensions.LogReport(trigger=log_trigger, log_name="log.txt"))
    trainer.extend(extensions.PrintReport(report_target))

    return trainer
def train(args):
    """Train the LSTM dependency/supertag parser.

    Fix: the two status messages used Python-2 ``print`` statements, which
    are syntax errors under Python 3 and inconsistent with the sibling
    training functions in this file that already use ``print()``.
    """
    model = LSTMParser(args.model, args.word_emb_size, args.afix_emb_size,
                       args.nlayers, args.hidden_dim, args.elu_dim,
                       args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMParserDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                                        converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train(args):
    """Train the LSTM supertagger with momentum SGD and gradient clipping."""
    model = LSTMTagger(args.model, args.word_emb_size, args.afix_emb_size,
                       args.nlayers, args.hidden_dim, args.relu_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 2000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train(args):
    """Train the biaffine Japanese LSTM parser (Adam + eps decay schedule)."""
    model = BiaffineJaLSTMParser(args.model, args.word_emb_size, args.char_emb_size,
                                 args.nlayers, args.hidden_dim, args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMParserDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(2e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    # Decay Adam's eps by x0.75 every 2500 iterations, starting from 2e-3.
    trainer.extend(extensions.ExponentialShift(
        "eps", .75, 2e-3), trigger=(2500, 'iteration'))
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def __init__(self, model_path, word_dim=None, char_dim=None,
             nlayers=2, hidden_dim=128, relu_dim=64, dep_dim=100,
             dropout_ratio=0.5):
    """Initialize the Japanese LSTM parser.

    When ``word_dim`` is None the model is loaded for inference: hyper-
    parameters are read back from ``<model_path>/tagger_defs.txt``.
    Otherwise the model is being trained and the given hyper-parameters
    are written to that file for later reloading.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger: restore hyper-parameters from the defs file
        with open(defs_file) as f:
            defs = json.load(f)
        self.dep_dim = defs["dep_dim"]
        self.word_dim = defs["word_dim"]
        self.char_dim = defs["char_dim"]
        self.hidden_dim = defs["hidden_dim"]
        self.relu_dim = defs["relu_dim"]
        self.nlayers = defs["nlayers"]
        self.train = False
        self.extractor = FeatureExtractor(model_path)
    else:
        # training: record hyper-parameters for later inference-time reload
        self.dep_dim = dep_dim
        self.word_dim = word_dim
        self.char_dim = char_dim
        self.hidden_dim = hidden_dim
        self.relu_dim = relu_dim
        self.nlayers = nlayers
        self.train = True
        with open(defs_file, "w") as f:
            json.dump({"model": self.__class__.__name__,
                       "word_dim": self.word_dim,
                       "char_dim": self.char_dim,
                       "hidden_dim": hidden_dim,
                       "relu_dim": relu_dim,
                       "nlayers": nlayers,
                       "dep_dim": dep_dim}, f)

    self.targets = read_model_defs(model_path + "/target.txt")
    self.words = read_model_defs(model_path + "/words.txt")
    self.chars = read_model_defs(model_path + "/chars.txt")
    # LSTM input is the concatenation of word and char-CNN embeddings.
    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(JaLSTMParser, self).__init__(
        emb_word=L.EmbedID(len(self.words), self.word_dim),
        emb_char=L.EmbedID(len(self.chars), 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50),
                                  stride=1, pad=(1, 0)),
        lstm_f=L.NStepLSTM(nlayers, self.in_dim, self.hidden_dim, self.dropout_ratio),
        lstm_b=L.NStepLSTM(nlayers, self.in_dim, self.hidden_dim, self.dropout_ratio),
        linear_cat1=L.Linear(2 * self.hidden_dim, self.relu_dim),
        linear_cat2=L.Linear(self.relu_dim, len(self.targets)),
        linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine=Biaffine(self.dep_dim)
    )
def __init__(self, model_path, word_dim=None, char_dim=None,
             nlayers=2, hidden_dim=128, relu_dim=64, dropout_ratio=0.5):
    """Initialize the Japanese LSTM supertagger.

    When ``word_dim`` is None the model is loaded for inference: hyper-
    parameters are read back from ``<model_path>/tagger_defs.txt``.
    Otherwise the model is being trained and the given hyper-parameters
    are written to that file for later reloading.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # use as supertagger: restore hyper-parameters from the defs file
        with open(defs_file) as f:
            defs = json.load(f)
        self.word_dim = defs["word_dim"]
        self.char_dim = defs["char_dim"]
        self.hidden_dim = defs["hidden_dim"]
        self.relu_dim = defs["relu_dim"]
        self.nlayers = defs["nlayers"]
        self.train = False
        self.extractor = FeatureExtractor(model_path)
    else:
        # training: record hyper-parameters for later inference-time reload
        self.word_dim = word_dim
        self.char_dim = char_dim
        self.hidden_dim = hidden_dim
        self.relu_dim = relu_dim
        self.nlayers = nlayers
        self.train = True
        with open(defs_file, "w") as f:
            json.dump({"model": self.__class__.__name__,
                       "word_dim": self.word_dim,
                       "char_dim": self.char_dim,
                       "hidden_dim": hidden_dim,
                       "relu_dim": relu_dim,
                       "nlayers": nlayers}, f)

    self.targets = read_model_defs(model_path + "/target.txt")
    self.words = read_model_defs(model_path + "/words.txt")
    self.chars = read_model_defs(model_path + "/chars.txt")
    # LSTM input is the concatenation of word and char-CNN embeddings.
    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(JaLSTMTagger, self).__init__(
        emb_word=L.EmbedID(len(self.words), self.word_dim),
        emb_char=L.EmbedID(len(self.chars), 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50),
                                  stride=1, pad=(1, 0)),
        # NOTE(review): these LSTMs use dropout 0. while self.dropout_ratio
        # is stored — confirm whether that is intentional.
        lstm_f=L.NStepLSTM(nlayers, self.in_dim, self.hidden_dim, 0.),
        lstm_b=L.NStepLSTM(nlayers, self.in_dim, self.hidden_dim, 0.),
        conv1=L.Convolution2D(1, 2 * self.hidden_dim,
                              (7, 2 * self.hidden_dim), stride=1, pad=(3, 0)),
        linear1=L.Linear(2 * self.hidden_dim, self.relu_dim),
        linear2=L.Linear(self.relu_dim, len(self.targets)),
    )
def train(args):
    """Train the LSTM supertagger on CPU (no GPU/device handling here)."""
    model = LSTMTagger(args.model, args.word_emb_size, args.char_emb_size,
                       args.nlayers, args.hidden_dim, args.relu_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train(args):
    """Train the peephole Japanese LSTM parser.

    Fix: the PrintReport column list contained 'main/tagging_loss' twice,
    producing a duplicated column in the printed log.
    """
    model = PeepHoleJaLSTMParser(args.model, args.word_emb_size, args.char_emb_size,
                                 args.nlayers, args.hidden_dim,
                                 args.relu_dim, args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Pad ragged minibatch entries with -1 (the ignore label).
    converter = lambda x, device: \
        concat_examples(x, device=device, padding=-1)

    train = LSTMParserDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 2000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                                        converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_loss', 'main/tagging_accuracy',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train(args):
    """Train the LSTM dependency/supertag parser (Adam, weight decay 1e-6)."""
    model = LSTMParser(args.model, args.word_emb_size, args.afix_emb_size,
                       args.nlayers, args.hidden_dim, args.elu_dim,
                       args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMParserDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                                        converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def compress_traindata(args):
    """Build vocabulary files from a training corpus and rewrite it with
    unknown words/suffixes normalized.

    Reads ``args.vocab`` (embedding vocabulary, one word per line) and
    ``args.path`` (training lines of "word|suffix|cap" tokens ending with a
    label), writes ``words.txt``, ``suffixes.txt``, ``caps.txt``,
    ``target.txt`` (labels with frequency >= 10) and a filtered
    ``traindata.txt`` under ``args.out``.

    Fixes: ``dict.has_key`` (removed in Python 3) replaced with ``in``;
    the loop variable that shadowed the ``target`` frequency dict renamed;
    the input file is now closed.
    """
    words = OrderedDict()
    print("reading embedding vocabulary")
    for word in open(args.vocab):
        words[word.strip()] = 1
    suffixes = defaultdict(int)
    suffixes["UNK"] = 1  # guarantee the unknown-suffix entry exists
    caps = defaultdict(int)
    target = defaultdict(int)
    traindata = open(args.path)
    len_traindata = 0
    print("reading training file")
    for line in traindata:
        len_traindata += 1
        items = line.strip().split(" ")
        target[items[-1]] += 1  # last field is the label
        for item in items[:-1]:
            word, suffix, cap = item.split("|")
            if word in words:
                words[word] += 1
            else:
                words[word] = 1
            suffixes[suffix] += 1
            caps[cap] += 1

    def out_dict(d, outfile, freq_cut=-1):
        # Write "item count" lines for entries above freq_cut and return
        # an item -> id mapping in write order.
        print("writing to {}".format(outfile))
        res = {}
        with open(outfile, "w") as out:
            i = 0
            for item, n in d.items():
                if freq_cut <= n:
                    out.write("{} {}\n".format(item, n))
                    res[item] = i
                    i += 1
        return res

    word2id = out_dict(words, os.path.join(args.out, "words.txt"))
    suffix2id = out_dict(suffixes, os.path.join(args.out, "suffixes.txt"))
    cap2id = out_dict(caps, os.path.join(args.out, "caps.txt"))
    target2id = out_dict(target, os.path.join(args.out, "target.txt"), freq_cut=10)

    # Second pass: rewrite the corpus, dropping lines whose label was cut
    # and mapping OOV words/suffixes to their unknown markers.
    traindata.seek(0)
    new_traindata = os.path.join(args.out, "traindata.txt")
    print("writing to {}".format(new_traindata))
    with open(new_traindata, "w") as out:
        for i, line in enumerate(traindata):
            items = line.strip().split(" ")
            if items[-1] not in target2id:
                continue
            label = items[-1]  # renamed from `target` (shadowed the dict)
            new_line = ""
            for j, item in enumerate(items[:-1]):
                word, suffix, cap = item.split("|")
                if word not in word2id:
                    word = "*UNKNOWN*"
                if suffix not in suffix2id:
                    suffix = "UNK"
                new_line += "|".join([word, suffix, cap]) + " "
            out.write(new_line + label + "\n")
    traindata.close()
def train(args):
    """Train the Japanese CCG embedding tagger with AdaGrad (CPU only)."""
    model = JaCCGEmbeddingTagger(args.model,
                                 args.word_emb_size, args.char_emb_size)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    train = JaCCGTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = JaCCGTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.AdaGrad()
    optimizer.setup(model)
    # optimizer.add_hook(WeightDecay(1e-8))
    # Pad the second field of each example with -1; other fields unpadded.
    my_converter = lambda x, dev: convert.concat_examples(x, dev, (None, -1, None, None))
    updater = training.StandardUpdater(train_iter, optimizer, converter=my_converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, my_converter), trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train(args):
    """Train the peephole LSTM supertagger (momentum SGD + grad clipping)."""
    model = PeepHoleLSTMTagger(args.model, args.word_emb_size, args.afix_emb_size,
                               args.nlayers, args.hidden_dim, args.relu_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Pad ragged minibatch entries with -1 (the ignore label).
    converter = lambda x, device: \
        concat_examples(x, device=device, padding=-1)

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def train(args):
    """Train the peephole LSTM parser.

    Fixes: Python-2 ``print`` statements replaced with ``print()`` calls
    (syntax errors under Python 3, inconsistent with sibling functions),
    and the duplicated 'main/tagging_loss' entry removed from PrintReport.
    """
    model = PeepHoleLSTMParser(args.model, args.word_emb_size, args.afix_emb_size,
                               args.nlayers, args.hidden_dim, args.elu_dim,
                               args.dep_dim, args.dropout_ratio, args.gpu >= 0)
    with open(args.model + "/params", "w") as f:
        log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Pad ragged minibatch entries with -1 (the ignore label).
    converter = lambda x, device: \
        concat_examples(x, device=device, padding=-1)

    train = LSTMParserDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    # Evaluate with a copy of the model switched to test mode.
    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                                        converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_loss', 'main/tagging_accuracy',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_loss',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def start(self):
    """Train pose net.

    Fix: ``self.epoch / 10`` is float division under Python 3, so the
    snapshot trigger became a float interval (and could round toward zero
    for small epoch counts); it is now ``max(1, self.epoch // 10)``.
    """
    # set random seed.
    if self.seed is not None:
        random.seed(self.seed)
        np.random.seed(self.seed)
        if self.gpu >= 0:
            chainer.cuda.cupy.random.seed(self.seed)
    # initialize model to train.
    model = AlexNet(self.Nj, self.use_visibility)
    if self.resume_model:
        serializers.load_npz(self.resume_model, model)
    # prepare gpu.
    if self.gpu >= 0:
        chainer.cuda.get_device(self.gpu).use()
        model.to_gpu()
    # load the datasets.
    train = PoseDataset(self.train, data_augmentation=self.data_augmentation)
    val = PoseDataset(self.val, data_augmentation=False)
    # training/validation iterators.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, self.batchsize)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, self.batchsize, repeat=False, shuffle=False)
    # set up an optimizer.
    optimizer = self._get_optimizer()
    optimizer.setup(model)
    if self.resume_opt:
        chainer.serializers.load_npz(self.resume_opt, optimizer)
    # set up a trainer.
    updater = training.StandardUpdater(train_iter, optimizer, device=self.gpu)
    trainer = training.Trainer(
        updater, (self.epoch, 'epoch'), os.path.join(self.out, 'chainer'))
    # standard trainer settings
    trainer.extend(extensions.dump_graph('main/loss'))
    val_interval = (10, 'epoch')
    trainer.extend(TestModeEvaluator(val_iter, model, device=self.gpu),
                   trigger=val_interval)
    # save parameters and optimization state per validation step;
    # integer division keeps the trigger an int, with a floor of 1 epoch.
    resume_interval = (max(1, self.epoch // 10), 'epoch')
    trainer.extend(extensions.snapshot_object(
        model, "epoch-{.updater.epoch}.model"), trigger=resume_interval)
    trainer.extend(extensions.snapshot_object(
        optimizer, "epoch-{.updater.epoch}.state"), trigger=resume_interval)
    trainer.extend(extensions.snapshot(
        filename="epoch-{.updater.epoch}.iter"), trigger=resume_interval)
    # show log
    log_interval = (10, "iteration")
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'lr']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # start training
    if self.resume:
        chainer.serializers.load_npz(self.resume, trainer)
    trainer.run()
def train(args):
    """Train a word-embedding model (word2vec-style) and return it.

    Supports either an in-memory corpus file (with train/val split and an
    Evaluator) or a directory corpus streamed via DirWindowIterator.
    """
    time_start = timer()

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()

    # Build the word vocabulary from the corpus or load a prebuilt one.
    if args.path_vocab == '':
        vocab = create_from_dir(args.path_corpus)
    else:
        vocab = Vocabulary()
        vocab.load(args.path_vocab)
        logger.info("loaded vocabulary")

    if args.context_representation != 'word':
        # for deps or ner context representation, we need a new context
        # vocab for NS or HSM loss function.
        vocab_context = create_from_annotated_dir(
            args.path_corpus, representation=args.context_representation)
    else:
        vocab_context = vocab

    loss_func = get_loss_func(args, vocab_context)
    model = get_model(args, loss_func, vocab)

    if args.gpu >= 0:
        model.to_gpu()
        logger.debug("model sent to gpu")

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # File corpus -> train/val split; directory corpus -> streaming iterator.
    if os.path.isfile(args.path_corpus):
        train, val = get_data(args.path_corpus, vocab)
        if args.test:
            # Tiny slices for a smoke-test run.
            train = train[:100]
            val = val[:100]
        train_iter = WindowIterator(train, args.window, args.batchsize)
        val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)
    else:
        train_iter = DirWindowIterator(
            path=args.path_corpus, vocab=vocab,
            window_size=args.window, batch_size=args.batchsize)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.path_out)

    if os.path.isfile(args.path_corpus):
        trainer.extend(extensions.Evaluator(
            val_iter, model, converter=convert, device=args.gpu))
    trainer.extend(extensions.LogReport())
    if os.path.isfile(args.path_corpus):
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    else:
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'elapsed_time']))
    # trainer.extend(extensions.ProgressBar())
    trainer.run()

    # Wrap the trained weights into the returned embedding model.
    model = create_model(args, model, vocab)
    time_end = timer()
    model.metadata["execution_time"] = time_end - time_start
    return model
def main(options):
    """Train a CNN relation classifier (19 labels) with position embeddings."""
    # load the config params
    gpu = options['gpu']
    data_path = options['path_dataset']
    embeddings_path = options['path_vectors']
    n_epoch = options['epochs']
    batch_size = options['batchsize']
    test = options['test']
    embed_dim = options['embed_dim']
    freeze = options['freeze_embeddings']
    distance_embed_dim = options['distance_embed_dim']

    # load the data
    data_processor = DataProcessor(data_path)
    data_processor.prepare_dataset()
    train_data = data_processor.train_data
    test_data = data_processor.test_data

    vocab = data_processor.vocab
    cnn = CNN(n_vocab=len(vocab), input_channel=1,
              output_channel=100, n_label=19, embed_dim=embed_dim,
              position_dims=distance_embed_dim, freeze=freeze)
    cnn.load_embeddings(embeddings_path, data_processor.vocab)
    model = L.Classifier(cnn)

    # use GPU if flag is set
    if gpu >= 0:
        model.to_gpu()

    # setup the optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size, repeat=False, shuffle=False)

    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert.concat_examples, device=gpu)
    trainer = training.Trainer(updater, (n_epoch, 'epoch'))

    # Evaluation: use a copy of the model switched to test mode.
    test_model = model.copy()
    test_model.predictor.train = False
    trainer.extend(extensions.Evaluator(
        test_iter, test_model, device=gpu, converter=convert.concat_examples))

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
def main(options):
    """Train a binary CNN text classifier, evaluating on the dev split.

    Fixes: removed ``batch1 = train_iter.next()`` / ``batch2 = dev_iter.next()``
    — dead code that silently consumed the first batch of each iterator
    before training (dev_iter is repeat=False, so a dev batch was dropped).
    """
    # load the config params
    gpu = options['gpu']
    data_path = options['path_dataset']
    embeddings_path = options['path_vectors']
    n_epoch = options['epochs']
    batchsize = options['batchsize']
    test = options['test']
    embed_dim = options['embed_dim']
    freeze = options['freeze_embeddings']

    # load the data
    data_processor = DataProcessor(data_path, test)
    data_processor.prepare_dataset()
    train_data = data_processor.train_data
    dev_data = data_processor.dev_data
    test_data = data_processor.test_data

    vocab = data_processor.vocab
    cnn = CNN(n_vocab=len(vocab), input_channel=1,
              output_channel=10, n_label=2, embed_dim=embed_dim, freeze=freeze)
    cnn.load_embeddings(embeddings_path, data_processor.vocab)
    model = L.Classifier(cnn)
    if gpu >= 0:
        model.to_gpu()

    # setup the optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    dev_iter = chainer.iterators.SerialIterator(
        dev_data, batchsize, repeat=False, shuffle=False)
    # TODO(review): test_iter/test_model are created but never attached to an
    # Evaluator — the test split is currently not evaluated. Confirm intent.
    test_iter = chainer.iterators.SerialIterator(
        test_data, batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(
        train_iter, optimizer, converter=util.concat_examples, device=gpu)
    trainer = training.Trainer(updater, (n_epoch, 'epoch'))

    # Evaluation on the dev split with a test-mode copy of the model.
    eval_model = model.copy()
    eval_model.predictor.train = False
    trainer.extend(extensions.Evaluator(
        dev_iter, eval_model, device=gpu, converter=util.concat_examples))

    test_model = model.copy()
    test_model.predictor.train = False

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()