Python chainer module: training() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use chainer.training().
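Before the project-specific examples, here is a minimal sketch of the workflow most of the snippets on this page share: build iterators, wrap the optimizer in an updater, hand the updater to a training.Trainer, attach extensions, and call run(). The model and dataset below are placeholders chosen only to keep the sketch self-contained; they are not taken from any of the projects listed.

import chainer
import chainer.links as L
from chainer import training
from chainer.training import extensions

def minimal_training_example():
    # Placeholder model: a single linear layer wrapped in a Classifier.
    model = L.Classifier(L.Linear(784, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # MNIST is used here only because most examples below use it too.
    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, 100)
    test_iter = chainer.iterators.SerialIterator(test, 100, repeat=False, shuffle=False)

    # iterator -> updater -> Trainer -> extensions -> run
    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (5, 'epoch'), out='result')
    trainer.extend(extensions.Evaluator(test_iter, model))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss']))
    trainer.run()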

Project: char-rnn-text-generation    Author: yxtay    | project source | file source
def __call__(self, trainer):
        duration_epoch = time.time() - self.time_epoch
        epoch = trainer.updater.epoch
        loss = trainer.observation["main/loss"].data
        logger.info("epoch: %s, duration: %ds, loss: %.6g.",
                    epoch, duration_epoch, loss)

        # get rnn state
        model = trainer.updater.get_optimizer("main").target
        state = model.predictor.get_state()
        # generate text
        seed = generate_seed(self.text)
        generate_text(model, seed)
        # set rnn back to training state
        model.predictor.set_state(state)

        # reset time
        self.time_epoch = time.time()
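In the char-rnn-text-generation project, this callable is the body of a trainer extension (it appears as LoggerExtension(text) in the train_main example further down this page). A minimal registration sketch, assuming the extension object is constructed from the training corpus and should fire once per epoch:

# Hypothetical wiring; `trainer` and `text` come from the surrounding training
# script, and the per-epoch trigger matches the epoch-based logging in __call__ above.
trainer.extend(LoggerExtension(text), trigger=(1, 'epoch'))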
Project: chainer-examples    Author: nocotan    | project source | file source
def main():
    model = L.Classifier(CNN())

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, batch_size=100)
    test_iter = chainer.iterators.SerialIterator(test, batch_size=100, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (5, 'epoch'), out='result')

    trainer.extend(extensions.Evaluator(test_iter, model))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
Project: chainer-gan-experiments    Author: Aixile    | project source | file source
def gan_sampling(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128):
    @chainer.training.make_extension()
    def samples_generation(trainer):
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)
        z = np.random.normal(size=(rows*cols, latent_len)).astype("f")
        if gpu>=0:
            z = cuda.to_gpu(z)
        z = Variable(z, volatile=True)
        imgs = gen(z, test=True)
        save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".jpg",
            grid_w=rows, grid_h=cols)

    return samples_generation
Project: chainer-gan-experiments    Author: Aixile    | project source | file source
def gan_sampling_tags(gen, eval_folder, gpu, rows=6, cols=6, latent_len=128, attr_len=38, threshold=0.25):
    def get_fake_tag():
        prob2 = np.random.rand(attr_len)
        tags = np.zeros((attr_len)).astype("f")
        tags[:] = -1.0
        tags[np.argmax(prob2[0:13])]=1.0
        tags[27 + np.argmax(prob2[27:])] = 1.0
        prob2[prob2<threshold] = -1.0
        prob2[prob2>=threshold] = 1.0
        for i in range(13, 27):
            tags[i] = prob2[i]
        return tags

    def get_fake_tag_batch():
        xp = gen.xp
        batch = rows*cols
        tags = xp.zeros((batch, attr_len)).astype("f")
        for i in range(batch):
            tags[i] = xp.asarray(get_fake_tag())
        return tags

    @chainer.training.make_extension()
    def samples_generation(trainer):
        if not os.path.exists(eval_folder):
            os.makedirs(eval_folder)
        z = np.random.normal(size=(rows*cols, latent_len)).astype("f")
        if gpu>=0:
            z = cuda.to_gpu(z)
        tags = get_fake_tag_batch()
        z = Variable(z, volatile=True)
        tags = Variable(tags, volatile=True)
        imgs = gen(F.concat([z,tags]), test=True)
        save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".jpg",
            grid_w=rows, grid_h=cols)

    return samples_generation
Project: chainer-gan-experiments    Author: Aixile    | project source | file source
def ae_reconstruction(enc, dec, eval_folder, gpu, data_iter, batch_size=32, img_chan=3, img_size=64):
    @chainer.training.make_extension()
    def sample_reconstruction(trainer):
        xp = enc.xp
        batch = data_iter.next()
        d_real = xp.zeros((batch_size, img_chan, img_size, img_size)).astype("f")
        for i in range(batch_size):
            d_real[i, :] = xp.asarray(batch[i])
        x = Variable(d_real, volatile=True)
        imgs = dec(enc(x, test=True), test=True)
        save_images_grid(imgs, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".rec.jpg",
            grid_w=batch_size//8, grid_h=8)
        save_images_grid(d_real, path=eval_folder+"/iter_"+str(trainer.updater.iteration)+".real.jpg",
            grid_w=batch_size//8, grid_h=8)

    return sample_reconstruction
Project: instance_normalization_chainer    Author: crcrpar    | project source | file source
def main(gpu_id=-1, bs=32, epoch=20, out='./result', resume=''):
    net = ShallowConv()
    model = L.Classifier(net)
    if gpu_id >= 0:
        chainer.cuda.get_device_from_id(gpu_id)
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    test_iter = chainer.iterators.SerialIterator(
        test, bs, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport(log_name='parameter_statistics'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if resume:
        chainer.serializers.load_npz(resume, trainer)

    trainer.run()
Project: instance_normalization_chainer    Author: crcrpar    | project source | file source
def main(gpu_id=-1, bs=32, epoch=20, out='./not_layer_result', resume=''):
    net = ShallowConv()
    model = L.Classifier(net)
    if gpu_id >= 0:
        chainer.cuda.get_device_from_id(gpu_id)
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    test_iter = chainer.iterators.SerialIterator(test, bs, repeat=False,
                                                 shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if resume:
        chainer.serializers.load_npz(resume, trainer)

    trainer.run()
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
            hidden_dim=128, elu_dim=64, dep_dim=100, dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path)
        else:
            # training
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.afix_dim = afix_dim
            p.hidden_dim = hidden_dim
            p.elu_dim = elu_dim
            p.nlayers = nlayers
            p.n_words = len(read_model_defs(model_path + "/words.txt"))
            p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
            p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
            p.targets = read_model_defs(model_path + "/target.txt")
            p.dump(defs_file)

        self.in_dim = self.word_dim + 8 * self.afix_dim
        self.dropout_ratio = dropout_ratio
        super(LSTMParser, self).__init__(
                emb_word=L.EmbedID(self.n_words, self.word_dim),
                emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
                emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
                lstm_f=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, self.dropout_ratio),
                lstm_b=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, self.dropout_ratio),
                linear_cat1=L.Linear(2 * self.hidden_dim, self.elu_dim),
                linear_cat2=L.Linear(self.elu_dim, len(self.targets)),
                linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
                linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
                biaffine=Biaffine(self.dep_dim)
                )
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
            hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path, length=True)
        else:
            # training
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.afix_dim = afix_dim
            p.hidden_dim = hidden_dim
            p.nlayers = nlayers
            p.n_words = len(read_model_defs(model_path + "/words.txt"))
            p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
            p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
            p.targets = read_model_defs(model_path + "/target.txt")
            p.dump(defs_file)

        self.in_dim = self.word_dim + 8 * self.afix_dim
        self.dropout_ratio = dropout_ratio
        super(FastBiaffineLSTMParser, self).__init__(
                emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
                emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
                emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
                lstm_f=FixedLengthNStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
                lstm_b=FixedLengthNStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
                arc_dep=Linear(2 * self.hidden_dim, self.dep_dim),
                arc_head=Linear(2 * self.hidden_dim, self.dep_dim),
                rel_dep=Linear(2 * self.hidden_dim, self.dep_dim),
                rel_head=Linear(2 * self.hidden_dim, self.dep_dim),
                biaffine_arc=Biaffine(self.dep_dim),
                biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, char_dim=None, nlayers=2,
            hidden_dim=128, relu_dim=64, dep_dim=100, dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            # use as supertagger
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path)
        else:
            # training
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.char_dim = char_dim
            p.hidden_dim = hidden_dim
            p.relu_dim = relu_dim
            p.nlayers = nlayers
            p.n_words = len(read_model_defs(model_path + "/words.txt"))
            p.n_chars = len(read_model_defs(model_path + "/chars.txt"))
            p.targets = read_model_defs(model_path + "/target.txt")
            p.dump(defs_file)

        self.in_dim = self.word_dim + self.char_dim
        self.dropout_ratio = dropout_ratio
        super(PeepHoleJaLSTMParser, self).__init__(
                emb_word=L.EmbedID(self.n_words, self.word_dim),
                emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
                conv_char=L.Convolution2D(1, self.char_dim,
                    (3, 50), stride=1, pad=(1, 0)),
                lstm_f1=DyerLSTM(self.in_dim, self.hidden_dim),
                lstm_f2=DyerLSTM(self.hidden_dim, self.hidden_dim),
                lstm_b1=DyerLSTM(self.in_dim, self.hidden_dim),
                lstm_b2=DyerLSTM(self.hidden_dim, self.hidden_dim),
                linear_cat1=L.Linear(2 * self.hidden_dim, self.relu_dim),
                linear_cat2=L.Linear(self.relu_dim, len(self.targets)),
                linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
                linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
                biaffine=Biaffine(self.dep_dim)
                )
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
            hidden_dim=128, elu_dim=64, dep_dim=100, dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path)
        else:
            # training
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.afix_dim = afix_dim
            p.hidden_dim = hidden_dim
            p.elu_dim = elu_dim
            p.nlayers = nlayers
            p.n_words = len(read_model_defs(model_path + "/words.txt"))
            p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
            p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
            p.targets = read_model_defs(model_path + "/target.txt")
            p.dump(defs_file)

        self.in_dim = self.word_dim + 8 * self.afix_dim
        self.dropout_ratio = dropout_ratio
        super(LSTMParser, self).__init__(
                emb_word=L.EmbedID(self.n_words, self.word_dim),
                emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
                emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
                lstm_f=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, self.dropout_ratio),
                lstm_b=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, self.dropout_ratio),
                linear_cat1=L.Linear(2 * self.hidden_dim, self.elu_dim),
                linear_cat2=L.Linear(self.elu_dim, len(self.targets)),
                linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
                linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
                biaffine=Biaffine(self.dep_dim)
                )
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
            hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path)
        else:
            # training
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.afix_dim = afix_dim
            p.hidden_dim = hidden_dim
            p.nlayers = nlayers
            p.n_words = len(read_model_defs(model_path + "/words.txt"))
            p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
            p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
            p.targets = read_model_defs(model_path + "/target.txt")
            p.dump(defs_file)

        self.in_dim = self.word_dim + 8 * self.afix_dim
        self.dropout_ratio = dropout_ratio
        super(FastBiaffineLSTMParser, self).__init__(
                emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
                emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
                emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
                lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
                lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
                arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
                arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
                rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
                rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
                biaffine_arc=Biaffine(self.dep_dim),
                biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets))
                )
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, caps_dim=None, suffix_dim=None):
        self.model_path = model_path
        if word_dim is None:
            # use as supertagger
            with open(os.path.join(model_path, "tagger_defs.txt")) as defs_file:
                defs = json.load(defs_file)
            self.word_dim = defs["word_dim"]
            self.caps_dim = defs["caps_dim"]
            self.suffix_dim = defs["suffix_dim"]
        else:
            # training
            self.word_dim = word_dim
            self.caps_dim = caps_dim
            self.suffix_dim = suffix_dim

        self.words = read_model_defs(os.path.join(model_path, "words.txt"))
        self.suffixes = read_model_defs(os.path.join(model_path, "suffixes.txt"))
        self.caps = read_model_defs(os.path.join(model_path, "caps.txt"))
        self.targets = read_model_defs(os.path.join(model_path, "target.txt"))

        # self.unk_word = self.words["*UNKNOWN*"]
        self.unk_suffix = self.suffixes["UNK"]

        in_dim = 7 * (self.word_dim + self.caps_dim + self.suffix_dim)
        super(EmbeddingTagger, self).__init__(
                emb_word=L.EmbedID(len(self.words), self.word_dim),
                emb_caps=L.EmbedID(len(self.caps), self.caps_dim),
                emb_suffix=L.EmbedID(len(self.suffixes), self.suffix_dim),
                linear=L.Linear(in_dim, len(self.targets)),
                )
Project: depccg    Author: masashi-y    | project source | file source
def create_traindata(self, outdir):
        trees = JaCCGReader(self.filepath).readall()
        # first construct dictionaries only
        for tree in trees:
            self._traverse(tree)
        # construct training samples with
        # categories whose frequency >= freq_cut.
        for tree in trees:
            tokens = get_leaves(tree)
            words = [token.word for token in tokens]
            self.sents.append(" ".join(words))
            cats = [token.cat.without_semantics for token in tokens]
            samples = get_context_by_window(
                    words, CONTEXT, lpad=LPAD, rpad=RPAD)
            assert len(samples) == len(cats)
            for cat, sample in zip(cats, samples):
                if self.cats[cat] >= self.cat_freq_cut:
                    self.samples[" ".join(sample)] = cat

        self.cats = {k: v for (k, v) in self.cats.items() \
                        if v >= self.cat_freq_cut}
        self.words = {k: v for (k, v) in self.words.items() \
                        if v >= self.word_freq_cut}
        with open(outdir + "/unary_rules.txt", "w") as f:
            self._write(self.unary_rules, f, comment_out_value=True)
        with open(outdir + "/seen_rules.txt", "w") as f:
            self._write(self.seen_rules, f, comment_out_value=True)
        with open(outdir + "/target.txt", "w") as f:
            self._write(self.cats, f, comment_out_value=False)
        with open(outdir + "/words.txt", "w") as f:
            self._write(self.words, f, comment_out_value=False)
        with open(outdir + "/chars.txt", "w") as f:
            self._write(self.chars, f, comment_out_value=False)
        with open(outdir + "/traindata.json", "w") as f:
            json.dump(self.samples, f)
        with open(outdir + "/trainsents.txt", "w") as f:
            for sent in self.sents:
                f.write(sent.encode("utf-8") + "\n")
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, char_dim=None):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            # use as supertagger
            with open(defs_file) as f:
                defs = json.load(f)
            self.word_dim = defs["word_dim"]
            self.char_dim = defs["char_dim"]
        else:
            # training
            self.word_dim = word_dim
            self.char_dim = char_dim
            with open(defs_file, "w") as f:
                json.dump({"model": self.__class__.__name__,
                           "word_dim": self.word_dim,
                           "char_dim": self.char_dim}, f)

        self.extractor = FeatureExtractor(model_path)
        self.targets = read_model_defs(model_path + "/target.txt")
        self.train = True

        hidden_dim = 1000
        in_dim = WINDOW_SIZE * (self.word_dim + self.char_dim)
        super(JaCCGEmbeddingTagger, self).__init__(
                emb_word=L.EmbedID(len(self.extractor.words), self.word_dim),
                emb_char=L.EmbedID(len(self.extractor.chars),
                            self.char_dim, ignore_label=IGNORE),
                linear1=L.Linear(in_dim, hidden_dim),
                linear2=L.Linear(hidden_dim, len(self.targets)),
                )
Project: convolutional_seq2seq    Author: soskek    | project source | file source
def __call__(self, trainer):
        """Decides whether the extension should be called on this iteration.

        Args:
            trainer (~chainer.training.Trainer): Trainer object that this
                trigger is associated with. The ``observation`` of this trainer
                is used to determine if the trigger should fire.

        Returns:
            bool: ``True`` if the corresponding extension should be invoked in
                this iteration.

        """

        observation = trainer.observation
        summary = self._summary
        key = self._key
        if key in observation:
            summary.add({key: observation[key]})

        if not self._interval_trigger(trainer):
            return False

        stats = summary.compute_mean()
        value = float(stats[key])  # copy to CPU
        self._init_summary()

        if self._best_value is None or self._compare(self._best_value, value):
            self._best_value = value
            return False
        return True
Project: chainermn    Author: chainer    | project source | file source
def get_example(self, i):
        # It reads the i-th image/label pair and returns a preprocessed image.
        # It applies the following preprocessing steps:
        #     - Cropping (random or center rectangular)
        #     - Random flip
        #     - Scaling to [0, 1] value
        crop_size = self.crop_size

        image, label = self.base[i]
        _, h, w = image.shape

        if self.random:
            # Randomly crop a region and flip the image
            top = random.randint(0, h - crop_size - 1)
            left = random.randint(0, w - crop_size - 1)
            if random.randint(0, 1):
                image = image[:, :, ::-1]
        else:
            # Crop the center
            top = (h - crop_size) // 2
            left = (w - crop_size) // 2
        bottom = top + crop_size
        right = left + crop_size

        image = image[:, top:bottom, left:right]
        image -= self.mean[:, top:bottom, left:right]
        image *= (1.0 / 255.0)  # Scale to [0, 1]
        return image, label


# chainermn.create_multi_node_evaluator can also be used with user-customized
# evaluator classes that inherit chainer.training.extensions.Evaluator.
Project: chainermn    Author: chainer    | project source | file source
def get_stats(self):
        '''Get statistics of taking snapshots

        After or during training, the checkpointer holds statistics on
        saving checkpoints, such as the average, minimum, and maximum
        time. With these stats, users can identify slow nodes or disks,
        and estimate the average time penalty of taking a snapshot in
        order to optimize the snapshot interval.

        '''
        return self.stats.report()
Project: chainer-examples    Author: nocotan    | project source | file source
def main():
    unit = 1000
    batchsize = 100
    epoch = 20

    model = L.Classifier(MLP(unit, 10))

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out='result')

    trainer.extend(extensions.Evaluator(test_iter, model))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
Project: vfm    Author: cemoody    | project source | file source
def fit(model, train, valid, device=-1, batchsize=4096, n_epoch=500,
        resume=None, alpha=1e-3):
    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu(device)
    optimizer = chainer.optimizers.Adam(alpha)
    optimizer.setup(model)

    # Setup iterators
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    valid_iter = chainer.iterators.SerialIterator(valid, batchsize,
                                                  repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (n_epoch, 'epoch'),
                               out='out_' + str(device))

    # Setup logging, printing & saving
    keys = ['loss', 'rmse', 'bias', 'kld0', 'kld1']
    keys += ['kldg', 'kldi', 'hypg', 'hypi']
    keys += ['hypglv', 'hypilv']
    reports = ['epoch']
    reports += ['main/' + key for key in keys]
    reports += ['validation/main/rmse']
    trainer.extend(TestModeEvaluator(valid_iter, model, device=device))
    trainer.extend(extensions.Evaluator(valid_iter, model, device=device))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport(reports))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # If previous model detected, resume
    if resume:
        print("Loading from {}".format(resume))
        chainer.serializers.load_npz(resume, trainer)

    # Run the model
    trainer.run()
Project: chainer-ADDA    Author: pfnet-research    | project source | file source
def pretrain_source_cnn(data, args, epochs=1000):
    print(":: pretraining source encoder")
    source_cnn = Loss(num_classes=10)
    if args.device >= 0:
        source_cnn.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(source_cnn)

    train_iterator, test_iterator = data2iterator(data, args.batchsize, multiprocess=False)

    # train_iterator = chainer.iterators.MultiprocessIterator(data, args.batchsize, n_processes=4)

    updater = chainer.training.StandardUpdater(iterator=train_iterator, optimizer=optimizer, device=args.device)
    trainer = chainer.training.Trainer(updater, (epochs, 'epoch'), out=args.output)

    # learning rate decay
    # trainer.extend(extensions.ExponentialShift("alpha", rate=0.9, init=args.learning_rate, target=args.learning_rate*10E-5))

    trainer.extend(extensions.Evaluator(test_iterator, source_cnn, device=args.device))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(10, "epoch"))
    trainer.extend(extensions.snapshot_object(optimizer.target, "source_model_epoch_{.updater.epoch}"), trigger=(epochs, "epoch"))

    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    trainer.run()

    return source_cnn
Project: chainer-ADDA    Author: pfnet-research    | project source | file source
def train_target_cnn(source, target, source_cnn, target_cnn, args, epochs=10000):
    print(":: training encoder with target domain")
    discriminator = Discriminator()

    if args.device >= 0:
        source_cnn.to_gpu()
        target_cnn.to_gpu()
        discriminator.to_gpu()

    # target_optimizer = chainer.optimizers.Adam(alpha=1.0E-5, beta1=0.5)
    target_optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    # target_optimizer = chainer.optimizers.MomentumSGD(lr=1.0E-4, momentum=0.99)
    target_optimizer.setup(target_cnn.encoder)
    target_optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    # discriminator_optimizer = chainer.optimizers.Adam(alpha=1.0E-5, beta1=0.5)
    discriminator_optimizer = chainer.optimizers.RMSprop(lr=args.lr)
    # discriminator_optimizer = chainer.optimizers.MomentumSGD(lr=1.0E-4, momentum=0.99)
    discriminator_optimizer.setup(discriminator)
    discriminator_optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    source_train_iterator, source_test_iterator = data2iterator(source, args.batchsize, multiprocess=False)
    target_train_iterator, target_test_iterator = data2iterator(target, args.batchsize, multiprocess=False)

    updater = ADDAUpdater(source_train_iterator, target_train_iterator, source_cnn, target_optimizer, discriminator_optimizer, args)

    trainer = chainer.training.Trainer(updater, (epochs, 'epoch'), out=args.output)

    trainer.extend(extensions.Evaluator(target_test_iterator, target_cnn, device=args.device))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(10, "epoch"))
    trainer.extend(extensions.snapshot_object(target_cnn, "target_model_epoch_{.updater.epoch}"), trigger=(epochs, "epoch"))

    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
    trainer.extend(extensions.PrintReport(
        ["epoch", "loss/discrim", "loss/encoder",
         "validation/main/loss", "validation/main/accuracy", "elapsed_time"]))

    trainer.run()
Project: chainer-graph-cnn    Author: pfnet-research    | project source | file source
def check_train(self, gpu):
        outdir = tempfile.mkdtemp()
        print("outdir: {}".format(outdir))

        n_classes = 2
        batch_size = 32

        devices = {'main': gpu}

        A = np.array([
            [0, 1, 1, 0],
            [1, 0, 0, 1],
            [1, 0, 0, 0],
            [0, 1, 0, 0],
        ]).astype(np.float32)
        model = graph_cnn.GraphCNN(A, n_out=n_classes)

        optimizer = optimizers.Adam(alpha=1e-4)
        optimizer.setup(model)
        train_dataset = EasyDataset(train=True, n_classes=n_classes)
        train_iter = chainer.iterators.MultiprocessIterator(
            train_dataset, batch_size)
        updater = ParallelUpdater(train_iter, optimizer, devices=devices)
        trainer = chainer.training.Trainer(updater, (10, 'epoch'), out=outdir)
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'main/loss', 'main/accuracy']))
        trainer.extend(extensions.ProgressBar())
        trainer.run()
Project: Semantic-Segmentation-using-Adversarial-Networks    Author: oyam    | project source | file source
def parse_args(generators, discriminators, updaters):
    parser = argparse.ArgumentParser(description='Semantic Segmentation using Adversarial Networks')
    parser.add_argument('--generator', choices=generators.keys(), default='fcn32s',
                        help='Generator(segmentor) architecture')
    parser.add_argument('--discriminator', choices=discriminators.keys(), default='largefov',
                        help='Discriminator architecture')
    parser.add_argument('--updater', choices=updaters.keys(), default='gan',
                        help='Updater')
    parser.add_argument('--initgen_path', default='pretrained_model/vgg16.npz',
                        help='Pretrained model of generator')
    parser.add_argument('--initdis_path', default=None,
                        help='Pretrained model of discriminator')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--iteration', '-i', type=int, default=100000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='snapshot',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--evaluate_interval', type=int, default=1000,
                        help='Interval of evaluation')
    parser.add_argument('--snapshot_interval', type=int, default=10000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=10,
                        help='Interval of displaying log to console')
    return parser.parse_args()
Project: chainer_sklearn    Author: corochann    | project source | file source
def fit(self, X, y=None, **kwargs):
        """If hyper parameters are set to None, then instance's variable is used,
        this functionality is used Grid search with `set_params` method.
        Also if instance's variable is not set, _default_hyperparam is used. 

        Usage: model.fit(train_dataset) or model.fit(X, y)

        Args:
            train: training dataset, assumes chainer's dataset class 
            test: test dataset for evaluation, assumes chainer's dataset class
            batchsize: batchsize for both training and evaluation
            iterator_class: iterator class used for this training, 
                            currently assumes SerialIterator or MultiProcessIterator
            optimizer: optimizer instance to update parameter
            epoch: training epoch
            out: directory path to save the result
            snapshot_frequency (int): snapshot frequency in epoch. 
                                Negative value indicates not to take snapshot.
            dump_graph: Save computational graph info or not, default is False.
            log_report: Enable LogReport or not
            plot_report: Enable PlotReport or not
            print_report: Enable PrintReport or not
            progress_report: Enable ProgressReport or not
            resume: specify trainer saved path to resume training.

        """
        kwargs = self.filter_sk_params(self.fit_core, kwargs)
        return self.fit_core(X, y, **kwargs)
Project: chainer-EWC    Author: okdshin    | project source | file source
def train_task(args, train_name, model, epoch_num,
               train_dataset, test_dataset_dict, batch_size):
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    train_iter = iterators.SerialIterator(train_dataset, batch_size)
    test_iter_dict = {name: iterators.SerialIterator(
            test_dataset, batch_size, repeat=False, shuffle=False)
            for name, test_dataset in test_dataset_dict.items()}

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch_num, 'epoch'), out=args.out)
    for name, test_iter in test_iter_dict.items():
        trainer.extend(extensions.Evaluator(test_iter, model), name)
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss'] +
        [test+'/main/loss' for test in test_dataset_dict.keys()] +
        ['main/accuracy'] +
        [test+'/main/accuracy' for test in test_dataset_dict.keys()]))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.PlotReport(
        [test+"/main/accuracy" for test
         in test_dataset_dict.keys()],
        file_name=train_name+".png"))
    trainer.run()
Project: chainer-EWC    Author: okdshin    | project source | file source
def main():
    output_dim = 10

    parser = argparse.ArgumentParser(description='EWC MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=800,
                        help='Number of sweeps over the dataset to train')
    """
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    """
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--lam', '-l', type=float, default=15.,
                        help='lambda parameter for EWC loss')
    parser.add_argument('--num_samples', '-n', type=int, default=200,
                        help='number of samples to compute fisher')
    parser.add_argument('--hidden', '-hi', type=int, default=50,
                        help='number of hidden node')
    parser.add_argument('--skip_taskA', '-s', type=bool, default=False,
                        help='whether skip training taskA or not')
    args = parser.parse_args()

    model = EWC(MLP(args.hidden, output_dim), args.lam, args.num_samples)

    train, test = chainer.datasets.get_mnist()
    train2, test2 = permutate_mnist([train, test])

    print("Train without EWC")
    train_tasks_continuosly(args, model, train, test, train2, test2,
                            enable_ewc=False)

    print("Train with EWC")
    train_tasks_continuosly(args, model, train, test, train2, test2,
                            enable_ewc=True)
Project: chainer-pspnet    Author: mitmul    | project source | file source
def create_updater(train_iter, optimizer, devices):
    if HAVE_NCCL and len(devices) > 1:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=devices)
    elif len(devices) > 1:
        optimizer.lr /= len(devices)
        updater = training.ParallelUpdater(
            train_iter, optimizer, devices=devices)
    else:
        updater = training.StandardUpdater(
            train_iter, optimizer, device=devices['main'])
    return updater
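A usage sketch for the helper above, assuming two GPUs. The 'main' key is the one the single-device branch actually reads; the second key name, the device IDs, and the stopping condition are arbitrary assumptions, and train_iter and optimizer are prepared elsewhere.

# Assumed call site; note that with more than one device the
# MultiprocessParallelUpdater branch expects a list of per-device iterators.
devices = {'main': 0, 'second': 1}  # updater name -> GPU id
updater = create_updater(train_iter, optimizer, devices)
trainer = training.Trainer(updater, (90, 'epoch'), out='result')
trainer.run()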
Project: chainer-pspnet    Author: mitmul    | project source | file source
def create_updater(train_iter, optimizer, device):
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    return updater
Project: face-classifier-cnn    Author: nknytk    | project source | file source
def main(config_file):
    with open(config_file) as fp:
        conf = json.load(fp)
    fe_conf = conf['feature_extractor']
    cl_conf = conf['classifier']

    fe_class = getattr(cnn_feature_extractors, fe_conf['model'])
    feature_extractor = fe_class(n_classes=fe_conf['n_classes'], n_base_units=fe_conf['n_base_units'])
    chainer.serializers.load_npz(fe_conf['out_file'], feature_extractor)

    model = classifiers.MLPClassifier(cl_conf['n_classes'], feature_extractor)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    device = cl_conf.get('device', -1)
    train_dataset = feature_dataset(os.path.join(cl_conf['dataset_path'], 'train'), model)
    train_iter = chainer.iterators.SerialIterator(train_dataset, conf.get('batch_size', 1))
    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (cl_conf['epoch'], 'epoch'), out='out_re')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))

    test_dataset_path = os.path.join(cl_conf['dataset_path'], 'test')
    if os.path.exists(test_dataset_path):
        test_dataset = feature_dataset(test_dataset_path, model)
        test_iter = chainer.iterators.SerialIterator(test_dataset, 10, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(test_iter, model, device=device))
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]))
    else:
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy']))

    trainer.run()

    chainer.serializers.save_npz(cl_conf['out_file'], model)
Project: face-classifier-cnn    Author: nknytk    | project source | file source
def main(config_file):
    with open(config_file) as fp:
        conf = json.load(fp)['feature_extractor']

    model_class = getattr(cnn_feature_extractors, conf['model'])
    model = model_class(conf['n_classes'], conf['n_base_units'])

    resume_file = conf['out_file'] + '.to_resume'
    if os.path.exists(resume_file):
        chainer.serializers.load_npz(resume_file, model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    device = conf.get('device', -1)
    train_dataset = create_dataset(os.path.join(conf['dataset_path'], 'train'))
    train_iter = chainer.iterators.SerialIterator(train_dataset, conf.get('batch_size', 10))
    updater = chainer.training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (conf['epoch'], 'epoch'), out='out')

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.ProgressBar(update_interval=10))

    test_dataset_path = os.path.join(conf['dataset_path'], 'test')
    if os.path.exists(test_dataset_path):
        test_dataset = create_dataset(test_dataset_path)
        test_iter = chainer.iterators.SerialIterator(test_dataset, 20, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(test_iter, model, device=device))
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]))
    else:
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy']))

    trainer.run()

    model = model.to_cpu()
    chainer.serializers.save_npz(conf['out_file'], model)
Project: chainer-cyclegan    Author: Aixile    | project source | file source
def evaluation(gen_g, gen_f, test_image_folder, image_size=256, side=2):
    def _eval(trainer, it):
        xp = gen_g.xp
        batch = it.next()
        batchsize = len(batch)

        #x = []
        x = xp.zeros((batchsize, 3, image_size, image_size)).astype("f")
        t = xp.zeros((batchsize, 3, image_size, image_size)).astype("f")
        for i in range(batchsize):
            x[i, :] = xp.asarray(batch[i][0])
            t[i, :] = xp.asarray(batch[i][1])

        x = Variable(x)
        result = gen_g(x, test=True)
        img = result.data.get()

        img_c = img.reshape((side, side, 3, image_size, image_size))
        img_c = img_c.transpose(0,1,3,4,2)
        img_c = (img_c + 1) * 127.5
        img_c = np.clip(img_c, 0, 255)
        img_c = img_c.astype(np.uint8)
        img_c = img_c.reshape((side, side, image_size, image_size, 3)).transpose(0,2,1,3,4).reshape((side*image_size, side*image_size, 3))[:,:,::-1]
        Image.fromarray(img_c).save(test_image_folder+"/iter_"+str(trainer.updater.iteration)+"_G.jpg")

        t = Variable(t)
        result = gen_f(t, test=True)
        img_t = result.data.get()
        img_t = img_t.reshape( (side, side, 3, image_size, image_size))
        img_t = img_t.transpose(0,1,3,4,2)
        img_t = (img_t + 1) * 127.5
        img_t = np.clip(img_t, 0, 255)
        img_t = img_t.astype(np.uint8)
        img_t = img_t.reshape((side, side, image_size, image_size, 3)).transpose(0,2,1,3,4).reshape((side*image_size, side*image_size, 3))[:,:,::-1]
        #print(img_t)
        Image.fromarray(img_t).save(test_image_folder+"/iter_"+str(trainer.updater.iteration)+"_F.jpg")

    @chainer.training.make_extension()
    def evaluation(trainer):
        it = trainer.updater.get_iterator('test')
        _eval(trainer, it)

    return evaluation
Project: char-rnn-text-generation    Author: yxtay    | project source | file source
def train_main(args):
    """
    trains the model specified in args.
    main method for train subcommand.
    """
    # load text
    with open(args.text_path) as f:
        text = f.read()
    logger.info("corpus length: %s.", len(text))

    # data iterator
    data_iter = DataIterator(text, args.batch_size, args.seq_len)

    # load or build model
    if args.restore:
        logger.info("restoring model.")
        load_path = args.checkpoint_path if args.restore is True else args.restore
        model = load_model(load_path)
    else:
        net = Network(vocab_size=VOCAB_SIZE,
                      embedding_size=args.embedding_size,
                      rnn_size=args.rnn_size,
                      num_layers=args.num_layers,
                      drop_rate=args.drop_rate)
        model = L.Classifier(net)

    # make checkpoint directory
    log_dir = make_dirs(args.checkpoint_path)
    with open("{}.json".format(args.checkpoint_path), "w") as f:
        json.dump(model.predictor.args, f, indent=2)
    chainer.serializers.save_npz(args.checkpoint_path, model)
    logger.info("model saved: %s.", args.checkpoint_path)

    # optimizer
    optimizer = chainer.optimizers.Adam(alpha=args.learning_rate)
    optimizer.setup(model)
    # clip gradient norm
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.clip_norm))

    # trainer
    updater = BpttUpdater(data_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (args.num_epochs, 'epoch'), out=log_dir)
    trainer.extend(extensions.snapshot_object(model, filename=os.path.basename(args.checkpoint_path)))
    trainer.extend(extensions.ProgressBar(update_interval=1))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PlotReport(y_keys=["main/loss"]))
    trainer.extend(LoggerExtension(text))

    # training start
    model.predictor.reset_state()
    logger.info("start of training.")
    time_train = time.time()
    trainer.run()

    # training end
    duration_train = time.time() - time_train
    logger.info("end of training, duration: %ds.", duration_train)
    # generate text
    seed = generate_seed(text)
    generate_text(model, seed, 1024, 3)
    return model
Project: Comicolorization    Author: DwangoMediaVillage    | project source | file source
def create_trainer(
        config,
        project_path,
        updater,
        model,
        eval_func,
        iterator_test,
        iterator_train_varidation,
        loss_names,
        converter=chainer.dataset.convert.concat_examples,
):
    # type: (TrainConfig, str, any, typing.Dict, any, any, any, any, any) -> any
    def _make_evaluator(iterator):
        return utility.chainer_utility.NoVariableEvaluator(
            iterator,
            target=model,
            converter=converter,
            eval_func=eval_func,
            device=config.gpu,
        )

    trainer = chainer.training.Trainer(updater, out=project_path)

    log_trigger = (config.log_iteration, 'iteration')
    save_trigger = (config.save_iteration, 'iteration')

    eval_test_name = 'eval/test'
    eval_train_name = 'eval/train'

    snapshot = extensions.snapshot_object(model['main'], '{.updater.iteration}.model')
    trainer.extend(snapshot, trigger=save_trigger)

    trainer.extend(extensions.dump_graph('main/' + loss_names[0], out_name='main.dot'))

    trainer.extend(_make_evaluator(iterator_test), name=eval_test_name, trigger=log_trigger)
    trainer.extend(_make_evaluator(iterator_train_varidation), name=eval_train_name, trigger=log_trigger)

    report_target = []
    for evaluator_name in ['', eval_test_name + '/', eval_train_name + '/']:
        for model_name in ['main/']:
            for loss_name in loss_names:
                report_target.append(evaluator_name + model_name + loss_name)

    trainer.extend(extensions.LogReport(trigger=log_trigger, log_name="log.txt"))
    trainer.extend(extensions.PrintReport(report_target))

    return trainer
Project: depccg    Author: masashi-y    | project source | file source
def train(args):
    model = LSTMParser(args.model, args.word_emb_size, args.afix_emb_size, args.nlayers,
            args.hidden_dim, args.elu_dim, args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f: log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMParserDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
            device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                    converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Project: depccg    Author: masashi-y    | project source | file source
def train(args):
    model = LSTMTagger(args.model, args.word_emb_size, args.afix_emb_size,
            args.nlayers, args.hidden_dim, args.relu_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
            device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 2000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Project: depccg    Author: masashi-y    | project source | file source
def train(args):
    model = BiaffineJaLSTMParser(args.model, args.word_emb_size, args.char_emb_size,
            args.nlayers, args.hidden_dim, args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f: log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMParserDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(2e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
            device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.ExponentialShift(
                    "eps", .75, 2e-3), trigger=(2500, 'iteration'))
    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Project: depccg    Author: masashi-y    | project source | file source
def __init__(self, model_path, word_dim=None, char_dim=None, nlayers=2,
            hidden_dim=128, relu_dim=64, dep_dim=100, dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            # use as supertagger
            with open(defs_file) as f:
                defs = json.load(f)
            self.dep_dim    = defs["dep_dim"]
            self.word_dim   = defs["word_dim"]
            self.char_dim   = defs["char_dim"]
            self.hidden_dim = defs["hidden_dim"]
            self.relu_dim   = defs["relu_dim"]
            self.nlayers    = defs["nlayers"]
            self.train = False
            self.extractor = FeatureExtractor(model_path)
        else:
            # training
            self.dep_dim = dep_dim
            self.word_dim = word_dim
            self.char_dim = char_dim
            self.hidden_dim = hidden_dim
            self.relu_dim = relu_dim
            self.nlayers = nlayers
            self.train = True
            with open(defs_file, "w") as f:
                json.dump({"model": self.__class__.__name__,
                           "word_dim": self.word_dim, "char_dim": self.char_dim,
                           "hidden_dim": hidden_dim, "relu_dim": relu_dim,
                           "nlayers": nlayers, "dep_dim": dep_dim}, f)

        self.targets = read_model_defs(model_path + "/target.txt")
        self.words = read_model_defs(model_path + "/words.txt")
        self.chars = read_model_defs(model_path + "/chars.txt")
        self.in_dim = self.word_dim + self.char_dim
        self.dropout_ratio = dropout_ratio
        super(JaLSTMParser, self).__init__(
                emb_word=L.EmbedID(len(self.words), self.word_dim),
                emb_char=L.EmbedID(len(self.chars), 50, ignore_label=IGNORE),
                conv_char=L.Convolution2D(1, self.char_dim,
                    (3, 50), stride=1, pad=(1, 0)),
                lstm_f=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, self.dropout_ratio),
                lstm_b=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, self.dropout_ratio),
                linear_cat1=L.Linear(2 * self.hidden_dim, self.relu_dim),
                linear_cat2=L.Linear(self.relu_dim, len(self.targets)),
                linear_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
                linear_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
                biaffine=Biaffine(self.dep_dim)
                )
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def __init__(self, model_path, word_dim=None, char_dim=None,
            nlayers=2, hidden_dim=128, relu_dim=64, dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            # use as supertagger
            with open(defs_file) as f:
                defs = json.load(f)
            self.word_dim   = defs["word_dim"]
            self.char_dim   = defs["char_dim"]
            self.hidden_dim = defs["hidden_dim"]
            self.relu_dim   = defs["relu_dim"]
            self.nlayers    = defs["nlayers"]
            self.train = False
            self.extractor = FeatureExtractor(model_path)
        else:
            # training
            self.word_dim = word_dim
            self.char_dim = char_dim
            self.hidden_dim = hidden_dim
            self.relu_dim = relu_dim
            self.nlayers = nlayers
            self.train = True
            with open(defs_file, "w") as f:
                json.dump({"model": self.__class__.__name__,
                           "word_dim": self.word_dim, "char_dim": self.char_dim,
                           "hidden_dim": hidden_dim, "relu_dim": relu_dim,
                           "nlayers": nlayers}, f)

        self.targets = read_model_defs(model_path + "/target.txt")
        self.words = read_model_defs(model_path + "/words.txt")
        self.chars = read_model_defs(model_path + "/chars.txt")
        self.in_dim = self.word_dim + self.char_dim
        self.dropout_ratio = dropout_ratio
        super(JaLSTMTagger, self).__init__(
                emb_word=L.EmbedID(len(self.words), self.word_dim),
                emb_char=L.EmbedID(len(self.chars), 50, ignore_label=IGNORE),
                conv_char=L.Convolution2D(1, self.char_dim,
                    (3, 50), stride=1, pad=(1, 0)),
                lstm_f=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, 0.),
                lstm_b=L.NStepLSTM(nlayers, self.in_dim,
                    self.hidden_dim, 0.),
                conv1=L.Convolution2D(1, 2 * self.hidden_dim,
                    (7, 2 * self.hidden_dim), stride=1, pad=(3, 0)),
                linear1=L.Linear(2 * self.hidden_dim, self.relu_dim),
                linear2=L.Linear(self.relu_dim, len(self.targets)),
                )
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def train(args):
    model = LSTMTagger(args.model, args.word_emb_size, args.char_emb_size,
            args.nlayers, args.hidden_dim, args.relu_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def train(args):
    model = PeepHoleJaLSTMParser(args.model, args.word_emb_size, args.char_emb_size,
            args.nlayers, args.hidden_dim, args.relu_dim, args.dep_dim, args.dropout_ratio)

    with open(args.model + "/params", "w") as f: log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()


    converter = lambda x, device: \
            concat_examples(x, device=device, padding=-1)

    train = LSTMParserDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 2000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                    converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def train(args):
    model = LSTMParser(args.model, args.word_emb_size, args.afix_emb_size, args.nlayers,
            args.hidden_dim, args.elu_dim, args.dep_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f: log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    train = LSTMParserDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
            device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                    converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def compress_traindata(args):
    words = OrderedDict()
    print("reading embedding vocabulary")
    for word in open(args.vocab):
        words[word.strip()] = 1
    suffixes = defaultdict(int)
    suffixes["UNK"] = 1
    caps = defaultdict(int)
    target = defaultdict(int)
    traindata = open(args.path)
    len_traindata = 0
    print("reading training file")
    for line in traindata:
        len_traindata += 1
        items = line.strip().split(" ")
        target[items[-1]] += 1
        for item in items[:-1]:
            word, suffix, cap = item.split("|")
            if word in words:
                words[word] += 1
            else:
                words[word] = 1
            suffixes[suffix] += 1
            caps[cap] += 1
    def out_dict(d, outfile, freq_cut=-1):
        print("writing to {}".format(outfile))
        res = {}
        with open(outfile, "w") as out:
            i = 0
            for item, n in d.items():
                if freq_cut <= n:
                    out.write("{} {}\n".format(item, n))
                    res[item] = i
                    i += 1
        return res
    word2id = out_dict(words, os.path.join(args.out, "words.txt"))
    suffix2id = out_dict(suffixes, os.path.join(args.out, "suffixes.txt"))
    cap2id = out_dict(caps, os.path.join(args.out, "caps.txt"))
    target2id = out_dict(target, os.path.join(args.out, "target.txt"), freq_cut=10)
    traindata.seek(0)
    new_traindata = os.path.join(args.out, "traindata.txt")
    print("writing to {}".format(new_traindata))
    with open(new_traindata, "w") as out:
        for i, line in enumerate(traindata):
            items = line.strip().split(" ")
            if items[-1] not in target2id:
                continue
            target = items[-1]
            new_line = ""
            for j, item in enumerate(items[:-1]):
                word, suffix, cap = item.split("|")
                if word not in word2id:
                    word = "*UNKNOWN*"
                if suffix not in suffix2id:
                    suffix = "UNK"
                new_line += "|".join([word, suffix, cap]) + " "
            out.write(new_line + target + "\n")
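
For reference, the training-file format `compress_traindata` assumes (inferred from the parsing above, names hypothetical) is one example per line: space-separated `word|suffix|cap` tokens followed by the target category as the final field.

# Inferred line format of args.path:
#   word1|suffix1|cap1 word2|suffix2|cap2 ... TARGET
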
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def train(args):
    model = JaCCGEmbeddingTagger(args.model,
                args.word_emb_size, args.char_emb_size)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    train = JaCCGTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = JaCCGTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.AdaGrad()
    optimizer.setup(model)
    # optimizer.add_hook(WeightDecay(1e-8))
    my_converter = lambda x, dev: convert.concat_examples(x, dev, (None,-1,None,None))
    updater = training.StandardUpdater(train_iter, optimizer, converter=my_converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, my_converter), trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
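
The `(None, -1, None, None)` tuple passed to `convert.concat_examples` in `my_converter` above pads only the second component of each example with -1; components given `None` must already share a shape. A small illustration with made-up arrays:

import numpy as np
from chainer.dataset import convert

# Two examples, each a 4-tuple; only the second (variable-length) array is padded.
batch = [(np.zeros(3, np.float32), np.array([1, 2], np.int32), 0, 1),
         (np.zeros(3, np.float32), np.array([3], np.int32), 1, 0)]
xs, ys, a, b = convert.concat_examples(batch, padding=(None, -1, None, None))
# xs: shape (2, 3); ys: [[1, 2], [3, -1]]; a, b: length-2 arrays
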
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def train(args):
    model = PeepHoleLSTMTagger(args.model, args.word_emb_size, args.afix_emb_size,
            args.nlayers, args.hidden_dim, args.relu_dim, args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    converter = lambda x, device: \
            concat_examples(x, device=device, padding=-1)

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
            device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(
        val_iter, eval_model, converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
项目:depccg    作者:masashi-y    | 项目源码 | 文件源码
def train(args):
    model = PeepHoleLSTMParser(args.model, args.word_emb_size, args.afix_emb_size, args.nlayers,
            args.hidden_dim, args.elu_dim, args.dep_dim, args.dropout_ratio, args.gpu >= 0)
    with open(args.model + "/params", "w") as f: log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    converter = lambda x, device: \
            concat_examples(x, device=device, padding=-1)

    train = LSTMParserDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer,
            device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                    converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration',
        'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_loss', 'validation/main/tagging_accuracy',
        'validation/main/parsing_loss', 'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
项目:DeepPoseComparison    作者:ynaka81    | 项目源码 | 文件源码
def start(self):
        """ Train pose net. """
        # set random seed.
        if self.seed is not None:
            random.seed(self.seed)
            np.random.seed(self.seed)
            if self.gpu >= 0:
                chainer.cuda.cupy.random.seed(self.seed)
        # initialize model to train.
        model = AlexNet(self.Nj, self.use_visibility)
        if self.resume_model:
            serializers.load_npz(self.resume_model, model)
        # prepare gpu.
        if self.gpu >= 0:
            chainer.cuda.get_device(self.gpu).use()
            model.to_gpu()
        # load the datasets.
        train = PoseDataset(self.train, data_augmentation=self.data_augmentation)
        val = PoseDataset(self.val, data_augmentation=False)
        # training/validation iterators.
        train_iter = chainer.iterators.MultiprocessIterator(
            train, self.batchsize)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, self.batchsize, repeat=False, shuffle=False)
        # set up an optimizer.
        optimizer = self._get_optimizer()
        optimizer.setup(model)
        if self.resume_opt:
            chainer.serializers.load_npz(self.resume_opt, optimizer)
        # set up a trainer.
        updater = training.StandardUpdater(train_iter, optimizer, device=self.gpu)
        trainer = training.Trainer(
            updater, (self.epoch, 'epoch'), os.path.join(self.out, 'chainer'))
        # standard trainer settings
        trainer.extend(extensions.dump_graph('main/loss'))
        val_interval = (10, 'epoch')
        trainer.extend(TestModeEvaluator(val_iter, model, device=self.gpu), trigger=val_interval)
        # save parameters and optimization state per validation step
        resume_interval = (self.epoch/10, 'epoch')
        trainer.extend(extensions.snapshot_object(
            model, "epoch-{.updater.epoch}.model"), trigger=resume_interval)
        trainer.extend(extensions.snapshot_object(
            optimizer, "epoch-{.updater.epoch}.state"), trigger=resume_interval)
        trainer.extend(extensions.snapshot(
            filename="epoch-{.updater.epoch}.iter"), trigger=resume_interval)
        # show log
        log_interval = (10, "iteration")
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'lr']), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        # start training
        if self.resume:
            chainer.serializers.load_npz(self.resume, trainer)
        trainer.run()
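
`TestModeEvaluator` is not defined in this excerpt. A common Chainer pattern, and only an assumption here, is an `extensions.Evaluator` subclass that turns the target model's `train` flag off for validation and restores it afterwards:

from chainer.training import extensions

class TestModeEvaluator(extensions.Evaluator):
    # Hypothetical sketch: evaluate with dropout and other training-only
    # behaviour disabled, then switch the model back to training mode.
    def evaluate(self):
        model = self.get_target('main')
        model.train = False
        result = super(TestModeEvaluator, self).evaluate()
        model.train = True
        return result
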
项目:vsmlib    作者:undertherain    | 项目源码 | 文件源码
def train(args):
    time_start = timer()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cuda.check_cuda_available()


    if args.path_vocab == '':
        vocab = create_from_dir(args.path_corpus)
    else:
        vocab = Vocabulary()
        vocab.load(args.path_vocab)
        logger.info("loaded vocabulary")

    # for deps or NER context representations, we need a new context vocab
    # for the NS or HSM loss function.
    if args.context_representation != 'word':
        vocab_context = create_from_annotated_dir(args.path_corpus, representation=args.context_representation)
    else :
        vocab_context = vocab

    loss_func = get_loss_func(args, vocab_context)
    model = get_model(args, loss_func, vocab)

    if args.gpu >= 0:
        model.to_gpu()
        logger.debug("model sent to gpu")

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if os.path.isfile(args.path_corpus):
        train, val = get_data(args.path_corpus, vocab)
        if args.test:
            train = train[:100]
            val = val[:100]
        train_iter = WindowIterator(train, args.window, args.batchsize)
        val_iter = WindowIterator(val, args.window, args.batchsize, repeat=False)
    else:
        train_iter = DirWindowIterator(path=args.path_corpus, vocab=vocab, window_size=args.window, batch_size=args.batchsize)
    updater = training.StandardUpdater(train_iter, optimizer, converter=convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.path_out)

    if os.path.isfile(args.path_corpus):
        trainer.extend(extensions.Evaluator(val_iter, model, converter=convert, device=args.gpu))
    trainer.extend(extensions.LogReport())
    if os.path.isfile(args.path_corpus):
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    else:
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'elapsed_time']))
    # trainer.extend(extensions.ProgressBar())
    trainer.run()
    model = create_model(args, model, vocab)
    time_end = timer()
    model.metadata["execution_time"] = time_end - time_start
    return model
项目:vsmlib    作者:undertherain    | 项目源码 | 文件源码
def main(options):

    #load the config params
    gpu = options['gpu']
    data_path = options['path_dataset']
    embeddings_path = options['path_vectors']
    n_epoch = options['epochs']
    batch_size = options['batchsize']
    test = options['test']
    embed_dim = options['embed_dim']
    freeze = options['freeze_embeddings']
    distance_embed_dim = options['distance_embed_dim']

    #load the data
    data_processor = DataProcessor(data_path)
    data_processor.prepare_dataset()
    train_data = data_processor.train_data
    test_data = data_processor.test_data

    vocab = data_processor.vocab
    cnn = CNN(n_vocab=len(vocab), input_channel=1,
              output_channel=100, n_label=19, embed_dim=embed_dim,
              position_dims=distance_embed_dim, freeze=freeze)
    cnn.load_embeddings(embeddings_path, data_processor.vocab)
    model = L.Classifier(cnn)

    #use GPU if flag is set
    if gpu >= 0:
        model.to_gpu()

    #setup the optimizer
    optimizer = O.Adam()
    optimizer.setup(model)

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size)
    test_iter = chainer.iterators.SerialIterator(test_data, batch_size, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, converter=convert.concat_examples, device=gpu)
    trainer = training.Trainer(updater, (n_epoch, 'epoch'))

    # Evaluation
    test_model = model.copy()
    test_model.predictor.train = False
    trainer.extend(extensions.Evaluator(test_iter, test_model, device=gpu, converter=convert.concat_examples))


    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=10))


    trainer.run()
项目:vsmlib    作者:undertherain    | 项目源码 | 文件源码
def main(options):

    #load the config params
    gpu = options['gpu']
    data_path = options['path_dataset']
    embeddings_path = options['path_vectors']
    n_epoch = options['epochs']
    batchsize = options['batchsize']
    test = options['test']
    embed_dim = options['embed_dim']
    freeze = options['freeze_embeddings']

    #load the data
    data_processor = DataProcessor(data_path, test)
    data_processor.prepare_dataset()
    train_data = data_processor.train_data
    dev_data = data_processor.dev_data
    test_data = data_processor.test_data

    vocab = data_processor.vocab
    cnn = CNN(n_vocab=len(vocab), input_channel=1,
                  output_channel=10, n_label=2, embed_dim=embed_dim, freeze=freeze)
    cnn.load_embeddings(embeddings_path, data_processor.vocab)
    model = L.Classifier(cnn)
    if gpu >= 0:
        model.to_gpu()

    #setup the optimizer
    optimizer = O.Adam()
    optimizer.setup(model)


    train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    dev_iter = chainer.iterators.SerialIterator(dev_data, batchsize, repeat=False, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(test_data, batchsize, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, converter=util.concat_examples, device=gpu)
    trainer = training.Trainer(updater, (n_epoch, 'epoch'))

    # Evaluation
    eval_model = model.copy()
    eval_model.predictor.train = False
    trainer.extend(extensions.Evaluator(dev_iter, eval_model, device=gpu, converter=util.concat_examples))

    test_model = model.copy()
    test_model.predictor.train = False
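    # note: test_iter and test_model are set up above but never attached to an
    # Evaluator in this snippet; only dev_iter is evaluated.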

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=10))


    trainer.run()