The following 50 code examples, extracted from open source Python projects, illustrate how to use chainer.using_config().
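As a quick orientation before the project examples (a minimal sketch, not drawn from any of the projects below): chainer.using_config is a context manager that temporarily overrides a flag in chainer.config, such as 'train', 'enable_backprop', or 'use_cudnn', and restores the previous value when the block exits. It is commonly paired with chainer.no_backprop_mode() for inference. The toy L.Linear model here is purely illustrative.

import numpy as np
import chainer
import chainer.links as L

# Any model with dropout or batch normalization reacts to the 'train' flag
# the same way; a bare Linear link just makes the sketch self-contained.
model = L.Linear(4, 2)
x = np.zeros((1, 4), dtype=np.float32)

print(chainer.config.train)  # True by default

# Switch to test mode and skip building the computational graph.
with chainer.using_config('train', False), chainer.no_backprop_mode():
    y = model(x)

print(chainer.config.train)  # True again; the override is scoped to the block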
def feature_map_activations(self, x):
    """Forward pass through the convolutional layers of the VGG
    returning all of its intermediate feature map activations."""
    hs = []
    pre_pooling_sizes = []
    h = x
    for conv_block, mp in zip(self.conv_blocks, self.mps):
        for conv in conv_block:
            h = F.relu(conv(h))
        pre_pooling_sizes.append(h.data.shape[2:])
        # Disable cuDNN, else pooling indices will not be stored
        with chainer.using_config('use_cudnn', 'never'):
            h = mp.apply((h,))[0]
        hs.append(h)
    return hs, pre_pooling_sizes
def act(self, state):
    with chainer.using_config('train', False):
        s = self.batch_states([state], self.xp, self.phi)
        if self.act_deterministically:
            action = self.policy(s).most_probable
        else:
            action = self.policy(s).sample()
        # Q is not needed here, but log it just for information
        q = self.q_function(s, action)

    # Update stats
    self.average_q *= self.average_q_decay
    self.average_q += (1 - self.average_q_decay) * float(q.data)

    self.logger.debug('t:%s a:%s q:%s', self.t, action.data[0], q.data)
    return cuda.to_cpu(action.data[0])
def _compute_target_values(self, exp_batch, gamma):
    batch_next_state = exp_batch['next_state']

    with chainer.using_config('train', False):
        with state_kept(self.q_function):
            next_qout = self.q_function(batch_next_state)

    target_next_qout = self.target_q_function(batch_next_state)
    next_q_max = target_next_qout.evaluate_actions(
        next_qout.greedy_actions)

    batch_rewards = exp_batch['reward']
    batch_terminal = exp_batch['is_state_terminal']

    return batch_rewards + self.gamma * (1.0 - batch_terminal) * next_q_max
def test_rnn():
    np.random.seed(0)
    num_layers = 50
    seq_length = num_layers * 2
    batchsize = 2
    vocab_size = 4
    data = np.random.randint(0, vocab_size, size=(batchsize, seq_length), dtype=np.int32)
    source, target = make_source_target_pair(data)
    model = RNNModel(vocab_size, ndim_embedding=100, num_layers=num_layers,
                     ndim_h=3, kernel_size=3, pooling="fo", zoneout=False,
                     wgain=1, densely_connected=True)
    with chainer.using_config("train", False):
        np.random.seed(0)
        model.reset_state()
        Y = model(source).data
        model.reset_state()
        np.random.seed(0)
        for t in range(source.shape[1]):
            y = model.forward_one_step(source[:, :t + 1]).data
            target = np.swapaxes(np.reshape(Y, (batchsize, -1, vocab_size)), 1, 2)
            target = np.reshape(np.swapaxes(target[:, :, t, None], 1, 2), (batchsize, -1))
            assert np.sum((y - target) ** 2) == 0
            print("t = {} OK".format(t))
def __call__(self, trainer):
    print('## Calculate BLEU')
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            references = []
            hypotheses = []
            for i in range(0, len(self.test_data), self.batch):
                sources, targets = zip(*self.test_data[i:i + self.batch])
                references.extend([[t.tolist()] for t in targets])
                sources = [
                    chainer.dataset.to_device(self.device, x) for x in sources]
                ys = [y.tolist()
                      for y in self.model.translate(sources, self.max_length)]
                hypotheses.extend(ys)
    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1) * 100
    print('BLEU:', bleu)
    reporter.report({self.key: bleu})
def __call__(self, xs):
    if self.freeze:
        self.embed.disable_update()
    xs = self.embed(xs)
    batchsize, height, width = xs.shape
    xs = F.reshape(xs, (batchsize, 1, height, width))
    conv3_xs = self.conv3(xs)
    conv4_xs = self.conv4(xs)
    conv5_xs = self.conv5(xs)
    h1 = F.max_pooling_2d(F.relu(conv3_xs), conv3_xs.shape[2])
    h2 = F.max_pooling_2d(F.relu(conv4_xs), conv4_xs.shape[2])
    h3 = F.max_pooling_2d(F.relu(conv5_xs), conv5_xs.shape[2])
    concat_layer = F.concat([h1, h2, h3], axis=1)
    with chainer.using_config('train', True):
        y = self.l1(F.dropout(F.tanh(concat_layer)))
    return y
def plot_scatter():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    # use a comma to enter both context managers
    # (chaining them with `and` would only enter the second one)
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_yz(images_test)[1].data

    plot.scatter_labeled_z(z, labels_test, "scatter_gen.png")
def plot_representation():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    # use a comma to enter both context managers
    # (chaining them with `and` would only enter the second one)
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        y_onehot, z = model.encode_x_yz(images_test, apply_softmax_y=True)
        representation = model.encode_yz_representation(y_onehot, z).data

    plot.scatter_labeled_z(representation, labels_test, "scatter_r.png")
def plot_z():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    # use a comma to enter both context managers
    # (chaining them with `and` would only enter the second one)
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_yz(images_test)[1].data

    plot.scatter_labeled_z(z, labels_test, "scatter_z.png")
def plot_scatter():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    # use a comma to enter both context managers
    # (chaining them with `and` would only enter the second one)
    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_z(images_test).data

    plot.scatter_labeled_z(z, labels_test, "scatter_z.png")
def check_forward(self, x_data):
    x = chainer.Variable(x_data)

    # Make the batch normalization the identity function.
    self.l.bn.avg_var[:] = 1
    self.l.bn.avg_mean[:] = 0
    with chainer.using_config('train', False):
        y = self.l(x)

    self.assertIsInstance(y, chainer.Variable)
    self.assertIsInstance(y.array, self.l.xp.ndarray)

    if self.activ == 'relu':
        np.testing.assert_almost_equal(
            cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0),
            decimal=4
        )
    elif self.activ == 'add_one':
        np.testing.assert_almost_equal(
            cuda.to_cpu(y.array), cuda.to_cpu(x_data) + 1,
            decimal=4
        )
def out_generated_image(gen, dis, rows, cols, seed, dst):
    @chainer.training.make_extension()
    def make_image(trainer):
        np.random.seed(seed)
        n_images = rows * cols
        xp = gen.xp
        z = Variable(xp.asarray(gen.make_hidden(n_images)))
        with chainer.using_config('train', False):
            x = gen(z)
        x = chainer.cuda.to_cpu(x.data)
        np.random.seed()

        x = np.asarray(np.clip(x * 255, 0.0, 255.0), dtype=np.uint8)
        _, _, H, W = x.shape
        x = x.reshape((rows, cols, 3, H, W))
        x = x.transpose(0, 3, 1, 4, 2)
        x = x.reshape((rows * H, cols * W, 3))

        preview_dir = '{}/preview'.format(dst)
        preview_path = preview_dir + \
            '/image{:0>8}.png'.format(trainer.updater.iteration)
        if not os.path.exists(preview_dir):
            os.makedirs(preview_dir)
        Image.fromarray(x).save(preview_path)
    return make_image
def translate(self, sentence: np.ndarray, max_length: int = 30) -> List[int]:
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        sentence = sentence[::-1]

        embedded_xs = self._embed_input(sentence)
        hidden_states, cell_states, attentions = self._encoder(None, None, [embedded_xs])

        wid = EOS
        result = []
        for i in range(max_length):
            output, hidden_states, cell_states = \
                self._translate_one_word(wid, hidden_states, cell_states, attentions)

            wid = np.argmax(output.data)
            if wid == EOS:
                break
            result.append(wid)

        return result
def test_pretrained_on_target(source_cnn, target, args):
    print(":: testing pretrained source CNN on target domain")

    if args.device >= 0:
        source_cnn.to_gpu()

    with chainer.using_config('train', False):
        _, target_test_iterator = data2iterator(target, args.batchsize, multiprocess=False)

        mean_accuracy = 0.0
        n_batches = 0

        for batch in target_test_iterator:
            batch, labels = chainer.dataset.concat_examples(batch, device=args.device)
            encode = source_cnn.encoder(batch)
            classify = source_cnn.classifier(encode)
            acc = accuracy.accuracy(classify, labels)
            mean_accuracy += acc.data
            n_batches += 1

        mean_accuracy /= n_batches

        print(":: classifier trained on only source, evaluated on target: accuracy {}%".format(mean_accuracy))
def _forward(self, *args, calc_score=False):
    """Forward computation without backward.

    Predicts by returning the `predictor`'s output.
    """
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        if calc_score:
            self(*args)
            return self.y
        else:
            if self.predictor is None:
                print("[ERROR] predictor is not set or not built yet.")
                return
            # TODO: this passes all the args; sometimes (x, y), which is
            # too many arguments. Consider how to handle the number of inputs.
            if hasattr(self.predictor, '_forward'):
                fn = self.predictor._forward
            else:
                fn = self.predictor
            return fn(*filter_args(fn, args))
def test_rnn():
    np.random.seed(0)
    num_blocks = 10
    num_layers_per_block = 5
    seq_length = num_layers_per_block * num_blocks * 2
    batchsize = 2
    vocab_size = 4
    data = np.random.randint(0, vocab_size, size=(batchsize, seq_length), dtype=np.int32)
    source, target = make_source_target_pair(data)
    model = RNNModel(vocab_size, ndim_embedding=3, num_blocks=num_blocks,
                     num_layers_per_block=num_layers_per_block, ndim_h=3,
                     kernel_size=3, wgain=1)
    with chainer.using_config("train", False):
        np.random.seed(0)
        model.reset_state()
        Y = model(source).data
        model.reset_state()
        np.random.seed(0)
        for t in range(source.shape[1]):  # range, not Python-2 xrange
            y = model.forward_one_step(source[:, :t + 1]).data
            target = np.swapaxes(np.reshape(Y, (batchsize, -1, vocab_size)), 1, 2)
            target = np.reshape(np.swapaxes(target[:, :, t, None], 1, 2), (batchsize, -1))
            assert np.sum((y - target) ** 2) == 0
            print("t = {} OK".format(t))
def forward(net, image_batch, sentence_batch, train=True):
    images = xp.asarray(image_batch)
    n, sentence_length = sentence_batch.shape
    net.initialize(images)

    loss = 0
    acc = 0
    size = 0
    for i in range(sentence_length - 1):
        target = xp.where(xp.asarray(sentence_batch[:, i]) != eos, 1, 0).astype(np.float32)
        if (target == 0).all():
            break
        with chainer.using_config('train', train):
            with chainer.using_config('enable_backprop', train):
                x = xp.asarray(sentence_batch[:, i])
                t = xp.asarray(sentence_batch[:, i + 1])
                y = net(x)

                y_max_index = xp.argmax(y.data, axis=1)
                mask = target.reshape((len(target), 1)).repeat(y.data.shape[1], axis=1)
                y = y * mask
                loss += F.softmax_cross_entropy(y, t)
                acc += xp.sum((y_max_index == t) * target)
                size += xp.sum(target)

    return loss / size, float(acc) / size, float(size)
def evaluate(model, dataset, crop_margin, test_size):
    xp = model.xp
    iterator = chainer.iterators.SerialIterator(dataset, 1, repeat=False, shuffle=False)
    acc_sum = 0
    iteration = 0
    for batch in iterator:
        image_batch = []
        label_batch = []
        for image_path, category_id, _ in batch:
            image = load_image(image_path)
            image_width, image_height = image.size
            crop_size = min(image_width, image_height) - crop_margin
            crop_rect = ((image_width - crop_size) // 2,
                         (image_height - crop_size) // 2,
                         crop_size, crop_size)
            # input_size = test_size
            input_size = int(round(crop_size / 32.0) * 32)
            if input_size < 64:
                input_size = 64
            elif input_size > test_size:
                input_size = test_size
            image_batch.append(transform_image(image, crop_rect, input_size))
            label_batch.append(category_id)
        x = xp.asarray(image_batch)
        t = xp.asarray(label_batch)
        with chainer.using_config('enable_backprop', False):
            with chainer.using_config('train', False):
                y = model(x)
                acc = F.accuracy(y, t)
        acc_sum += float(acc.data)
    return acc_sum / len(dataset)
def evaluate(model, dataset, crop_margin, test_size, batch_size):
    xp = model.xp
    iterator = chainer.iterators.SerialIterator(dataset, batch_size, repeat=False, shuffle=False)
    acc_sum = 0
    iteration = 0
    for batch in iterator:
        image_batch = []
        label_batch = []
        for image_path, category_id, _ in batch:
            image = load_image(image_path)
            image_width, image_height = image.size
            crop_size = min(image_width, image_height) - crop_margin
            crop_rect = ((image_width - crop_size) // 2,
                         (image_height - crop_size) // 2,
                         crop_size, crop_size)
            input_size = test_size
            image_batch.append(transform_image(image, crop_rect, input_size))
            label_batch.append(category_id)
        x = xp.asarray(image_batch)
        t = xp.asarray(label_batch)
        with chainer.using_config('enable_backprop', False):
            with chainer.using_config('train', False):
                y = model(x)
                acc = F.accuracy(y, t)
        acc_sum += float(acc.data) * batch_size
    return acc_sum / len(dataset)
def disable_train(chain):
    call_orig = chain.__call__

    def call_test(self, x):
        with chainer.using_config('train', False):
            return call_orig(self, x)

    chain.__call__ = call_test
def act(self, state):
    with chainer.using_config('train', False):
        s = self.batch_states([state], self.xp, self.phi)
        action = self.policy(s).sample()
        # Q is not needed here, but log it just for information
        q = self.q_function(s, action)

    # Update stats
    self.average_q *= self.average_q_decay
    self.average_q += (1 - self.average_q_decay) * float(q.data)

    self.logger.debug('t:%s a:%s q:%s', self.t, action.data[0], q.data)
    return cuda.to_cpu(action.data[0])
def _act(self, state):
    xp = self.xp
    with chainer.using_config('train', False):
        b_state = batch_states([state], xp, self.phi)
        with chainer.no_backprop_mode():
            action_distrib, v = self.model(b_state)
            action = action_distrib.sample()
        return cuda.to_cpu(action.data)[0], cuda.to_cpu(v.data)[0]
def compute_q_values(self, states):
    """Compute Q-values.

    Args:
        states (list of cupy.ndarray or numpy.ndarray)

    Returns:
        list of numpy.ndarray
    """
    with chainer.using_config('train', False):
        if not states:
            return []
        batch_x = self.batch_states(states, self.xp, self.phi)
        q_values = list(cuda.to_cpu(self.model(batch_x).q_values))
        return q_values
def act(self, state):
    with chainer.using_config('train', False):
        with chainer.no_backprop_mode():
            action_value = self.model(
                self.batch_states([state], self.xp, self.phi))
            q = float(action_value.max.data)
            action = cuda.to_cpu(action_value.greedy_actions.data)[0]

    # Update stats
    self.average_q *= self.average_q_decay
    self.average_q += (1 - self.average_q_decay) * q

    self.logger.debug('t:%s q:%s action_value:%s', self.t, q, action_value)
    return action
def CalculateValLoss(self, xs, ys):
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        loss = self.CalcLoss(xs, ys)
    return loss.data
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, xs)
        h, _ = self.encoder(None, exs)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs_f = xs
        xs_b = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs_f)
        exs_b = sequence_embed(self.embed_x, xs_b)
        _, hf = self.encoder_f(None, exs_f)
        _, hb = self.encoder_b(None, exs_b)
        ht = list(map(lambda x, y: F.concat([x, y], axis=1), hf, hb))
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(None, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs_f = xs
        xs_b = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs_f)
        exs_b = sequence_embed(self.embed_x, xs_b)
        fx, _ = self.encoder_f(None, exs_f)
        bx, _ = self.encoder_b(None, exs_b)
        h = F.concat([fx, bx], axis=2)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, xs)
        h, c, _ = self.encoder(None, None, exs)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, c, ys = self.decoder(h, c, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def CalculateValLoss(self, xs, ys):
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        loss, n_w, n_c, n_c_a = self.CalcLoss(xs, ys)
    return loss.data
def generate_text(model, seed, length=512, top_n=10):
    """
    generates text of specified length from trained model
    with given seed character sequence.
    """
    logger.info("generating %s characters from top %s choices.", length, top_n)
    logger.info('generating with seed: "%s".', seed)
    generated = seed
    encoded = encode_text(seed).astype(np.int32)
    model.predictor.reset_state()

    with chainer.using_config("train", False), chainer.no_backprop_mode():
        for idx in encoded[:-1]:
            x = Variable(np.array([idx]))  # input shape: [1]
            # set internal states
            model.predictor(x)

        next_index = encoded[-1]
        for i in range(length):
            x = Variable(np.array([next_index], dtype=np.int32))  # input shape: [1]
            probs = F.softmax(model.predictor(x))  # output shape: [1, vocab_size]
            next_index = sample_from_probs(probs.data.squeeze(), top_n)

            # append to sequence
            generated += ID2CHAR[next_index]

    logger.info("generated text: \n%s\n", generated)
    return generated
def out_generated_image(gen, dis, rows, cols, seed, dst, writer):
    @chainer.training.make_extension()
    def make_image(trainer):
        np.random.seed(seed)
        n_images = rows * cols
        xp = gen.xp
        z = Variable(xp.asarray(gen.make_hidden(n_images)))
        with chainer.using_config('train', False):
            x = gen(z)
        writer.add_image('img', x, trainer.updater.iteration)
    return make_image
def check_forward(self, x, use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn):
        y = normalize_layer(x, eps=self.eps)
    self.assertEqual(y.data.dtype, self.dtype)
    y_expect = _normalize_layer(self.x, self.eps).data
    testing.assert_allclose(y_expect, y.data, **self.check_forward_options)
def check_backward(self, x, y_grad, use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn), \
            chainer.using_config('train', self.train):
        gradient_check.check_backward(
            NormalizeLayer(self.eps), x, y_grad,
            **self.check_backward_options)
def main():
    model = load_model(args.model_dir)
    assert model is not None
    vocab, vocab_inv = load_vocab(args.model_dir)
    assert vocab is not None
    assert vocab_inv is not None
    vocab_size = model.vocab_size

    with chainer.using_config("train", False):
        for n in range(args.num_generate):
            word_ids = np.arange(0, vocab_size, dtype=np.int32)
            token = ID_BOS
            x = np.asarray([[token]]).astype(np.int32)
            model.reset_state()
            while token != ID_EOS and x.shape[1] < args.max_sentence_length:
                u = model.forward_one_step(x)
                p = F.softmax(u).data[-1]
                token = np.random.choice(word_ids, size=1, p=p)
                x = np.append(x, np.asarray([token]).astype(np.int32), axis=1)
            sentence = []
            for token in x[0]:
                word = vocab_inv[token]
                sentence.append(word)
            print(" ".join(sentence))
def translate(self, x_block, max_length=50):
    # TODO: efficient inference by re-using convolution result
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            # if isinstance(x_block, list):
            x_block = source_pad_concat_convert(
                x_block, device=None)
            batch, x_length = x_block.shape
            y_block = self.xp.zeros((batch, 1), dtype=x_block.dtype)
            eos_flags = self.xp.zeros((batch, ), dtype=x_block.dtype)
            result = []
            for i in range(max_length):
                log_prob_tail = self(x_block, y_block, y_block,
                                     get_prediction=True)
                ys = self.xp.argmax(log_prob_tail.data, axis=1).astype('i')
                result.append(ys)
                y_block = F.concat([y_block, ys[:, None]], axis=1).data
                eos_flags += (ys == 0)
                if self.xp.all(eos_flags):
                    break

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == 0)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        if len(y) == 0:
            y = np.array([1], 'i')
        outs.append(y)
    return outs
def __call__(self, cur_word):
    # Given the current word ID, predict the next word.
    x = self.embed(cur_word)
    # dropout. ref: https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
    with chainer.using_config('train', True):
        x = F.dropout(x, self.dropout)
    h = self.mid(x)
    with chainer.using_config('train', True):
        h = F.dropout(h, self.dropout)
    y = self.out(h)
    return y
def __call__(self, cur_word):
    # Given the current word ID, predict the next word.
    x = self.embed(cur_word)
    # dropout. ref: https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
    with chainer.using_config('train', True):
        x = F.dropout(x, args.dropout)
    h = self.mid(x)
    with chainer.using_config('train', True):
        h = F.dropout(h, args.dropout)
    y = self.out(h)
    return y
def __evaluate(self, data):
    iterator = chainer.iterators.SerialIterator(data, self.batch_size, repeat=False, shuffle=False)
    total_loss = 0
    total_acc = 0
    num = 0
    with chainer.using_config('enable_backprop', False):
        with chainer.using_config('train', False):
            for batch in iterator:
                x_batch, y_batch = convert.concat_examples(batch, self.device_id)
                loss, acc = self.__forward(x_batch, y_batch)
                total_loss += float(loss.data) * len(x_batch)
                total_acc += float(acc.data) * len(x_batch)
                num += len(x_batch)
    iterator.finalize()
    return total_loss / num, total_acc / num
def _pool_without_cudnn(p, x):
    with chainer.using_config('use_cudnn', 'never'):
        return p.apply((x,))[0]
def predict(self, imgs):
    """Conduct semantic segmentation from images.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their values is :math:`[0, 255]`.

    Returns:
        list of numpy.ndarray:
        List of integer labels predicted from each image in the input list.
    """
    labels = list()
    for img in imgs:
        C, H, W = img.shape
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.asarray(img[np.newaxis]))
            score = self.__call__(x)[0].data
        score = chainer.cuda.to_cpu(score)
        if score.shape != (C, H, W):
            dtype = score.dtype
            score = resize(score, (H, W)).astype(dtype)

        label = np.argmax(score, axis=0).astype(np.int32)
        labels.append(label)
    return labels
def translate(self, xs, max_length=100):
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            xs = [x[::-1] for x in xs]
            exs = sequence_embed(self.embed_x, xs)

            # Encode input sequence and send hidden states to decoder.
            self.mn_encoder(exs)

    # Encoder does not return anything.
    # All evaluation will be done in decoder process.
    return None
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            result = []
            ys = self.xp.zeros(batch, 'i')
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(
                eys, batch, 0, force_tuple=True)

            # Receive hidden states from encoder process.
            h, c, ys, _ = self.mn_decoder(eys)

            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

            # Recursively decode using the previously predicted token.
            for i in range(1, max_length):
                eys = self.embed_y(ys)
                eys = chainer.functions.split_axis(
                    eys, batch, 0, force_tuple=True)
                # Non-MN RNN link can be accessed via `actual_rnn`.
                h, c, ys = self.mn_decoder.actual_rnn(h, c, eys)
                cys = chainer.functions.concat(ys, axis=0)
                wy = self.W(cys)
                ys = self.xp.argmax(wy.data, axis=1).astype('i')
                result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = numpy.argwhere(y == 0)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            xs = [x[::-1] for x in xs]
            exs = sequence_embed(self.embed_x, xs)
            # Initial hidden variable and cell variable
            # zero = self.xp.zeros((self.n_layers, batch, self.n_units), 'f')  # NOQA
            # h, c, _ = self.encoder(zero, zero, exs, train=False)  # NOQA
            h, c, _ = self.encoder(None, None, exs)
            ys = self.xp.zeros(batch, 'i')
            result = []
            for i in range(max_length):
                eys = self.embed_y(ys)
                eys = chainer.functions.split_axis(
                    eys, batch, 0, force_tuple=True)
                h, c, ys = self.decoder(h, c, eys)
                cys = chainer.functions.concat(ys, axis=0)
                wy = self.W(cys)
                ys = self.xp.argmax(wy.data, axis=1).astype('i')
                result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = numpy.argwhere(y == 0)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate_with_beam_search(self, sentence: np.ndarray,
                               max_length: int = 30, beam_width=3) -> List[int]:
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        sentence = sentence[::-1]

        embedded_xs = self._embed_input(sentence)
        hidden_states, cell_states, attentions = self._encoder(None, None, [embedded_xs])

        heaps = [[] for _ in range(max_length + 1)]
        # (score, translation, hidden_states, cell_states)
        heaps[0].append((0, [EOS], hidden_states, cell_states))

        solution = []
        solution_score = 1e8
        for i in range(max_length):
            heaps[i] = sorted(heaps[i], key=lambda t: t[0])[:beam_width]

            for score, translation, i_hidden_states, i_cell_states in heaps[i]:
                wid = translation[-1]
                output, new_hidden_states, new_cell_states = \
                    self._translate_one_word(wid, i_hidden_states, i_cell_states, attentions)

                for next_wid in np.argsort(output.data)[::-1]:
                    if output.data[next_wid] < 1e-6:
                        break
                    next_score = score - np.log(output.data[next_wid])
                    if next_score > solution_score:
                        break
                    next_translation = translation + [next_wid]
                    next_item = (next_score, next_translation,
                                 new_hidden_states, new_cell_states)
                    if next_wid == EOS:
                        if next_score < solution_score:
                            solution = translation[1:]  # [1:] drops the first EOS
                            solution_score = next_score
                    else:
                        heaps[i + 1].append(next_item)

        return solution
def main(args):
    model = load_model(args.model_dir)
    assert model is not None
    vocab, vocab_inv = load_vocab(args.model_dir)
    assert vocab is not None
    assert vocab_inv is not None
    vocab_size = model.vocab_size

    with chainer.using_config("train", False):
        for n in range(args.num_generate):  # range, not Python-2 xrange
            word_ids = np.arange(0, vocab_size, dtype=np.int32)
            token = ID_BOS
            x = np.asarray([[token]]).astype(np.int32)
            model.reset_state()
            while token != ID_EOS and x.shape[1] < args.max_sentence_length:
                u = model.forward_one_step(x)
                p = F.softmax(u).data[-1]
                token = np.random.choice(word_ids, size=1, p=p)
                x = np.append(x, np.asarray([token]).astype(np.int32), axis=1)
            sentence = []
            for token in x[0]:
                word = vocab_inv[token]
                sentence.append(word)
            print(" ".join(sentence))