Python chainer module: using_config() code examples
The following 50 code examples, extracted from open-source Python projects, illustrate how to use chainer.using_config().
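As a quick orientation before the project samples, here is a minimal sketch of our own (not taken from any of the projects below). chainer.using_config(name, value) temporarily overrides one entry of the thread-local chainer.config inside a with block and restores the previous value on exit; the most common use is switching 'train' off for inference, usually together with chainer.no_backprop_mode():

import numpy as np
import chainer
import chainer.functions as F

x = np.random.rand(2, 10).astype(np.float32)
with chainer.using_config('train', False), chainer.no_backprop_mode():
    # With train=False, F.dropout is the identity and no graph is built.
    y = F.dropout(x, ratio=0.5)
assert np.allclose(y.data, x)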
def feature_map_activations(self, x):
    """Forward pass through the convolutional layers of the VGG returning
    all of its intermediate feature map activations."""
    hs = []
    pre_pooling_sizes = []
    h = x
    for conv_block, mp in zip(self.conv_blocks, self.mps):
        for conv in conv_block:
            h = F.relu(conv(h))
        pre_pooling_sizes.append(h.data.shape[2:])

        # Disable cuDNN, else pooling indices will not be stored
        with chainer.using_config('use_cudnn', 'never'):
            h = mp.apply((h,))[0]
        hs.append(h)

    return hs, pre_pooling_sizes
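The override above is scoped: on leaving the with block, the previous use_cudnn value is restored automatically. A small self-contained check (our own sketch, assuming a stock Chainer v2+ configuration where use_cudnn defaults to 'auto'):

import chainer

print(chainer.config.use_cudnn)       # 'auto' by default
with chainer.using_config('use_cudnn', 'never'):
    print(chainer.config.use_cudnn)   # 'never' inside the block only
print(chainer.config.use_cudnn)       # 'auto' again after exit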
def act(self, state):
    with chainer.using_config('train', False):
        s = self.batch_states([state], self.xp, self.phi)
        if self.act_deterministically:
            action = self.policy(s).most_probable
        else:
            action = self.policy(s).sample()
        # Q is not needed here, but log it just for information
        q = self.q_function(s, action)

    # Update stats
    self.average_q *= self.average_q_decay
    self.average_q += (1 - self.average_q_decay) * float(q.data)
    self.logger.debug('t:%s a:%s q:%s',
                      self.t, action.data[0], q.data)
    return cuda.to_cpu(action.data[0])
def _compute_target_values(self, exp_batch, gamma):
    batch_next_state = exp_batch['next_state']

    with chainer.using_config('train', False):
        with state_kept(self.q_function):
            next_qout = self.q_function(batch_next_state)
        target_next_qout = self.target_q_function(batch_next_state)

    next_q_max = target_next_qout.evaluate_actions(
        next_qout.greedy_actions)

    batch_rewards = exp_batch['reward']
    batch_terminal = exp_batch['is_state_terminal']

    return batch_rewards + gamma * (1.0 - batch_terminal) * next_q_max
def test_rnn():
    np.random.seed(0)
    num_layers = 50
    seq_length = num_layers * 2
    batchsize = 2
    vocab_size = 4
    data = np.random.randint(0, vocab_size, size=(batchsize, seq_length), dtype=np.int32)
    source, target = make_source_target_pair(data)
    model = RNNModel(vocab_size, ndim_embedding=100, num_layers=num_layers, ndim_h=3,
                     kernel_size=3, pooling="fo", zoneout=False, wgain=1, densely_connected=True)

    with chainer.using_config("train", False):
        np.random.seed(0)
        model.reset_state()
        Y = model(source).data

        model.reset_state()
        np.random.seed(0)
        for t in range(source.shape[1]):
            y = model.forward_one_step(source[:, :t+1]).data
            target = np.swapaxes(np.reshape(Y, (batchsize, -1, vocab_size)), 1, 2)
            target = np.reshape(np.swapaxes(target[:, :, t, None], 1, 2), (batchsize, -1))
            assert np.sum((y - target) ** 2) == 0
            print("t = {} OK".format(t))
def __call__(self, trainer):
    print('## Calculate BLEU')
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            references = []
            hypotheses = []
            for i in range(0, len(self.test_data), self.batch):
                sources, targets = zip(*self.test_data[i:i + self.batch])
                references.extend([[t.tolist()] for t in targets])

                sources = [
                    chainer.dataset.to_device(self.device, x) for x in sources]
                ys = [y.tolist()
                      for y in self.model.translate(sources, self.max_length)]
                hypotheses.extend(ys)

    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1) * 100
    print('BLEU:', bleu)
    reporter.report({self.key: bleu})
def __call__(self, xs):
    if self.freeze:
        self.embed.disable_update()
    xs = self.embed(xs)
    batchsize, height, width = xs.shape
    xs = F.reshape(xs, (batchsize, 1, height, width))
    conv3_xs = self.conv3(xs)
    conv4_xs = self.conv4(xs)
    conv5_xs = self.conv5(xs)
    h1 = F.max_pooling_2d(F.relu(conv3_xs), conv3_xs.shape[2])
    h2 = F.max_pooling_2d(F.relu(conv4_xs), conv4_xs.shape[2])
    h3 = F.max_pooling_2d(F.relu(conv5_xs), conv5_xs.shape[2])
    concat_layer = F.concat([h1, h2, h3], axis=1)
    with chainer.using_config('train', True):
        y = self.l1(F.dropout(F.tanh(concat_layer)))
    return y
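Note that the __call__ above wraps F.dropout in using_config('train', True), which forces dropout on even when an outer scope has set train to False: the innermost override wins. A toy check of our own:

import numpy as np
import chainer
import chainer.functions as F

x = np.ones((1, 1000), dtype=np.float32)
with chainer.using_config('train', False):      # e.g. evaluation mode
    with chainer.using_config('train', True):   # innermost value wins
        y = F.dropout(x, ratio=0.5)
print(float((y.data == 0).mean()))              # about 0.5: dropout ran anyway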
def plot_scatter():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_yz(images_test)[1].data
        plot.scatter_labeled_z(z, labels_test, "scatter_gen.png")
def plot_representation():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        y_onehot, z = model.encode_x_yz(images_test, apply_softmax_y=True)
        representation = model.encode_yz_representation(y_onehot, z).data
        plot.scatter_labeled_z(representation, labels_test, "scatter_r.png")
def plot_z():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_yz(images_test)[1].data
        plot.scatter_labeled_z(z, labels_test, "scatter_z.png")
def plot_scatter():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_z(images_test).data
        plot.scatter_labeled_z(z, labels_test, "scatter_z.png")
def plot_scatter():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        z = model.encode_x_yz(images_test)[1].data
        plot.scatter_labeled_z(z, labels_test, "scatter_gen.png")
def plot_representation():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", "-m", type=str, default="model.hdf5")
    args = parser.parse_args()

    dataset_train, dataset_test = chainer.datasets.get_mnist()
    images_train, labels_train = dataset_train._datasets
    images_test, labels_test = dataset_test._datasets

    model = Model()
    assert model.load(args.model)

    # normalize
    images_train = (images_train - 0.5) * 2
    images_test = (images_test - 0.5) * 2

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        y_onehot, z = model.encode_x_yz(images_test, apply_softmax_y=True)
        representation = model.encode_yz_representation(y_onehot, z).data
        plot.scatter_labeled_z(representation, labels_test, "scatter_r.png")
def check_forward(self, x_data):
    x = chainer.Variable(x_data)

    # Make the batch normalization layer the identity function.
    self.l.bn.avg_var[:] = 1
    self.l.bn.avg_mean[:] = 0
    with chainer.using_config('train', False):
        y = self.l(x)

    self.assertIsInstance(y, chainer.Variable)
    self.assertIsInstance(y.array, self.l.xp.ndarray)

    if self.activ == 'relu':
        np.testing.assert_almost_equal(
            cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0),
            decimal=4
        )
    elif self.activ == 'add_one':
        np.testing.assert_almost_equal(
            cuda.to_cpu(y.array), cuda.to_cpu(x_data) + 1,
            decimal=4
        )
def out_generated_image(gen, dis, rows, cols, seed, dst):
    @chainer.training.make_extension()
    def make_image(trainer):
        np.random.seed(seed)
        n_images = rows * cols
        xp = gen.xp
        z = Variable(xp.asarray(gen.make_hidden(n_images)))
        with chainer.using_config('train', False):
            x = gen(z)
        x = chainer.cuda.to_cpu(x.data)
        np.random.seed()

        x = np.asarray(np.clip(x * 255, 0.0, 255.0), dtype=np.uint8)
        _, _, H, W = x.shape
        x = x.reshape((rows, cols, 3, H, W))
        x = x.transpose(0, 3, 1, 4, 2)
        x = x.reshape((rows * H, cols * W, 3))

        preview_dir = '{}/preview'.format(dst)
        preview_path = preview_dir +\
            '/image{:0>8}.png'.format(trainer.updater.iteration)
        if not os.path.exists(preview_dir):
            os.makedirs(preview_dir)
        Image.fromarray(x).save(preview_path)
    return make_image
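out_generated_image returns a trainer extension. A typical registration (our own hypothetical wiring, assuming a standard chainer.training.Trainer instance named trainer and generator/discriminator links from your own setup):

# Write a 5x5 preview grid to result/preview/ once per epoch.
trainer.extend(
    out_generated_image(gen, dis, rows=5, cols=5, seed=0, dst='result'),
    trigger=(1, 'epoch'))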
def translate(self, sentence: np.ndarray, max_length: int = 30) -> List[int]:
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        sentence = sentence[::-1]

        embedded_xs = self._embed_input(sentence)
        hidden_states, cell_states, attentions = self._encoder(None, None, [embedded_xs])

        wid = EOS
        result = []
        for i in range(max_length):
            output, hidden_states, cell_states = \
                self._translate_one_word(wid, hidden_states, cell_states, attentions)

            wid = np.argmax(output.data)
            if wid == EOS:
                break
            result.append(wid)

        return result
def test_pretrained_on_target(source_cnn, target, args):
    print(":: testing pretrained source CNN on target domain")

    if args.device >= 0:
        source_cnn.to_gpu()

    with chainer.using_config('train', False):
        _, target_test_iterator = data2iterator(target, args.batchsize, multiprocess=False)

        mean_accuracy = 0.0
        n_batches = 0

        for batch in target_test_iterator:
            batch, labels = chainer.dataset.concat_examples(batch, device=args.device)
            encode = source_cnn.encoder(batch)
            classify = source_cnn.classifier(encode)
            acc = accuracy.accuracy(classify, labels)
            mean_accuracy += acc.data
            n_batches += 1

        mean_accuracy /= n_batches
        print(":: classifier trained on only source, evaluated on target: accuracy {}%".format(mean_accuracy))
def _forward(self, *args, calc_score=False):
    """Forward computation without backward.

    Predicts by returning `predictor`'s output.
    """
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        if calc_score:
            self(*args)
            return self.y
        else:
            if self.predictor is None:
                print("[ERROR] predictor is not set or not built yet.")
                return
            # TODO: this passes all the args; sometimes that is (x, y),
            # which is too many arguments. Consider how to deal with the
            # number of inputs.
            if hasattr(self.predictor, '_forward'):
                fn = self.predictor._forward
            else:
                fn = self.predictor
            return fn(*filter_args(fn, args))
def test_rnn():
    np.random.seed(0)
    num_blocks = 10
    num_layers_per_block = 5
    seq_length = num_layers_per_block * num_blocks * 2
    batchsize = 2
    vocab_size = 4
    data = np.random.randint(0, vocab_size, size=(batchsize, seq_length), dtype=np.int32)
    source, target = make_source_target_pair(data)
    model = RNNModel(vocab_size, ndim_embedding=3, num_blocks=num_blocks,
                     num_layers_per_block=num_layers_per_block, ndim_h=3, kernel_size=3, wgain=1)

    with chainer.using_config("train", False):
        np.random.seed(0)
        model.reset_state()
        Y = model(source).data

        model.reset_state()
        np.random.seed(0)
        for t in range(source.shape[1]):
            y = model.forward_one_step(source[:, :t+1]).data
            target = np.swapaxes(np.reshape(Y, (batchsize, -1, vocab_size)), 1, 2)
            target = np.reshape(np.swapaxes(target[:, :, t, None], 1, 2), (batchsize, -1))
            assert np.sum((y - target) ** 2) == 0
            print("t = {} OK".format(t))
def forward(net, image_batch, sentence_batch, train=True):
    images = xp.asarray(image_batch)
    n, sentence_length = sentence_batch.shape
    net.initialize(images)

    loss = 0
    acc = 0
    size = 0
    for i in range(sentence_length - 1):
        target = xp.where(xp.asarray(sentence_batch[:, i]) != eos, 1, 0).astype(np.float32)
        if (target == 0).all():
            break
        with chainer.using_config('train', train):
            with chainer.using_config('enable_backprop', train):
                x = xp.asarray(sentence_batch[:, i])
                t = xp.asarray(sentence_batch[:, i + 1])
                y = net(x)

                y_max_index = xp.argmax(y.data, axis=1)
                mask = target.reshape((len(target), 1)).repeat(y.data.shape[1], axis=1)
                y = y * mask
                loss += F.softmax_cross_entropy(y, t)
                acc += xp.sum((y_max_index == t) * target)
                size += xp.sum(target)
    return loss / size, float(acc) / size, float(size)
def evaluate(model, dataset, crop_margin, test_size):
    xp = model.xp
    iterator = chainer.iterators.SerialIterator(dataset, 1, repeat=False, shuffle=False)
    acc_sum = 0
    iteration = 0
    for batch in iterator:
        image_batch = []
        label_batch = []
        for image_path, category_id, _ in batch:
            image = load_image(image_path)
            image_width, image_height = image.size
            crop_size = min(image_width, image_height) - crop_margin
            crop_rect = ((image_width - crop_size) // 2, (image_height - crop_size) // 2, crop_size, crop_size)
            # input_size = test_size
            input_size = int(round(crop_size / 32.0) * 32)
            if input_size < 64:
                input_size = 64
            elif input_size > test_size:
                input_size = test_size
            image_batch.append(transform_image(image, crop_rect, input_size))
            label_batch.append(category_id)

        x = xp.asarray(image_batch)
        t = xp.asarray(label_batch)
        with chainer.using_config('enable_backprop', False):
            with chainer.using_config('train', False):
                y = model(x)
                acc = F.accuracy(y, t)
        acc_sum += float(acc.data)
    return acc_sum / len(dataset)
def evaluate(model, dataset, crop_margin, test_size, batch_size):
    xp = model.xp
    iterator = chainer.iterators.SerialIterator(dataset, batch_size, repeat=False, shuffle=False)
    acc_sum = 0
    iteration = 0
    for batch in iterator:
        image_batch = []
        label_batch = []
        for image_path, category_id, _ in batch:
            image = load_image(image_path)
            image_width, image_height = image.size
            crop_size = min(image_width, image_height) - crop_margin
            crop_rect = ((image_width - crop_size) // 2, (image_height - crop_size) // 2, crop_size, crop_size)
            input_size = test_size
            image_batch.append(transform_image(image, crop_rect, input_size))
            label_batch.append(category_id)

        x = xp.asarray(image_batch)
        t = xp.asarray(label_batch)
        with chainer.using_config('enable_backprop', False):
            with chainer.using_config('train', False):
                y = model(x)
                acc = F.accuracy(y, t)
        acc_sum += float(acc.data) * batch_size
    return acc_sum / len(dataset)
def disable_train(chain):
    call_orig = chain.__call__

    def call_test(self, x):
        with chainer.using_config('train', False):
            return call_orig(self, x)

    chain.__call__ = call_test
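Because Python looks up __call__ on the type, this helper is meant to be applied to a chain class rather than an instance. A usage sketch of our own, with a hypothetical MLP chain:

import numpy as np
import chainer
import chainer.links as L

class MLP(chainer.Chain):  # hypothetical chain, for illustration only
    def __init__(self):
        super(MLP, self).__init__()
        with self.init_scope():
            self.fc = L.Linear(4, 2)

    def __call__(self, x):
        print('train =', chainer.config.train)
        return self.fc(x)

disable_train(MLP)  # patch the class, not an instance
model = MLP()
model(np.zeros((1, 4), dtype=np.float32))  # prints "train = False"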
def act(self, state):
    with chainer.using_config('train', False):
        s = self.batch_states([state], self.xp, self.phi)
        action = self.policy(s).sample()
        # Q is not needed here, but log it just for information
        q = self.q_function(s, action)

    # Update stats
    self.average_q *= self.average_q_decay
    self.average_q += (1 - self.average_q_decay) * float(q.data)
    self.logger.debug('t:%s a:%s q:%s',
                      self.t, action.data[0], q.data)
    return cuda.to_cpu(action.data[0])
def _act(self, state):
    xp = self.xp
    with chainer.using_config('train', False):
        b_state = batch_states([state], xp, self.phi)
        with chainer.no_backprop_mode():
            action_distrib, v = self.model(b_state)
            action = action_distrib.sample()
        return cuda.to_cpu(action.data)[0], cuda.to_cpu(v.data)[0]
def compute_q_values(self, states):
    """Compute Q-values.

    Args:
        states (list of cupy.ndarray or numpy.ndarray)

    Returns:
        list of numpy.ndarray
    """
    with chainer.using_config('train', False):
        if not states:
            return []
        batch_x = self.batch_states(states, self.xp, self.phi)
        q_values = list(cuda.to_cpu(
            self.model(batch_x).q_values))
        return q_values
def act(self, state):
    with chainer.using_config('train', False):
        with chainer.no_backprop_mode():
            action_value = self.model(
                self.batch_states([state], self.xp, self.phi))
            q = float(action_value.max.data)
            action = cuda.to_cpu(action_value.greedy_actions.data)[0]

    # Update stats
    self.average_q *= self.average_q_decay
    self.average_q += (1 - self.average_q_decay) * q

    self.logger.debug('t:%s q:%s action_value:%s', self.t, q, action_value)
    return action
def CalculateValLoss(self, xs, ys):
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        loss = self.CalcLoss(xs, ys)
    return loss.data
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, xs)
        h, _ = self.encoder(None, exs)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs_f = xs
        xs_b = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs_f)
        exs_b = sequence_embed(self.embed_x, xs_b)
        _, hf = self.encoder_f(None, exs_f)
        _, hb = self.encoder_b(None, exs_b)
        ht = list(map(lambda x, y: F.concat([x, y], axis=1), hf, hb))
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(None, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs_f = xs
        xs_b = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs_f)
        exs_b = sequence_embed(self.embed_x, xs_b)
        fx, _ = self.encoder_f(None, exs_f)
        bx, _ = self.encoder_b(None, exs_b)
        h = F.concat([fx, bx], axis=2)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def CalculateValLoss(self, xs, ys):
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        loss = self.CalcLoss(xs, ys)
    return loss.data
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, xs)
        h, c, _ = self.encoder(None, None, exs)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, c, ys = self.decoder(h, c, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def CalculateValLoss(self, xs, ys):
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        loss, n_w, n_c, n_c_a = self.CalcLoss(xs, ys)
    return loss.data
def CalculateValLoss(self, xs, ys):
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        loss, n_w, n_c, n_c_a = self.CalcLoss(xs, ys)
    return loss.data
def generate_text(model, seed, length=512, top_n=10):
    """
    generates text of specified length from trained model
    with given seed character sequence.
    """
    logger.info("generating %s characters from top %s choices.", length, top_n)
    logger.info('generating with seed: "%s".', seed)
    generated = seed
    encoded = encode_text(seed).astype(np.int32)
    model.predictor.reset_state()

    with chainer.using_config("train", False), chainer.no_backprop_mode():
        for idx in encoded[:-1]:
            x = Variable(np.array([idx]))
            # input shape: [1]
            # set internal states
            model.predictor(x)

        next_index = encoded[-1]
        for i in range(length):
            x = Variable(np.array([next_index], dtype=np.int32))
            # input shape: [1]
            probs = F.softmax(model.predictor(x))
            # output shape: [1, vocab_size]
            next_index = sample_from_probs(probs.data.squeeze(), top_n)
            # append to sequence
            generated += ID2CHAR[next_index]

    logger.info("generated text: \n%s\n", generated)
    return generated
def out_generated_image(gen, dis, rows, cols, seed, dst, writer):
    @chainer.training.make_extension()
    def make_image(trainer):
        np.random.seed(seed)
        n_images = rows * cols
        xp = gen.xp
        z = Variable(xp.asarray(gen.make_hidden(n_images)))
        with chainer.using_config('train', False):
            x = gen(z)
        writer.add_image('img', x, trainer.updater.iteration)
    return make_image
def check_forward(self, x, use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn):
        y = normalize_layer(x, eps=self.eps)
    self.assertEqual(y.data.dtype, self.dtype)

    y_expect = _normalize_layer(self.x, self.eps).data
    testing.assert_allclose(y_expect, y.data, **self.check_forward_options)
def check_backward(self, x, y_grad, use_cudnn='always'):
    with chainer.using_config('use_cudnn', use_cudnn), \
            chainer.using_config('train', self.train):
        gradient_check.check_backward(
            NormalizeLayer(self.eps), x, y_grad,
            **self.check_backward_options)
def main():
    model = load_model(args.model_dir)
    assert model is not None

    vocab, vocab_inv = load_vocab(args.model_dir)
    assert vocab is not None
    assert vocab_inv is not None

    vocab_size = model.vocab_size

    with chainer.using_config("train", False):
        for n in range(args.num_generate):
            word_ids = np.arange(0, vocab_size, dtype=np.int32)
            token = ID_BOS
            x = np.asarray([[token]]).astype(np.int32)
            model.reset_state()
            while token != ID_EOS and x.shape[1] < args.max_sentence_length:
                u = model.forward_one_step(x)
                p = F.softmax(u).data[-1]
                token = np.random.choice(word_ids, size=1, p=p)
                x = np.append(x, np.asarray([token]).astype(np.int32), axis=1)
            sentence = []
            for token in x[0]:
                word = vocab_inv[token]
                sentence.append(word)
            print(" ".join(sentence))
def translate(self, x_block, max_length=50):
    # TODO: efficient inference by re-using the convolution results
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            x_block = source_pad_concat_convert(
                x_block, device=None)
            batch, x_length = x_block.shape
            y_block = self.xp.zeros((batch, 1), dtype=x_block.dtype)
            eos_flags = self.xp.zeros((batch, ), dtype=x_block.dtype)
            result = []
            for i in range(max_length):
                log_prob_tail = self(x_block, y_block, y_block,
                                     get_prediction=True)
                ys = self.xp.argmax(log_prob_tail.data, axis=1).astype('i')
                result.append(ys)
                y_block = F.concat([y_block, ys[:, None]], axis=1).data
                eos_flags += (ys == 0)
                if self.xp.all(eos_flags):
                    break

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == 0)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        if len(y) == 0:
            y = np.array([1], 'i')
        outs.append(y)
    return outs
def __call__(self, cur_word):
    # Given the current word ID, predict the next word.
    x = self.embed(cur_word)
    # dropout, see:
    # https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
    with chainer.using_config('train', True):
        x = F.dropout(x, self.dropout)
    h = self.mid(x)
    with chainer.using_config('train', True):
        h = F.dropout(h, self.dropout)
    y = self.out(h)
    return y
def __call__(self, cur_word):
    # Given the current word ID, predict the next word.
    x = self.embed(cur_word)
    # dropout, see:
    # https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
    with chainer.using_config('train', True):
        x = F.dropout(x, args.dropout)
    h = self.mid(x)
    with chainer.using_config('train', True):
        h = F.dropout(h, args.dropout)
    y = self.out(h)
    return y
def __evaluate(self, data):
    iterator = chainer.iterators.SerialIterator(data, self.batch_size, repeat=False, shuffle=False)
    total_loss = 0
    total_acc = 0
    num = 0
    with chainer.using_config('enable_backprop', False):
        with chainer.using_config('train', False):
            for batch in iterator:
                x_batch, y_batch = convert.concat_examples(batch, self.device_id)
                loss, acc = self.__forward(x_batch, y_batch)
                total_loss += float(loss.data) * len(x_batch)
                total_acc += float(acc.data) * len(x_batch)
                num += len(x_batch)
    iterator.finalize()
    return total_loss / num, total_acc / num
def _pool_without_cudnn(p, x):
    with chainer.using_config('use_cudnn', 'never'):
        return p.apply((x,))[0]
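Here p is expected to be a pooling FunctionNode instance whose apply method runs the forward computation. A usage sketch of our own (the import path assumes Chainer v3+, where MaxPooling2D is a FunctionNode):

import numpy as np
from chainer.functions.pooling.max_pooling_2d import MaxPooling2D

x = np.random.rand(1, 3, 8, 8).astype(np.float32)
p = MaxPooling2D(ksize=2)        # 2x2 max pooling, stride defaults to ksize
y = _pool_without_cudnn(p, x)    # forward pass with cuDNN turned off
print(y.shape)                   # (1, 3, 4, 4)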
def predict(self, imgs):
    """Conduct semantic segmentation from images.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their values is :math:`[0, 255]`.

    Returns:
        list of numpy.ndarray:
        List of integer labels predicted from each image in the
        input list.
    """
    labels = list()
    for img in imgs:
        C, H, W = img.shape
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            x = chainer.Variable(self.xp.asarray(img[np.newaxis]))
            score = self.__call__(x)[0].data
        score = chainer.cuda.to_cpu(score)
        if score.shape != (C, H, W):
            dtype = score.dtype
            score = resize(score, (H, W)).astype(dtype)

        label = np.argmax(score, axis=0).astype(np.int32)
        labels.append(label)
    return labels
def translate(self, xs, max_length=100):
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            xs = [x[::-1] for x in xs]
            exs = sequence_embed(self.embed_x, xs)
            # Encode the input sequence and send the hidden states to the decoder.
            self.mn_encoder(exs)

    # The encoder does not return anything;
    # all evaluation is done in the decoder process.
    return None
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            result = []
            ys = self.xp.zeros(batch, 'i')
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(
                eys, batch, 0, force_tuple=True)
            # Receive hidden states from the encoder process.
            h, c, ys, _ = self.mn_decoder(eys)

            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

            # Recursively decode using the previously predicted token.
            for i in range(1, max_length):
                eys = self.embed_y(ys)
                eys = chainer.functions.split_axis(
                    eys, batch, 0, force_tuple=True)
                # The non-MN RNN link can be accessed via `actual_rnn`.
                h, c, ys = self.mn_decoder.actual_rnn(h, c, eys)
                cys = chainer.functions.concat(ys, axis=0)
                wy = self.W(cys)
                ys = self.xp.argmax(wy.data, axis=1).astype('i')
                result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = numpy.argwhere(y == 0)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            xs = [x[::-1] for x in xs]
            exs = sequence_embed(self.embed_x, xs)
            # Initial hidden variable and cell variable
            # zero = self.xp.zeros((self.n_layers, batch, self.n_units), 'f')  # NOQA
            # h, c, _ = self.encoder(zero, zero, exs, train=False)  # NOQA
            h, c, _ = self.encoder(None, None, exs)
            ys = self.xp.zeros(batch, 'i')
            result = []
            for i in range(max_length):
                eys = self.embed_y(ys)
                eys = chainer.functions.split_axis(
                    eys, batch, 0, force_tuple=True)
                h, c, ys = self.decoder(h, c, eys)
                cys = chainer.functions.concat(ys, axis=0)
                wy = self.W(cys)
                ys = self.xp.argmax(wy.data, axis=1).astype('i')
                result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = numpy.argwhere(y == 0)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate_with_beam_search(self, sentence: np.ndarray, max_length: int = 30, beam_width=3) -> List[int]:
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        sentence = sentence[::-1]

        embedded_xs = self._embed_input(sentence)
        hidden_states, cell_states, attentions = self._encoder(None, None, [embedded_xs])

        heaps = [[] for _ in range(max_length + 1)]
        # Each heap entry is (score, translation, hidden_states, cell_states).
        heaps[0].append((0, [EOS], hidden_states, cell_states))

        solution = []
        solution_score = 1e8

        for i in range(max_length):
            heaps[i] = sorted(heaps[i], key=lambda t: t[0])[:beam_width]

            for score, translation, i_hidden_states, i_cell_states in heaps[i]:
                wid = translation[-1]
                output, new_hidden_states, new_cell_states = \
                    self._translate_one_word(wid, i_hidden_states, i_cell_states, attentions)

                for next_wid in np.argsort(output.data)[::-1]:
                    if output.data[next_wid] < 1e-6:
                        break
                    next_score = score - np.log(output.data[next_wid])
                    if next_score > solution_score:
                        break
                    next_translation = translation + [next_wid]
                    next_item = (next_score, next_translation, new_hidden_states, new_cell_states)
                    if next_wid == EOS:
                        if next_score < solution_score:
                            solution = translation[1:]  # [1:] drops the first EOS
                            solution_score = next_score
                    else:
                        heaps[i + 1].append(next_item)

        return solution
def main(args):
    model = load_model(args.model_dir)
    assert model is not None

    vocab, vocab_inv = load_vocab(args.model_dir)
    assert vocab is not None
    assert vocab_inv is not None

    vocab_size = model.vocab_size

    with chainer.using_config("train", False):
        for n in range(args.num_generate):
            word_ids = np.arange(0, vocab_size, dtype=np.int32)
            token = ID_BOS
            x = np.asarray([[token]]).astype(np.int32)
            model.reset_state()
            while token != ID_EOS and x.shape[1] < args.max_sentence_length:
                u = model.forward_one_step(x)
                p = F.softmax(u).data[-1]
                token = np.random.choice(word_ids, size=1, p=p)
                x = np.append(x, np.asarray([token]).astype(np.int32), axis=1)
            sentence = []
            for token in x[0]:
                word = vocab_inv[token]
                sentence.append(word)
            print(" ".join(sentence))