我们从Python开源项目中，提取了以下41个代码示例，用于说明如何使用chainer.functions模块。
def _test_call(self, gpu):
    """Check that an ``MLPBN`` link maps a random batch to the expected shape.

    Args:
        gpu: Device id. When ``>= 0`` the link and the input are moved to
            that GPU before the forward pass.
    """
    activation = getattr(F, self.nonlinearity)
    link_kwargs = dict(
        in_size=self.in_size,
        out_size=self.out_size,
        hidden_sizes=self.hidden_sizes,
        normalize_input=self.normalize_input,
        normalize_output=self.normalize_output,
        nonlinearity=activation,
        last_wscale=self.last_wscale,
    )
    mlp = chainerrl.links.MLPBN(**link_kwargs)

    batch_size = 7
    x = np.random.rand(batch_size, self.in_size).astype(np.float32)
    if gpu >= 0:
        mlp.to_gpu(gpu)
        x = chainer.cuda.to_gpu(x)

    y = mlp(x)
    expected_shape = (batch_size, self.out_size)
    self.assertEqual(y.shape, expected_shape)
    # The output must use the same array module as the input.
    self.assertEqual(
        chainer.cuda.get_array_module(y),
        chainer.cuda.get_array_module(x),
    )
def forward_layers(self, x, average_pooling=False):
    """Run ``x`` through four convolution stages and collect their outputs.

    Args:
        x: Input passed to ``self.model.conv1_1``.
        average_pooling: If true, downsample between stages with 2x2
            average pooling (stride 2); otherwise use 2x2 max pooling.

    Returns:
        list: ``[y1, y2, y3, y4]``, the output of the last convolution of
        each stage (before ReLU is applied).
    """
    def downsample(feature):
        # ReLU is applied here because the stage outputs are kept
        # pre-activation.
        if average_pooling:
            return chainer.functions.average_pooling_2d(
                chainer.functions.relu(feature), 2, stride=2)
        return chainer.functions.max_pooling_2d(
            chainer.functions.relu(feature), 2, stride=2)

    # Stage 1
    h = chainer.functions.relu(self.model.conv1_1(x))
    y1 = self.model.conv1_2(h)

    # Stage 2
    h = chainer.functions.relu(self.model.conv2_1(downsample(y1)))
    y2 = self.model.conv2_2(h)

    # Stage 3
    h = chainer.functions.relu(self.model.conv3_1(downsample(y2)))
    h = chainer.functions.relu(self.model.conv3_2(h))
    y3 = self.model.conv3_3(h)

    # Stage 4
    h = chainer.functions.relu(self.model.conv4_1(downsample(y3)))
    h = chainer.functions.relu(self.model.conv4_2(h))
    y4 = self.model.conv4_3(h)

    return [y1, y2, y3, y4]
def __call__(self, x_0: chainer.Variable, x_1: chainer.Variable) -> typing.List[chainer.Variable]:
    """Compute two pooled intermediate outputs and the final feature map.

    Args:
        x_0: Input to ``self.c0_0``.
        x_1: Input to ``self.c0_1``; only used when ``self.will_concat``
            is truthy.

    Returns:
        A three-element list: ``self.out_1`` applied to the pooled output
        of ``c1``, ``self.out_2`` applied to the pooled output of ``c2``,
        and the raw output of ``c4``.
    """
    def pool_full_extent(feature):
        # The pooling window spans all of axes 2 and 3.
        window = (feature.shape[2], feature.shape[3])
        return chainer.functions.average_pooling_2d(feature, window)

    h = self.c0_0(x_0)
    if self.will_concat:
        h = F.concat([h, self.c0_1(x_1)])

    h = self.c1(h)
    first_out = self.out_1(pool_full_extent(h))

    h = self.c2(h)
    second_out = self.out_2(pool_full_extent(h))

    h = self.c3(h)
    h = self.c4(h)
    return [first_out, second_out, h]
def _elementwise_clip(x, x_min, x_max):
    """Clip ``x`` elementwise between ``x_min`` and ``x_max``.

    The lower bound is applied first with ``F.maximum``, then the upper
    bound with ``F.minimum``.

    Note:
        chainer.functions.clip supports clipping to constant intervals.
    """
    lower_bounded = F.maximum(x, x_min)
    return F.minimum(lower_bounded, x_max)
def _test_call(self, gpu):
    """Smoke-test the call interface of a model built by ``_make_model``.

    This only checks that the model accepts random input data and returns
    output data with the correct interface.

    Args:
        gpu: Device id. When ``>= 0`` the model, the input and the action
            bounds are moved to that GPU.
    """
    activation = getattr(F, self.nonlinearity)
    action_shape = (self.action_size,)
    min_action = np.full(action_shape, -0.01, dtype=np.float32)
    max_action = np.full(action_shape, 0.01, dtype=np.float32)
    model = self._make_model(
        n_input_channels=self.n_input_channels,
        action_size=self.action_size,
        bound_action=self.bound_action,
        min_action=min_action,
        max_action=max_action,
        nonlinearity=activation,
    )

    batch_size = 7
    x = np.random.rand(batch_size, self.n_input_channels)
    x = x.astype(np.float32)
    if gpu >= 0:
        model.to_gpu(gpu)
        x = chainer.cuda.to_gpu(x)
        min_action = chainer.cuda.to_gpu(min_action)
        max_action = chainer.cuda.to_gpu(max_action)

    y = model(x)
    self.assertTrue(isinstance(
        y, chainerrl.distribution.ContinuousDeterministicDistribution))

    a = y.sample()
    self.assertTrue(isinstance(a, chainer.Variable))
    self.assertEqual(a.shape, (batch_size, self.action_size))
    self.assertEqual(
        chainer.cuda.get_array_module(a),
        chainer.cuda.get_array_module(x),
    )

    if not self.bound_action:
        return
    # With bounding enabled every sampled action must lie in the bounds.
    self.assertTrue((a.data <= max_action).all())
    self.assertTrue((a.data >= min_action).all())
def _test_call(self, gpu):
    """Build an ``FCSAQFunction`` and run the shared call test on it.

    Args:
        gpu: Device id forwarded to ``_test_call_given_model``.
    """
    activation = getattr(F, self.nonlinearity)
    model_kwargs = dict(
        n_dim_obs=self.n_dim_obs,
        n_dim_action=self.n_dim_action,
        n_hidden_layers=self.n_hidden_layers,
        n_hidden_channels=self.n_hidden_channels,
        nonlinearity=activation,
        last_wscale=self.last_wscale,
    )
    model = chainerrl.q_functions.FCSAQFunction(**model_kwargs)
    self._test_call_given_model(model, gpu)
def _test_call(self, gpu):
    """Build an ``FCLSTMSAQFunction`` and run the shared call test on it.

    Args:
        gpu: Device id forwarded to ``_test_call_given_model``.
    """
    activation = getattr(F, self.nonlinearity)
    model_kwargs = dict(
        n_dim_obs=self.n_dim_obs,
        n_dim_action=self.n_dim_action,
        n_hidden_layers=self.n_hidden_layers,
        n_hidden_channels=self.n_hidden_channels,
        nonlinearity=activation,
        last_wscale=self.last_wscale,
    )
    model = chainerrl.q_functions.FCLSTMSAQFunction(**model_kwargs)
    self._test_call_given_model(model, gpu)
def _test_call(self, gpu):
    """Build an ``FCBNLateActionSAQFunction`` and run the shared call test.

    Args:
        gpu: Device id forwarded to ``_test_call_given_model``.
    """
    activation = getattr(F, self.nonlinearity)
    model_kwargs = dict(
        n_dim_obs=self.n_dim_obs,
        n_dim_action=self.n_dim_action,
        n_hidden_layers=self.n_hidden_layers,
        n_hidden_channels=self.n_hidden_channels,
        normalize_input=self.normalize_input,
        nonlinearity=activation,
        last_wscale=self.last_wscale,
    )
    model = chainerrl.q_functions.FCBNLateActionSAQFunction(**model_kwargs)
    self._test_call_given_model(model, gpu)
def _test_call(self, gpu):
    """Build an ``FCLateActionSAQFunction`` and run the shared call test.

    Args:
        gpu: Device id forwarded to ``_test_call_given_model``.
    """
    activation = getattr(F, self.nonlinearity)
    model_kwargs = dict(
        n_dim_obs=self.n_dim_obs,
        n_dim_action=self.n_dim_action,
        n_hidden_layers=self.n_hidden_layers,
        n_hidden_channels=self.n_hidden_channels,
        nonlinearity=activation,
        last_wscale=self.last_wscale,
    )
    model = chainerrl.q_functions.FCLateActionSAQFunction(**model_kwargs)
    self._test_call_given_model(model, gpu)
def __call__(self, x, t):
    """Run a forward pass; return the loss when training, else the output.

    This method ultimately defines the model.

    Args:
        x (chainer.Variable): Input vector.
        t (chainer.Variable): Target vector. Usually identical to `x` in
            the case of an Autoencoder.

    Returns:
        chainer.Variable: The scaled loss when ``self.train`` is truthy
        (also stored on ``self.loss``); otherwise the output of ``self.l2``.
    """
    # Test different activation functions and dropout.
    hidden = self.l1(x)
    y = self.l2(hidden)

    if not self.train:
        return y

    # The MSE is scaled by 5 (0.5 * 10) so that the loss is comparable to
    # the one computed in Assignment 4: 0.5 because Chainer's MSE is not
    # halved, and 10 because the previous assignment's loss does not take
    # the mean and sums over 10 elements.
    self.loss = 5 * F.mean_squared_error(y, t)
    return self.loss
def _psroi_pooling_2d_yx(
        x, indices_and_rois, outh, outw,
        spatial_scale, group_size, output_dim):
    """Call ``fcis.functions.psroi_pooling_2d`` with ROI columns reordered.

    Columns 1/2 and 3/4 of ``indices_and_rois`` are swapped (column 0 is
    kept in place) before the array is handed to the pooling function.
    Judging by the names, this presumably converts y-x ordered ROI
    coordinates to x-y order -- confirm against the callee.

    Returns:
        Whatever ``fcis.functions.psroi_pooling_2d`` returns.
    """
    column_order = [0, 2, 1, 4, 3]
    xy_indices_and_rois = indices_and_rois[:, column_order]
    return fcis.functions.psroi_pooling_2d(
        x, xy_indices_and_rois, outh, outw,
        spatial_scale, group_size, output_dim)
def translate(self, xs, max_length=100):
    """Greedily decode a batch of source sequences.

    Each source sequence is reversed, embedded and encoded; the decoder
    is then run for ``max_length`` steps, feeding back the arg-max token
    of each step.

    Args:
        xs: Batch of source sequences.
        max_length: Number of decoding steps.

    Returns:
        list: One array of predicted ids per input, cut just before the
        first ``EOS`` when one was produced.
    """
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        embedded = sequence_embed(self.embed_x, reversed_xs)
        h, _ = self.encoder(None, embedded)

        # Decoding starts from a batch filled with EOS.
        ys = self.xp.full(batch, EOS, 'i')
        steps = []
        for _ in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            scores = self.W(chainer.functions.concat(ys, axis=0))
            ys = self.xp.argmax(scores.data, axis=1).astype('i')
            steps.append(ys)

    # Stack the per-step predictions and transpose so that iterating
    # yields one row per batch entry.
    result = cuda.to_cpu(self.xp.stack(steps).T)

    def strip_eos(y):
        # Keep everything before the first EOS; leave y untouched if none.
        eos_positions = np.argwhere(y == EOS)
        if len(eos_positions) > 0:
            return y[:eos_positions[0, 0]]
        return y

    return [strip_eos(y) for y in result]
def translate(self, xs, max_length=100):
    """Greedily decode a batch using forward and backward encoders.

    The source is embedded in original and reversed order, encoded by
    ``encoder_f`` / ``encoder_b``, and the per-sequence outputs are
    concatenated along axis 1 before being passed to the decoder.

    Args:
        xs: Batch of source sequences.
        max_length: Number of decoding steps.

    Returns:
        list: One array of predicted ids per input, cut just before the
        first ``EOS`` when one was produced.
    """
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        backward_xs = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs)
        exs_b = sequence_embed(self.embed_x, backward_xs)
        _, hf = self.encoder_f(None, exs_f)
        _, hb = self.encoder_b(None, exs_b)
        ht = [F.concat([fwd, bwd], axis=1) for fwd, bwd in zip(hf, hb)]

        # Decoding starts from a batch filled with EOS.
        ys = self.xp.full(batch, EOS, 'i')
        steps = []
        for _ in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            # NOTE(review): the decoder receives None as its first
            # argument on every step -- confirm no state should be
            # carried between steps.
            h_list, _h_bar, _c_s, _z_s = self.decoder(None, ht, eys)
            scores = self.W(chainer.functions.concat(h_list, axis=0))
            ys = self.xp.argmax(scores.data, axis=1).astype('i')
            steps.append(ys)

    # Stack the per-step predictions and transpose so that iterating
    # yields one row per batch entry.
    result = cuda.to_cpu(self.xp.stack(steps).T)

    def strip_eos(y):
        # Keep everything before the first EOS; leave y untouched if none.
        eos_positions = np.argwhere(y == EOS)
        if len(eos_positions) > 0:
            return y[:eos_positions[0, 0]]
        return y

    return [strip_eos(y) for y in result]
def translate(self, xs, max_length=100):
    """Greedily decode a batch using forward and backward encoders.

    The first outputs of ``encoder_f`` and ``encoder_b`` are concatenated
    along axis 2 and used as the decoder's initial state.

    Args:
        xs: Batch of source sequences.
        max_length: Number of decoding steps.

    Returns:
        list: One array of predicted ids per input, cut just before the
        first ``EOS`` when one was produced.
    """
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        backward_xs = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs)
        exs_b = sequence_embed(self.embed_x, backward_xs)
        fx, _ = self.encoder_f(None, exs_f)
        bx, _ = self.encoder_b(None, exs_b)
        h = F.concat([fx, bx], axis=2)

        # Decoding starts from a batch filled with EOS.
        ys = self.xp.full(batch, EOS, 'i')
        steps = []
        for _ in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            scores = self.W(chainer.functions.concat(ys, axis=0))
            ys = self.xp.argmax(scores.data, axis=1).astype('i')
            steps.append(ys)

    # Stack the per-step predictions and transpose so that iterating
    # yields one row per batch entry.
    result = cuda.to_cpu(self.xp.stack(steps).T)

    def strip_eos(y):
        # Keep everything before the first EOS; leave y untouched if none.
        eos_positions = np.argwhere(y == EOS)
        if len(eos_positions) > 0:
            return y[:eos_positions[0, 0]]
        return y

    return [strip_eos(y) for y in result]
def _contents_loss(self, layers, content_layers):
    """Calculate the content difference between original and processing.

    For every index ``i`` the mean squared error between ``layers[i]``
    and ``content_layers[i]`` is weighted by ``self.model.alpha[i]`` and
    added to a running total that starts at zero.

    :param layers: values of each layer for the image being processed
    :param content_layers: values of each layer for the original image
    :return: the accumulated loss
    """
    total = chainer.Variable(self.xp.zeros((), dtype=numpy.float32))
    for idx, current in enumerate(layers):
        weight = numpy.float32(self.model.alpha[idx])
        total += weight * chainer.functions.mean_squared_error(
            current, content_layers[idx])
    return total
def _to_texture_feature(self, layers):
    """Flatten the scaled texture matrices of all layers into one vector.

    Each layer's matrix from ``neural_art.utility.get_matrix`` is scaled
    by ``sqrt(self.model.beta[i] / len(layers))``, divided by the square
    root of its element count, reshaped to 1-D, and the pieces are
    concatenated along axis 0.

    :param layers: predicted value of each layer
    :type layers: List[chainer.Variable]
    """
    n_layers = len(layers)
    subvectors = []
    for layer_index, layer in enumerate(layers):
        beta = numpy.sqrt(
            numpy.float32(self.model.beta[layer_index]) / n_layers)
        texture_matrix = float(beta) * neural_art.utility.get_matrix(layer)
        n_elements = numpy.prod(texture_matrix.data.shape)
        texture_matrix /= numpy.sqrt(n_elements)  # normalize
        flat_shape = (numpy.prod(texture_matrix.data.shape),)
        subvectors.append(
            chainer.functions.reshape(texture_matrix, flat_shape))
    return chainer.functions.concat(subvectors, axis=0)
def squared_error(self, f1, f2):
    """Return the sum of squared differences between ``f1`` and ``f2``.

    The result is ``chainer.functions.sum`` applied to the elementwise
    product ``(f1 - f2) * (f1 - f2)``.
    """
    squared_diff = (f1 - f2) * (f1 - f2)
    return chainer.functions.sum(squared_diff)
def _texture_loss(self, layers):
    """Accumulate the weighted texture loss over all layers.

    For every index ``i`` the mean squared error between
    ``neural_art.utility.get_matrix(layers[i])`` and
    ``self.texture_matrices[i]`` is multiplied by ``self.model.beta[i]``
    and divided by ``len(layers)``; the per-layer terms are added to a
    running total that starts at zero.

    :param layers: sequence indexed in parallel with ``self.model.beta``
        and ``self.texture_matrices``
    :return: the accumulated loss
    """
    loss_texture = chainer.Variable(self.xp.zeros((), dtype=self.xp.float32))
    for layer_index in range(len(layers)):
        matrix = neural_art.utility.get_matrix(layers[layer_index])
        loss = self.xp.float32(self.model.beta[layer_index]) * chainer.functions.mean_squared_error(
            matrix,
            self.texture_matrices[layer_index]
        ) / self.xp.float32(len(layers))
        loss_texture += loss
    # NOTE(review): debug output. Its position after the loop (total only)
    # was reconstructed from a flattened source line -- confirm it was not
    # meant to print inside the loop.
    print("loss_texture", loss_texture.data)
    return loss_texture
def __call__(self, x, split_into_variables=True, discard_context=False):
    """Run the convolution, recurrent and dense stages on ``x``.

    Args:
        x: Input with at least four axes; axis 0 is read as the batch
            size and axis 3 as the sequence length.
        split_into_variables: If truthy, the output is reshaped to
            ``(batchsize, -1)`` and split into ``seq_length`` sections
            along axis 1. Otherwise axis 2 is squeezed away.
        discard_context: Only when this is exactly ``False`` are the
            contexts returned by the recurrent blocks stored back into
            ``self.contexts``.

    Returns:
        The result of ``F.split_axis`` or of ``F.squeeze``, depending on
        ``split_into_variables``.
    """
    batchsize = x.shape[0]
    seq_length = x.shape[3]

    # conv
    out_data = self.conv_blocks(x)
    out_data = functions.reshape(out_data, (batchsize, -1, seq_length))

    # rnn
    for index, blocks in enumerate(self.rnn_blocks.blocks):
        sru = blocks[0]
        # A block may optionally carry a dropout callable as 2nd element.
        dropout = None
        if len(blocks) == 2:
            dropout = blocks[1]

        # NOTE(review): the hidden output of the recurrent block is not
        # fed forward; ``out_data`` is only changed by dropout here --
        # confirm this is intended.
        _hidden, _cell, context = sru(out_data, self.contexts[index])
        if discard_context is False:
            self.contexts[index] = context
        if dropout is not None:
            out_data = dropout(out_data)

    # fc
    out_data = self.dense_blocks(out_data)
    assert out_data.shape[2] == seq_length

    # NOTE(review): the original comment here was mis-encoded; presumably
    # it explained that CTC needs the output split into one Variable per
    # time step -- confirm.
    out_data = F.swapaxes(out_data, 1, 2)
    if split_into_variables:
        out_data = F.reshape(out_data, (batchsize, -1))
        return F.split_axis(out_data, seq_length, axis=1)
    return F.squeeze(out_data, axis=2)
def local_function_hooks(self):
    """Ordered Dictionary of registered function hooks.

    Contrary to ``chainer.thread_local.function_hooks``, which registers
    its elements to all functions, the hooks in this dictionary are
    specific to this function.

    The dictionary is created on first access and stored on the instance
    as ``_local_function_hooks``; later calls return the same object.
    """
    if hasattr(self, '_local_function_hooks'):
        return self._local_function_hooks

    hooks = collections.OrderedDict()
    self._local_function_hooks = hooks
    return hooks
def __init__(self, out_dims=64, normalize_output=False):
    """Strip the loss heads from the base network and add a new FC head.

    Args:
        out_dims: Output size of the new ``fc`` linear link.
        normalize_output: Stored on the instance as ``normalize_output``.
    """
    super(ModifiedGoogLeNet, self).__init__()

    # remove links and functions
    loss_links = [n for n in self._children if n.startswith('loss')]
    for name in loss_links:
        self._children.remove(name)
        delattr(self, name)
    for unused in ('loss3_fc', 'prob'):
        self.functions.pop(unused)

    self.add_link('bn_fc', L.BatchNormalization(1024))
    self.add_link('fc', L.Linear(1024, out_dims))

    # Per-channel mean in RGB order, expanded to shape (1, 3, 1, 1).
    rgb_mean = np.array([123, 117, 104], dtype=np.float32)
    self._image_mean = rgb_mean[None, :, None, None]
    self.normalize_output = normalize_output
def __call__(self, cur_word):
    """Given the current word ID, predict the next word.

    Dropout with ratio ``self.dropout`` is applied after the embedding
    and after ``self.mid``; both calls run with the ``train`` config
    forced to ``True``.
    """
    def forced_dropout(value):
        # dropout. ref: https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
        with chainer.using_config('train', True):
            return F.dropout(value, self.dropout)

    x = forced_dropout(self.embed(cur_word))
    h = forced_dropout(self.mid(x))
    return self.out(h)
def __call__(self, cur_word):
    """Given the current word ID, predict the next word.

    Dropout with ratio ``args.dropout`` is applied after the embedding
    and after ``self.mid``; both calls run with the ``train`` config
    forced to ``True``.
    """
    def forced_dropout(value):
        # dropout. ref: https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
        with chainer.using_config('train', True):
            return F.dropout(value, args.dropout)

    x = forced_dropout(self.embed(cur_word))
    h = forced_dropout(self.mid(x))
    return self.out(h)
def __call__(self, src_data):
    """Encode ``src_data`` in both directions and project the joined states.

    Args:
        src_data: Source data; converted to an ``int32`` array and
            indexed along its first axis, one entry per step.

    Returns:
        tuple: ``(h, S, lex_matrix)`` where ``h`` is the projection from
        the last loop iteration, ``S`` stacks all projections, and
        ``lex_matrix`` is ``None`` when ``self.lexicon`` is ``None``.
    """
    # Some function naming
    F = chainer.functions
    mem_optimize = nmtrain.optimization.chainer_mem_optimize

    def dropout(link):
        return F.dropout(link, ratio=self.dropout_ratio,
                         train=nmtrain.environment.is_train())

    def embed_step(word_ids):
        variable = nmtrain.environment.Variable(word_ids)
        return dropout(mem_optimize(self.embed, variable, level=1))

    # Reset both encoders
    self.encode_forward.reset_state()
    self.encode_backward.reset_state()

    # Perform encoding: step j feeds entry j to the forward encoder and
    # entry -j-1 to the backward encoder.
    fe, be = [], []
    src_input = self.xp.array(src_data, dtype=numpy.int32)
    for j in range(len(src_input)):
        forward_embed = embed_step(src_input[j])
        backward_embed = embed_step(src_input[-j-1])
        fe.append(self.encode_forward(forward_embed))
        be.append(self.encode_backward(backward_embed))

    # Joining encoding together: the backward states are walked in
    # reverse so both states of a pair refer to the same source entry.
    S = []
    for fwd_state, bwd_state in zip(fe, reversed(be)):
        h = self.encode_project(F.concat((fwd_state, bwd_state), axis=1))
        S.append(F.expand_dims(h, axis=2))
    S = F.swapaxes(F.concat(S, axis=2), 1, 2)

    # If lexicon is provided
    lex_matrix = None
    if self.lexicon is not None:
        lex_matrix = nmtrain.environment.Variable(
            self.lexicon.init(src_data, self.xp))
    return h, S, lex_matrix
def firstInput(self, t, x=None):
    """Run ``self.func`` on ``x`` and accumulate the cross-entropy loss.

    Args:
        t: Target wrapper; its ``content`` is used as the label.
        x: Input wrapper; defaults to ``Tensor.context`` when ``None``.

    Returns:
        The input wrapper ``x`` (after defaulting), unchanged.
    """
    if x is None:
        x = Tensor.context

    inputs = x.content
    targets = t.content
    prediction = self.func(inputs, mode=1)
    loss = chainer.functions.loss.softmax_cross_entropy.softmax_cross_entropy(
        prediction, targets)

    # Keep the latest prediction and loss on the wrapped function.
    self.func.y = prediction
    self.func.loss = loss

    self.accum_loss += loss
    self.cur_log_perp += loss.data
    return x
def translate(self, xs, max_length=100):
    """Greedily decode a batch, taking the initial state from ``mn_decoder``.

    Args:
        xs: Batch of source sequences; only its length is used here.
        max_length: Total number of decoding steps, including the first.

    Returns:
        list: One array of predicted ids per input, cut just before the
        first id ``0`` (treated as EOS here) when one was produced.
    """
    batch = len(xs)

    def embed_tokens(token_ids):
        eys = self.embed_y(token_ids)
        return chainer.functions.split_axis(
            eys, batch, 0, force_tuple=True)

    def pick_tokens(decoder_outputs):
        cys = chainer.functions.concat(decoder_outputs, axis=0)
        wy = self.W(cys)
        return self.xp.argmax(wy.data, axis=1).astype('i')

    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        ys = self.xp.zeros(batch, 'i')
        eys = embed_tokens(ys)

        # Receive hidden stats from encoder process.
        h, c, ys, _ = self.mn_decoder(eys)
        ys = pick_tokens(ys)
        result = [ys]

        # Recursively decode using the previously predicted token.
        for _ in range(1, max_length):
            eys = embed_tokens(ys)
            # Non-MN RNN link can be accessed via `actual_rnn`.
            h, c, ys = self.mn_decoder.actual_rnn(h, c, eys)
            ys = pick_tokens(ys)
            result.append(ys)

    # Stack the per-step predictions and transpose so that iterating
    # yields one row per batch entry.
    result = cuda.to_cpu(self.xp.stack(result).T)

    def strip_eos(y):
        # Keep everything before the first 0; leave y untouched if none.
        eos_positions = numpy.argwhere(y == 0)
        if len(eos_positions) > 0:
            return y[:eos_positions[0, 0]]
        return y

    return [strip_eos(y) for y in result]
def translate(self, xs, max_length=100):
    """Greedily decode a batch of source sequences.

    Each source sequence is reversed, embedded and encoded; the decoder
    is then run for ``max_length`` steps, feeding back the arg-max token
    of each step.

    Args:
        xs: Batch of source sequences.
        max_length: Number of decoding steps.

    Returns:
        list: One array of predicted ids per input, cut just before the
        first id ``0`` (treated as EOS here) when one was produced.
    """
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, reversed_xs)

        # None is passed for both the initial hidden and cell variables.
        h, c, _ = self.encoder(None, None, exs)

        ys = self.xp.zeros(batch, 'i')
        steps = []
        for _ in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(
                eys, batch, 0, force_tuple=True)
            h, c, ys = self.decoder(h, c, eys)
            scores = self.W(chainer.functions.concat(ys, axis=0))
            ys = self.xp.argmax(scores.data, axis=1).astype('i')
            steps.append(ys)

    # Stack the per-step predictions and transpose so that iterating
    # yields one row per batch entry.
    result = cuda.to_cpu(self.xp.stack(steps).T)

    def strip_eos(y):
        # Keep everything before the first 0; leave y untouched if none.
        eos_positions = numpy.argwhere(y == 0)
        if len(eos_positions) > 0:
            return y[:eos_positions[0, 0]]
        return y

    return [strip_eos(y) for y in result]
def __call__(self, hs, noise=None, *args, **kwargs):
    """Append noise to the last element of ``hs`` and call the parent.

    Args:
        hs: Sequence of features; only the last one is modified.
        noise: Noise to concatenate. When ``None`` it is created with
            ``self.create_noise`` from the shape of the last feature.
        *args: Forwarded to the parent ``__call__``.
        **kwargs: Forwarded to the parent ``__call__``.

    Returns:
        Whatever the parent ``__call__`` returns.
    """
    last = hs[-1]
    if noise is None:
        noise = self.create_noise(last.shape)

    # Work on a shallow copy so the caller's sequence is left untouched.
    augmented = list(hs)
    augmented[-1] = chainer.functions.concat((last, noise))
    return super().__call__(augmented, *args, **kwargs)
def __call__(self, x_0: chainer.Variable, x_1: chainer.Variable):
    """Apply ``c0_0`` (optionally joined with ``c0_1``) then ``c1``..``c4``.

    Args:
        x_0: Input to ``self.c0_0``.
        x_1: Input to ``self.c0_1``; only used when ``self.will_concat``
            is truthy.

    Returns:
        The output of ``self.c4``.
    """
    h = self.c0_0(x_0)
    if self.will_concat:
        h = F.concat([h, self.c0_1(x_1)])

    # The remaining layers are applied strictly in sequence.
    for layer_name in ('c1', 'c2', 'c3', 'c4'):
        h = getattr(self, layer_name)(h)
    return h
def __call__(self, x):
    """Apply a 2-D convolution whose weight is multiplied by ``self.mask``.

    Parameters are initialised lazily on the first call, using
    ``x.shape[1]`` as the argument to ``_initialize_params``.

    Args:
        x: Input to the convolution.

    Returns:
        The output of ``convolution_2d``.
    """
    if self.has_uninitialized_params:
        with chainer.cuda.get_device(self._device_id):
            self._initialize_params(x.shape[1])

    masked_weight = self.W * self.mask
    conv2d = chainer.functions.connection.convolution_2d.convolution_2d
    return conv2d(
        x, masked_weight, self.b, self.stride, self.pad, self.use_cudnn,
        deterministic=self.deterministic)
def functions(self):
    """Return the network as an ordered mapping of layer name to callables.

    Each value is the list of callables that make up that layer, in the
    order they are applied.
    """
    layers = collections.OrderedDict()
    layers['conv1'] = [self.conv1, self.bnorm1, F.relu]
    layers['conv2'] = [self.conv2, self.bnorm2, F.relu]
    layers['pool1'] = [_max_pooling_2d]
    layers['conv3'] = [self.conv3, self.bnorm3, F.relu]
    layers['conv4'] = [self.conv4, self.bnorm4, F.relu]
    layers['pool2'] = [_spatial_pyramid_pooling_2d]
    layers['fc'] = [self.fc]
    return layers
def available_layers(self):
    """Return the keys of ``self.functions`` as a list, in mapping order."""
    layer_names = list(self.functions)
    return layer_names
def __call__(self, x):
    """Feed ``x`` through every callable in ``self.functions`` in order.

    Args:
        x: Input to the first callable.

    Returns:
        The output of the last callable, or ``x`` itself when there are
        no callables.
    """
    out = x
    for stage in self.functions.values():
        for apply in stage:
            out = apply(out)
    return out
def extract(self, x, layers=['conv4']):
    """Run the whole network and collect the outputs of selected layers.

    Args:
        x: Input to the first callable.
        layers: Names of the layers whose outputs should be collected.
            Names that are not keys of ``self.functions`` are ignored.

    Returns:
        tuple: ``(activations, h)`` where ``activations`` lists the
        collected outputs in network order and ``h`` is the final output.
    """
    pending = set(layers)
    collected = []
    h = x
    for name, funcs in self.functions.items():
        for func in funcs:
            h = func(h)
        if name not in pending:
            continue
        # Record the output once all callables of the layer have run.
        collected.append(h)
        pending.remove(name)
    return collected, h
def functions(self):
    """Return the network as an ordered mapping of layer name to callables.

    Each value is the list of callables that make up that layer, in the
    order they are applied.
    """
    layers = collections.OrderedDict()
    layers['conv1'] = [self.conv1, self.bnorm1, F.relu]
    layers['pool1'] = [_max_pooling_2d]
    layers['conv2'] = [self.conv2, self.bnorm2, F.relu]
    layers['pool2'] = [_spatial_pyramid_pooling_2d]
    layers['fc'] = [self.fc]
    return layers
def extract(self, x, layers=['conv2']):
    """Run the whole network and collect the outputs of selected layers.

    Args:
        x: Input to the first callable.
        layers: Names of the layers whose outputs should be collected.
            Names that are not keys of ``self.functions`` are ignored.

    Returns:
        tuple: ``(activations, h)`` where ``activations`` lists the
        collected outputs in network order and ``h`` is the final output.
    """
    pending = set(layers)
    collected = []
    h = x
    for name, funcs in self.functions.items():
        for func in funcs:
            h = func(h)
        if name not in pending:
            continue
        # Record the output once all callables of the layer have run.
        collected.append(h)
        pending.remove(name)
    return collected, h
def __init__(self, model):
    """Initialise the parent and patch matching layers of the predictor.

    For every entry of ``model.predictor.functions`` whose key matches
    ``self.pattern`` (via ``re.match``), the last callable in the entry
    is replaced with a new ``GuidedBackpropReLU`` instance.

    Args:
        model: Model passed to the parent constructor; must expose
            ``predictor.functions``.
    """
    super(GuidedBackpropGrad, self).__init__(model)
    for layer_name, funcs in model.predictor.functions.items():
        if re.match(self.pattern, layer_name):
            funcs[-1] = GuidedBackpropReLU()
def __init__(self, model, stdev_spread=.15, n_samples=25, magnitude=True):
    """Initialise the parent and patch matching layers of the predictor.

    For every entry of ``model.predictor.functions`` whose key matches
    ``self.pattern`` (via ``re.match``), the last callable in the entry
    is replaced with a new ``GuidedBackpropReLU`` instance.

    Args:
        model: Model passed to the parent constructor; must expose
            ``predictor.functions``.
        stdev_spread: Forwarded to the parent constructor.
        n_samples: Forwarded to the parent constructor.
        magnitude: Forwarded to the parent constructor.
    """
    super(GuidedBackpropSmoothGrad, self).__init__(
        model, stdev_spread, n_samples, magnitude)
    for layer_name, funcs in model.predictor.functions.items():
        if re.match(self.pattern, layer_name):
            funcs[-1] = GuidedBackpropReLU()
def translate(self, xs, max_length=100):
    """Greedily decode a batch using word-level and character-level encoders.

    Args:
        xs: Batch of source sentences; each sentence is iterated as a
            sequence of string tokens.
        max_length: Number of decoding steps.

    Returns:
        list: One array of predicted ids per input, cut just before the
        first ``EOS`` when one was produced.
    """
    print("Now translating")
    batch = len(xs)
    print("batch",batch)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        # Word ids per sentence; unknown words map to UNK.
        wxs = [np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32) for x in xs]
        # NOTE(review): wx_len, wx_section and valid_wx_section are not
        # read again anywhere in this function.
        wx_len = [len(wx) for wx in wxs]
        wx_section = np.cumsum(wx_len[:-1])
        valid_wx_section = np.insert(wx_section, 0, 0)
        # Character ids of each sentence's tokens joined into one string;
        # unknown characters map to UNK.
        cxs = [np.array([source_char_ids.get(c, UNK) for c in list("".join(x))], dtype=np.int32) for x in xs]
        wexs = sequence_embed(self.embed_xw, wxs)
        cexs = sequence_embed(self.embed_xc, cxs)
        # Forward inputs are used as-is; backward inputs are reversed
        # along their first axis.
        wexs_f = wexs
        wexs_b = [wex[::-1] for wex in wexs]
        cexs_f = cexs
        cexs_b = [cex[::-1] for cex in cexs]
        _, hfw = self.encoder_fw(None, wexs_f)
        _, hbw = self.encoder_bw(None, wexs_b)
        _, hfc = self.encoder_fc(None, cexs_f)
        _, hbc = self.encoder_bc(None, cexs_b)
        # Flip the backward outputs along their first axis, presumably so
        # they line up position-by-position with the forward outputs.
        hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
        hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]
        # Join forward/backward outputs along axis 1, then stack the
        # word-level and character-level results along axis 0.
        htw = list(map(lambda x,y: F.concat([x, y], axis=1), hfw, hbw))
        htc = list(map(lambda x,y: F.concat([x, y], axis=1), hfc, hbc))
        ht = list(map(lambda x,y: F.concat([x, y], axis=0), htw, htc))
        # Decoding starts from a batch filled with EOS and no decoder state.
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        h=None
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(h, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            # Greedy choice: arg-max over axis 1 of the projected scores.
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)
            # Rebuild the state passed to the decoder on the next step from
            # this step's outputs.
            # NOTE(review): the reshape assumes the element count matches
            # (n_layers, h.shape[0], h.shape[1]) -- confirm for n_layers > 1.
            h = F.transpose_sequence(h_list)[-1]
            h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))
    # Stack the per-step predictions and transpose so that iterating
    # yields one row per batch entry.
    result = cuda.to_cpu(self.xp.stack(result).T)
    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            # Keep only the part before the first EOS.
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    """Greedily decode a batch using word-level and character-level encoders.

    The decoder's initial state is built from the first outputs of the
    two backward encoders.

    Args:
        xs: Batch of source sentences; each sentence is iterated as a
            sequence of string tokens.
        max_length: Number of decoding steps.

    Returns:
        list: One array of predicted ids per input, cut just before the
        first ``EOS`` when one was produced.
    """
    print("Now translating")
    batch = len(xs)
    print("batch",batch)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        # Word ids per sentence; unknown words map to UNK.
        wxs = [np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32) for x in xs]
        # NOTE(review): wx_len, wx_section and valid_wx_section are not
        # read again anywhere in this function.
        wx_len = [len(wx) for wx in wxs]
        wx_section = np.cumsum(wx_len[:-1])
        valid_wx_section = np.insert(wx_section, 0, 0)
        # Character ids of each sentence's tokens joined into one string;
        # unknown characters map to UNK.
        cxs = [np.array([source_char_ids.get(c, UNK) for c in list("".join(x))], dtype=np.int32) for x in xs]
        wexs = sequence_embed(self.embed_xw, wxs)
        cexs = sequence_embed(self.embed_xc, cxs)
        # Forward inputs are used as-is; backward inputs are reversed
        # along their first axis.
        wexs_f = wexs
        wexs_b = [wex[::-1] for wex in wexs]
        cexs_f = cexs
        cexs_b = [cex[::-1] for cex in cexs]
        _, hfw = self.encoder_fw(None, wexs_f)
        # h1 / h2: first outputs of the backward encoders, joined below to
        # form the decoder's initial state.
        h1, hbw = self.encoder_bw(None, wexs_b)
        _, hfc = self.encoder_fc(None, cexs_f)
        h2, hbc = self.encoder_bc(None, cexs_b)
        # Flip the backward outputs along their first axis, presumably so
        # they line up position-by-position with the forward outputs.
        hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw]
        hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc]
        # Join forward/backward outputs along axis 1, then stack the
        # word-level and character-level results along axis 0.
        htw = list(map(lambda x,y: F.concat([x, y], axis=1), hfw, hbw))
        htc = list(map(lambda x,y: F.concat([x, y], axis=1), hfc, hbc))
        ht = list(map(lambda x,y: F.concat([x, y], axis=0), htw, htc))
        # Decoding starts from a batch filled with EOS.
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        h = F.concat([h1, h2], axis=2)
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h_list, h_bar_list, c_s_list, z_s_list = self.decoder(h, ht, eys)
            cys = chainer.functions.concat(h_list, axis=0)
            wy = self.W(cys)
            # Greedy choice: arg-max over axis 1 of the projected scores.
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)
            # Rebuild the state passed to the decoder on the next step from
            # this step's outputs.
            # NOTE(review): the reshape assumes the element count matches
            # (n_layers, h.shape[0], h.shape[1]) -- confirm for n_layers > 1.
            h = F.transpose_sequence(h_list)[-1]
            h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1]))
    # Stack the per-step predictions and transpose so that iterating
    # yields one row per batch entry.
    result = cuda.to_cpu(self.xp.stack(result).T)
    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            # Keep only the part before the first EOS.
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs