我们从 Python 开源项目中提取了以下 47 个代码示例，用于说明如何使用 chainer.links.LSTM。
def __init__(self, args):
    """Register the recurrent and linear links and copy settings from ``args``.

    Args:
        args: Configuration object. The attributes ``n_in_units``,
            ``n_units``, ``d_ratio`` and ``margin`` are read.
    """
    in_units = args.n_in_units
    hidden_units = args.n_units
    super(LSTM, self).__init__(
        # Recurrent link.
        LSTM=L.LSTM(in_units, hidden_units),
        # Linear link registered as ``W_candidate``.
        W_candidate=L.Linear(in_units, hidden_units),
    )
    # Activation stored as ``act1``; currently the identity function.
    self.act1 = F.identity
    self.args = args
    self.n_in_units = in_units
    self.n_units = hidden_units
    self.dropout_ratio = args.d_ratio
    self.margin = args.margin
    # Weight initialisation is delegated to a method defined elsewhere.
    self.initialize_parameters()
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
             n_hidden_layers):
    """Register an MLP, an LSTM and a linear output link.

    Args:
        n_dim_obs: Input size of the MLP.
        n_dim_action: Output size of the final linear link.
        n_hidden_channels: Width of the MLP hidden layers, of the MLP
            output and of the LSTM.
        n_hidden_layers: Number of hidden layers in the MLP.
    """
    self.n_input_channels = n_dim_obs
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.state_stack = []
    super().__init__()

    mlp_hidden_sizes = [n_hidden_channels] * n_hidden_layers
    with self.init_scope():
        self.fc = MLP(
            in_size=n_dim_obs,
            out_size=n_hidden_channels,
            hidden_sizes=mlp_hidden_sizes,
        )
        self.lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
        self.out = L.Linear(n_hidden_channels, n_dim_action)
def __init__(self, n_layers=2, eDim=512, hDim=512, name=""):
    """Create ``n_layers`` LSTM links and pass them to the parent initialiser.

    Args:
        n_layers: Number of LSTM links to create.
        eDim: Input size of the first LSTM link.
        hDim: Output size of every link and input size of all links
            after the first.
        name: Prefix used when naming each link's parameters.
    """
    layers = []
    for depth in six.moves.range(n_layers):
        # Only the first link receives eDim-sized input; the rest take hDim.
        in_dim = eDim if depth == 0 else hDim
        lstm = chaLink.LSTM(in_dim, hDim)
        # Give every parameter a distinct name built from the 1-based
        # layer number.
        layer_no = depth + 1
        lstm.lateral.W.name = name + "_L%d_la_W" % layer_no
        lstm.upward.W.name = name + "_L%d_up_W" % layer_no
        lstm.upward.b.name = name + "_L%d_up_b" % layer_no
        layers.append(lstm)
    super(NLayerLSTM, self).__init__(*layers)
def __init__(self, N_SOURCE_VOCAB, N_TARGET_VOCAB, N_EMBED, N_HIDDEN,
             train=True):
    """Register encoder/decoder links and re-initialise every parameter.

    Args:
        N_SOURCE_VOCAB: Vocabulary size of the encoder embedding.
        N_TARGET_VOCAB: Vocabulary size of the decoder embedding and
            size of the decoder output.
        N_EMBED: Embedding size.
        N_HIDDEN: Size of every LSTM and of the bridging linear links.
        train: Flag stored on the instance as ``self.train``.
    """
    super(EncDecModel, self).__init__(
        # Encoder
        enc_embed=L.EmbedID(N_SOURCE_VOCAB, N_EMBED),
        enc_lstm_1=L.LSTM(N_EMBED, N_HIDDEN),
        enc_lstm_2=L.LSTM(N_HIDDEN, N_HIDDEN),
        # Decoder initializer
        enc_dec_1_c=L.Linear(N_HIDDEN, N_HIDDEN),
        enc_dec_1_h=L.Linear(N_HIDDEN, N_HIDDEN),
        enc_dec_2_c=L.Linear(N_HIDDEN, N_HIDDEN),
        enc_dec_2_h=L.Linear(N_HIDDEN, N_HIDDEN),
        # Decoder
        dec_embed=L.EmbedID(N_TARGET_VOCAB, N_EMBED),
        dec_lstm_1=L.LSTM(N_EMBED, N_HIDDEN),
        dec_lstm_2=L.LSTM(N_HIDDEN, N_HIDDEN),
        dec_output=L.Linear(N_HIDDEN, N_TARGET_VOCAB),
    )

    # Overwrite every parameter in place with uniform noise in [-0.08, 0.08).
    for param in self.params():
        values = param.data
        values[...] = self.xp.random.uniform(-0.08, 0.08, values.shape)

    self.train = train
    self.src_vocab_size = N_SOURCE_VOCAB
    self.trg_vocab_size = N_TARGET_VOCAB
    self.embed_size = N_EMBED
    self.hidden_size = N_HIDDEN
def __init__(self, width=150, height=112, channel=3, action_size=100,
             latent_size=100):
    """Register three convolutions, an LSTM and a linear output link.

    Args:
        width: Input width, used to size the LSTM input.
        height: Input height, used to size the LSTM input.
        channel: Number of input channels of the first convolution.
        action_size: Output size of the final linear link.
        latent_size: Output size of the LSTM.
    """
    # Halve each dimension four times, rounding up, then multiply by the
    # 64 output channels of the last convolution to size the LSTM input.
    feature_width, feature_height = width, height
    for _ in range(4):
        feature_width = (feature_width + 1) // 2
        feature_height = (feature_height + 1) // 2
    feature_size = feature_width * feature_height * 64

    super(Q, self).__init__(
        conv1=L.Convolution2D(channel, 16, 8, stride=4, pad=3),
        conv2=L.Convolution2D(16, 32, 5, stride=2, pad=2),
        conv3=L.Convolution2D(32, 64, 5, stride=2, pad=2),
        lstm=L.LSTM(feature_size, latent_size),
        q=L.Linear(latent_size, action_size),
    )
    self.width = width
    self.height = height
    self.latent_size = latent_size
def initialize_LSTM(self, LSTM, initializer):
    """Apply ``initializer`` to the upward and lateral weights of ``LSTM``.

    Args:
        LSTM: Link exposing ``upward.W.data`` and ``lateral.W.data``.
        initializer: Initializer handed to ``initializers.init_weight``.
    """
    for connection in (LSTM.upward, LSTM.lateral):
        initializers.init_weight(connection.W.data, initializer)
def initialize_parameters(self):
    """Initialise ``W_candidate`` and the LSTM weights with ``GlorotNormal``."""
    glorot = initializers.GlorotNormal()
    initializers.init_weight(self.W_candidate.W.data, glorot)
    self.initialize_LSTM(self.LSTM, glorot)
def solve(self, x_seq, pos, neg, train=True, variablize=False, onebyone=True):
    """Run the LSTM over ``x_seq`` and compute a margin loss on the final state.

    NOTE(review): the source had lost its indentation; the block nesting
    below is reconstructed from the code's logic and should be confirmed
    against the upstream file.

    Args:
        x_seq: Sequence of inputs fed to ``self.LSTM`` one at a time.
        pos: Positive candidate passed to ``self.calculate_score``.
        neg: Negative candidate passed to ``self.calculate_score``.
        train: Forwarded to ``F.dropout``; also gates the one-by-one loss.
        variablize: When true, wrap the inputs in ``chainer.Variable`` and
            project ``pos``/``neg`` through ``W_candidate`` and ``act1``.
        onebyone: When true (and ``train`` is true), add a per-step loss.

    Returns:
        Tuple ``(loss, TorFs)`` where ``TorFs`` is the number of entries
        of the final loss that are below ``self.margin``.
    """
    if variablize:# If arguments are just arrays (not variables), make them variables
        x_seq = [chainer.Variable(x, volatile=not train) for x in x_seq]
        x_seq = [F.dropout(x, ratio=self.dropout_ratio, train=train) for x in x_seq]
        pos = self.act1(self.W_candidate( F.dropout(chainer.Variable(pos, volatile=not train), ratio=self.dropout_ratio, train=train)))
        neg = self.act1(self.W_candidate( F.dropout(chainer.Variable(neg, volatile=not train), ratio=self.dropout_ratio, train=train)))
    if onebyone and train:
        target_x_seq = [self.act1(self.W_candidate(x)) for x in x_seq[:4]]# projections of the first four inputs (the slice keeps four items)
        onebyone_loss = 0.
    self.LSTM.reset_state()
    for i, x in enumerate(x_seq):
        # Dropout is applied here in addition to the dropout in the
        # ``variablize`` branch above.
        h = self.LSTM( F.dropout(x, ratio=self.dropout_ratio, train=train) )
        # The slice is empty (falsy) once no later targets remain.
        if onebyone and train and target_x_seq[i+1:]:
            pos_score, neg_score = self.calculate_score(h, target_x_seq[i+1:], neg, multipos=True)
            onebyone_loss += F.relu( self.margin - pos_score + neg_score )
    # Score the real candidates against the state after the last step.
    pos_score, neg_score = self.calculate_score(h, pos, neg)
    accum_loss = F.relu( self.margin - pos_score + neg_score )
    # Count entries whose hinge loss is below the margin.
    TorFs = sum(accum_loss.data < self.margin)
    if onebyone and train:
        return F.sum(accum_loss) + F.sum(onebyone_loss), TorFs
    else:
        return F.sum(accum_loss), TorFs
def __init__(self, obs_size, action_size, hidden_size=200, lstm_size=128):
    """Build separate policy and value branches, each with its own LSTM.

    Args:
        obs_size: Input size of both head links.
        action_size: Action dimensionality passed to the policy.
        hidden_size: Output size of the heads and input size of the LSTMs.
        lstm_size: Output size of both LSTMs.
    """
    self.pi_head = L.Linear(obs_size, hidden_size)
    self.v_head = L.Linear(obs_size, hidden_size)
    self.pi_lstm = L.LSTM(hidden_size, lstm_size)
    self.v_lstm = L.LSTM(hidden_size, lstm_size)
    self.pi = policies.LinearGaussianPolicyWithDiagonalCovariance(
        lstm_size, action_size)
    self.v = v_function.FCVFunction(lstm_size)

    # Hand every sub-link to the parent initialiser, in this order.
    children = (
        self.pi_head,
        self.v_head,
        self.pi_lstm,
        self.v_lstm,
        self.pi,
        self.v,
    )
    super().__init__(*children)
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
             n_hidden_layers, nonlinearity=F.relu, last_wscale=1.):
    """Register an MLP, an LSTM and a single-output linear link.

    Args:
        n_dim_obs: Observation size.
        n_dim_action: Action size; added to ``n_dim_obs`` to give the
            MLP input size.
        n_hidden_channels: Width of the MLP layers and of the LSTM.
        n_hidden_layers: Number of hidden layers in the MLP.
        nonlinearity: Activation passed to the MLP.
        last_wscale: Scale given to ``LeCunNormal`` for the output link.
    """
    input_channels = n_dim_obs + n_dim_action
    self.n_input_channels = input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.nonlinearity = nonlinearity
    super().__init__()

    mlp_hidden_sizes = [n_hidden_channels] * n_hidden_layers
    with self.init_scope():
        self.fc = MLP(
            input_channels,
            n_hidden_channels,
            mlp_hidden_sizes,
            nonlinearity=nonlinearity,
        )
        self.lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
        self.out = L.Linear(
            n_hidden_channels, 1, initialW=LeCunNormal(last_wscale))
def __init__(self, n_input_channels, n_hidden_layers,
             n_hidden_channels, action_size,
             min_action=None, max_action=None, bound_action=True,
             nonlinearity=F.relu, last_wscale=1.):
    """Build an MLP -> LSTM -> Linear model with an optional action filter.

    Args:
        n_input_channels: Input size of the MLP.
        n_hidden_layers: Number of hidden layers in the MLP.
        n_hidden_channels: Width of the MLP layers and of the LSTM.
        action_size: Output size of the final linear link.
        min_action: Lower bound handed to ``bound_by_tanh``.
        max_action: Upper bound handed to ``bound_by_tanh``.
        bound_action: When true, outputs are filtered by ``bound_by_tanh``.
        nonlinearity: Activation used inside the MLP and on its output.
        last_wscale: Scale given to ``LeCunNormal`` for the output link.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action

    # No filter unless bounding was requested.
    action_filter = None
    if self.bound_action:
        def action_filter(x):
            # Bounds are read from the instance when the filter is called.
            return bound_by_tanh(x, self.min_action, self.max_action)

    mlp = MLP(
        n_input_channels,
        n_hidden_channels,
        (n_hidden_channels,) * n_hidden_layers,
        nonlinearity=nonlinearity,
    )
    recurrent = L.LSTM(n_hidden_channels, n_hidden_channels)
    output = L.Linear(
        n_hidden_channels, action_size, initialW=LeCunNormal(last_wscale))
    model = chainer.Chain(fc=mlp, lstm=recurrent, out=output)

    def model_call(model, x):
        # MLP, then activation, then LSTM, then output projection.
        return model.out(model.lstm(nonlinearity(model.fc(x))))

    super().__init__(
        model=model,
        model_call=model_call,
        action_filter=action_filter)
def __init__(self, n_actions):
    """Build a DQN head, a softmax policy, a value function and an LSTM.

    Args:
        n_actions: Number of actions passed to the softmax policy.
    """
    self.head = links.NIPSDQNHead()
    feature_size = self.head.n_output_channels
    self.pi = policy.FCSoftmaxPolicy(feature_size, n_actions)
    self.v = v_function.FCVFunction(feature_size)
    # The LSTM keeps the head's feature size as both input and output.
    self.lstm = L.LSTM(feature_size, feature_size)
    children = (self.head, self.lstm, self.pi, self.v)
    super().__init__(*children)
def __init__(self, n_actions):
    """Build head, policy, value function and LSTM, then re-initialise.

    Args:
        n_actions: Number of actions passed to the softmax policy.
    """
    self.head = dqn_head.NIPSDQNHead()
    feature_size = self.head.n_output_channels
    self.pi = policy.FCSoftmaxPolicy(feature_size, n_actions)
    self.v = v_function.FCVFunction(feature_size)
    # The LSTM keeps the head's feature size as both input and output.
    self.lstm = L.LSTM(feature_size, feature_size)
    children = (self.head, self.lstm, self.pi, self.v)
    super().__init__(*children)
    # Parameter initialisation is delegated to an external helper.
    init_like_torch(self)
def __init__(self, n_actions):
    """Build a three-input-channel head, policy, value function and LSTM.

    Args:
        n_actions: Number of actions passed to the softmax policy.
    """
    self.head = dqn_head.NIPSDQNHead(n_input_channels=3)
    feature_size = self.head.n_output_channels
    self.pi = policy.FCSoftmaxPolicy(feature_size, n_actions)
    self.v = v_function.FCVFunction(feature_size)
    # The LSTM keeps the head's feature size as both input and output.
    self.lstm = L.LSTM(feature_size, feature_size)
    children = (self.head, self.lstm, self.pi, self.v)
    super().__init__(*children)
    # Parameter initialisation is delegated to an external helper.
    init_like_torch(self)
def __init__(self, deep, gpu, word2index, in_units, hidden_units, out_units,
             loss_func, train, drop_ratio=0.0):
    """Register the two directional embeddings, LSTMs and output layers.

    Args:
        deep: When true, register ``l3``/``l4`` over the concatenated
            hidden size; otherwise register one linear projection per
            direction (``lp_l2r``/``lp_r2l``).
        gpu: Device id; the model is moved to GPU when ``gpu >= 0``.
        word2index: Mapping whose length gives the vocabulary size.
        in_units: Embedding size and LSTM input size.
        hidden_units: LSTM output size.
        out_units: Output size. In the non-deep case each direction gets
            ``out_units // 2`` units.
        loss_func: Loss link registered as ``loss_func``.
        train: Flag stored as ``self.train``.
        drop_ratio: Value stored as ``self.drop_ratio``.
    """
    n_vocab = len(word2index)
    l2r_embedding = F.EmbedID(n_vocab, in_units)
    r2l_embedding = F.EmbedID(n_vocab, in_units)

    if deep:
        super(BiLstmContext, self).__init__(
            l2r_embed=l2r_embedding,
            r2l_embed=r2l_embedding,
            loss_func=loss_func,
            l2r_1=L.LSTM(in_units, hidden_units),
            r2l_1=L.LSTM(in_units, hidden_units),
            l3=L.Linear(2 * hidden_units, 2 * hidden_units),
            l4=L.Linear(2 * hidden_units, out_units),
        )
    else:
        # Floor division keeps the layer size an int on Python 3, where
        # ``out_units / 2`` would be a float; the value matches what
        # Python 2 integer division produced.
        half_out_units = out_units // 2
        super(BiLstmContext, self).__init__(
            l2r_embed=l2r_embedding,
            r2l_embed=r2l_embedding,
            loss_func=loss_func,
            l2r_1=L.LSTM(in_units, hidden_units),
            r2l_1=L.LSTM(in_units, hidden_units),
            lp_l2r=L.Linear(hidden_units, half_out_units),
            lp_r2l=L.Linear(hidden_units, half_out_units),
        )

    if gpu >= 0:
        self.to_gpu()

    # Re-draw both embedding matrices from N(0, 1 / n_rows) after any
    # device move, so ``self.xp`` yields arrays for the active backend.
    for embedding in (l2r_embedding, r2l_embedding):
        shape = embedding.W.data.shape
        embedding.W.data = self.xp.random.normal(
            0, math.sqrt(1. / shape[0]), shape).astype(np.float32)

    self.word2index = word2index
    self.train = train
    self.deep = deep
    self.drop_ratio = drop_ratio
def __init__(self, n_vocab, n_units):
    """Register an embedding, one LSTM and a linear output link.

    Args:
        n_vocab: Vocabulary size, used for the embedding input and the
            output layer size.
        n_units: Embedding size and LSTM size.
    """
    embedding = L.EmbedID(n_vocab, n_units, ignore_label=-1)
    recurrent = L.LSTM(n_units, n_units)
    projection = L.Linear(n_units, n_vocab)
    super(LSTM, self).__init__(
        embed=embedding,
        l1=recurrent,
        l2=projection,
    )
def __init__(self, n_vocab, n_units, train=True):
    """Register an embedding, two LSTMs and a linear output link.

    Args:
        n_vocab: Vocabulary size, used for the embedding input and the
            output layer size.
        n_units: Embedding size and size of both LSTMs.
        train: Flag stored as ``self.train``.
    """
    embedding = L.EmbedID(n_vocab, n_units)
    first_lstm = L.LSTM(n_units, n_units)
    second_lstm = L.LSTM(n_units, n_units)
    projection = L.Linear(n_units, n_vocab)
    super(RNNLM, self).__init__(
        embed=embedding,
        l1=first_lstm,
        l2=second_lstm,
        l3=projection,
    )
    self.train = train
def setUp(self):
    """Create an LSTM link with random weights and a random input batch."""
    self.link = links.LSTM(self.in_size, self.out_size)

    upward = self.link.upward.W.data
    lateral = self.link.lateral.W.data
    # Fill upward first, then lateral, so the random draws happen in a
    # fixed order.
    for weights in (upward, lateral):
        weights[...] = numpy.random.uniform(-1, 1, weights.shape)
    self.link.zerograds()

    # Snapshots of the weights taken right after initialisation.
    self.upward = upward.copy()  # fixed on CPU
    self.lateral = lateral.copy()  # fixed on CPU

    batch = numpy.random.uniform(-1, 1, (4, self.in_size))
    self.x = batch.astype(numpy.float32)
def setUp(self):
    """Create a 5-in/7-out LSTM link and a random (3, 5) float32 input."""
    self.link = links.LSTM(5, 7)
    batch = numpy.random.uniform(-1, 1, (3, 5)).astype(numpy.float32)
    self.x = chainer.Variable(batch)
def __init__(self, n_vocab_char, n_units, n_units_char, index2charIds,
             dropout=.2):
    """Register a character embedding, an LSTM and a linear output link.

    Args:
        n_vocab_char: Size of the character vocabulary.
        n_units: Output size of the final linear link.
        n_units_char: Character embedding size and LSTM size.
        index2charIds: Lookup stored as ``self.index2charIds``.
        dropout: Dropout ratio; zero indicates no dropout.
    """
    super(RNN, self).__init__()
    embedding_init = I.Uniform(1. / n_units_char)
    with self.init_scope():
        # Character embedding.
        self.embed = L.EmbedID(
            n_vocab_char, n_units_char, initialW=embedding_init)
        # The first LSTM layer.
        self.mid = L.LSTM(n_units_char, n_units_char)
        # The feed-forward output layer.
        self.out = L.Linear(n_units_char, n_units)
    self.dropout = dropout
    self.index2charIds = index2charIds
def charRNN(self, context):
    """Turn a list of word ids into word embeddings built from characters.

    Args:
        context: Word ids used to index ``self.index2charIds``; each
            entry selects that word's sequence of character ids.

    Returns:
        Output of ``self.out`` applied to the LSTM hidden state after
        the last character step, with rows in the same order as
        ``context``.
    """
    contexts2charIds = self.index2charIds[context]

    # Sort the words so their character lengths are in descending order.
    # ref: https://docs.chainer.org/en/stable/reference/generated/chainer.links.LSTM.html?highlight=Variable-length
    context_char_length = np.array([len(t) for t in contexts2charIds])
    argsort = context_char_length.argsort()[::-1]  # descending order

    # Inverse permutation, used at the end to restore the original order.
    argsort_reverse = np.zeros(len(argsort), dtype=np.int32)
    argsort_reverse[argsort] = np.arange(len(argsort), dtype=np.int32)

    # Reuse the permutation instead of sorting a second time.
    contexts2charIds = contexts2charIds[argsort]

    # Transpose the ragged word-major lists into step-major lists: entry
    # i holds the i-th character of every word long enough to have one.
    rnn_inputs = [[] for _ in range(len(contexts2charIds[0]))]
    for char_ids in contexts2charIds:
        for step, char_id in enumerate(char_ids):
            rnn_inputs[step].append(char_id)

    self.reset_state()
    for step_ids in rnn_inputs:
        # Only the state update matters; the per-step return is unused.
        self(np.array(step_ids, np.int32))

    y = self.out(self.mid.h)
    return y[argsort_reverse]  # restore the original order
def __init__(self, n_vocab_char, n_units, n_units_char):
    """Register a character embedding, an LSTM and a linear output link.

    Args:
        n_vocab_char: Size of the character vocabulary.
        n_units: Output size of the final linear link.
        n_units_char: Character embedding size and LSTM size.
    """
    super(RNN, self).__init__()
    embedding_init = I.Uniform(1. / n_units_char)
    with self.init_scope():
        # Character embedding.
        self.embed = L.EmbedID(
            n_vocab_char, n_units_char, initialW=embedding_init)
        # The first LSTM layer.
        self.mid = L.LSTM(n_units_char, n_units_char)
        # The feed-forward output layer.
        self.out = L.Linear(n_units_char, n_units)
def charRNN(self, context):
    """Turn a list of word ids into word embeddings built from characters.

    Args:
        context: Word ids used to index the module-level
            ``index2charIds``; each entry selects that word's sequence
            of character ids.

    Returns:
        Output of ``self.out`` applied to the LSTM hidden state after
        the last character step, with rows in the same order as
        ``context``.
    """
    contexts2charIds = index2charIds[context]

    # Sort the words so their character lengths are in descending order.
    # ref: https://docs.chainer.org/en/stable/reference/generated/chainer.links.LSTM.html?highlight=Variable-length
    context_char_length = np.array([len(t) for t in contexts2charIds])
    argsort = context_char_length.argsort()[::-1]  # descending order

    # Inverse permutation, used at the end to restore the original order.
    argsort_reverse = np.zeros(len(argsort), dtype=np.int32)
    argsort_reverse[argsort] = np.arange(len(argsort), dtype=np.int32)

    # Reuse the permutation instead of sorting a second time.
    contexts2charIds = contexts2charIds[argsort]

    # Transpose the ragged word-major lists into step-major lists: entry
    # i holds the i-th character of every word long enough to have one.
    rnn_inputs = [[] for _ in range(len(contexts2charIds[0]))]
    for char_ids in contexts2charIds:
        for step, char_id in enumerate(char_ids):
            rnn_inputs[step].append(char_id)

    self.reset_state()
    for step_ids in rnn_inputs:
        # Only the state update matters; the per-step return is unused.
        self(np.array(step_ids, np.int32))

    y = self.out(self.mid.h)
    return y[argsort_reverse]  # restore the original order
def __init__(self, input_size, output_size):
    """Pass two LSTM links to the parent initialiser, in order.

    Args:
        input_size: Input size of the first LSTM.
        output_size: Output size of both LSTMs and input size of the
            second.
    """
    stack = [
        links.LSTM(input_size, output_size),
        links.LSTM(output_size, output_size),
    ]
    super(StackedLSTM, self).__init__(*stack)
def __init__(self, input_size, output_size):
    """Register two LSTMs (``x_f``, ``x_b``) and two linear links.

    Args:
        input_size: Input size of both LSTMs.
        output_size: Output size of the LSTMs and size of both linear
            links.
    """
    forward_lstm = links.LSTM(input_size, output_size)
    backward_lstm = links.LSTM(input_size, output_size)
    forward_proj = links.Linear(output_size, output_size)
    backward_proj = links.Linear(output_size, output_size)
    super(Encoder, self).__init__(
        x_f=forward_lstm,
        x_b=backward_lstm,
        f_y=forward_proj,
        b_y=backward_proj,
    )
def __init__(self, vocab_size, embed_size):
    """Register a 128-entry embedding, two LSTMs, a word embedding and two linear links.

    Args:
        vocab_size: Number of rows in the ``w_e`` embedding.
        embed_size: Output size of the LSTMs, of ``w_e`` and of both
            linear links.
    """
    # ``c_x`` embeds ids in the range [0, 0x80) into 32 dimensions.
    char_embed_size = 32
    char_embedding = links.EmbedID(0x80, char_embed_size)
    forward_lstm = links.LSTM(char_embed_size, embed_size)
    backward_lstm = links.LSTM(char_embed_size, embed_size)
    word_embedding = links.EmbedID(vocab_size, embed_size)
    forward_proj = links.Linear(embed_size, embed_size)
    backward_proj = links.Linear(embed_size, embed_size)
    super(Embed, self).__init__(
        c_x=char_embedding,
        x_f=forward_lstm,
        x_b=backward_lstm,
        w_e=word_embedding,
        f_e=forward_proj,
        b_e=backward_proj,
    )
def __init__(self, n_words, n_cwords, n_memory, n_output):
    """Store the sizes and register embedding, LSTM and output links.

    Args:
        n_words: Number of rows in the embedding.
        n_cwords: Embedding size and LSTM input size.
        n_memory: LSTM output size.
        n_output: Output size of the final linear link.
    """
    self.n_words = n_words
    self.n_cwords = n_cwords
    self.n_memory = n_memory
    self.n_output = n_output

    embedding = cl.EmbedID(n_words, n_cwords)
    recurrent = cl.LSTM(n_cwords, n_memory)
    projection = cl.Linear(n_memory, n_output)
    super().__init__(
        input=embedding,
        memory=recurrent,
        output=projection,
    )
def __init__(self, n_input_units=1000, n_vocab=100, n_units=100, train=True):
    """Register an input projection, an embedding, two LSTMs and an output link.

    Args:
        n_input_units: Input size of the ``inputVector`` linear link.
        n_vocab: Vocabulary size, used for the embedding input and the
            output layer size.
        n_units: Hidden size shared by every link.
        train: Flag stored as ``self.train``.
    """
    input_projection = L.Linear(n_input_units, n_units)
    embedding = L.EmbedID(n_vocab, n_units)
    first_lstm = L.LSTM(n_units, n_units)
    second_lstm = L.LSTM(n_units, n_units)
    output_projection = L.Linear(n_units, n_vocab)
    super(RNNLM, self).__init__(
        inputVector=input_projection,
        embed=embedding,
        l1=first_lstm,
        l2=second_lstm,
        l3=output_projection,
    )
    self.train = train
def __init__(self, vocab, args):
    """Register the embedding, four LSTMs and the linear links; cache vocab ids.

    Args:
        vocab: Mapping from token to id. It must contain the keys
            ``"@placeholder"``, ``"<eos>"``, ``"<bos>"``, ``"<boq>"``
            and ``"NULL_tok"``.
        args: Configuration object; ``n_units`` and ``d_ratio`` are read.
    """
    def get_initialW_X(shape):
        # Zero-mean normal with standard deviation sqrt(2 / sum(shape)).
        std = (2.0 / (sum(shape))) ** 0.5
        return np.random.normal(0, std, shape).astype(np.float32)

    n_units = args.n_units

    def dense(in_size, out_size):
        # Linear link whose initial weight has shape (out_size, in_size).
        return L.Linear(
            in_size, out_size,
            initialW=get_initialW_X((out_size, in_size)))

    super(DERN, self).__init__(
        # Word Embedding
        embed=L.EmbedID(len(vocab), n_units),
        # bi-LSTMs
        f_LSTM=L.LSTM(n_units, n_units),  # for article
        b_LSTM=L.LSTM(n_units, n_units),
        Q_f_LSTM=L.LSTM(n_units, n_units),  # for query
        Q_b_LSTM=L.LSTM(n_units, n_units),
        # Matrices and vectors
        W_hd=dense(4 * n_units, n_units),
        W_dm=dense(n_units, n_units),
        m=dense(n_units, 1),
        W_hq=dense(4 * n_units, n_units),
        W_hu=dense(4 * n_units, n_units),
        W_dv=dense(n_units, n_units),
        W_dx=dense(n_units, n_units),
        W_dxQ=dense(n_units, n_units),
        b_v2=dense(1, n_units),
    )

    self.args = args
    self.n_vocab = len(vocab)
    self.n_units = n_units
    self.dropout_ratio = args.d_ratio

    # Ids of the special tokens.
    self.PH_id = vocab["@placeholder"]
    self.eos_id = vocab["<eos>"]
    self.bos_id = vocab["<bos>"]
    self.boq_id = vocab["<boq>"]
    self.BOQ_tok_batch = self.xp.array([self.boq_id], dtype=np.int32)
    self.NULL_id = vocab["NULL_tok"]
    self.NULL_tok = self.xp.array(self.NULL_id, dtype=np.int32)

    self.initialize_additionally()
def encode_tokens(self, x_datas, i2sD, train=True):
    """Embed the tokens and run them through the forward and backward LSTMs.

    Args:
        x_datas: Token ids; one LSTM step is run per entry.
        i2sD: Mapping from token position to a representation. Each
            listed position's embedding is replaced by ``self.W_dx``
            applied to that representation.
        train: Forwarded to ``F.dropout``; its negation sets ``volatile``.

    Returns:
        Tuple ``(forward_outL, backward_outL)`` of per-step LSTM outputs.
        ``backward_outL`` is in processing order, i.e. reversed relative
        to the input.
    """
    # Embed, apply dropout, then split into one slice per token.
    token_ids = chainer.Variable(
        self.xp.array(x_datas, dtype=np.int32), volatile=not train)
    embedded = F.dropout(
        self.embed(token_ids), ratio=self.dropout_ratio, train=train)
    h0L = list(F.split_axis(embedded, len(x_datas), axis=0))

    # Replace the embedding at each listed position.
    for position in i2sD.keys():
        h0L[position] = self.W_dx(i2sD[position])

    # Forward pass over the tokens.
    self.f_LSTM.reset_state()
    forward_outL = [self.f_LSTM(h0) for h0 in h0L]

    # Backward pass over the tokens.
    self.b_LSTM.reset_state()
    backward_outL = [self.b_LSTM(h0) for h0 in reversed(h0L)]

    return forward_outL, backward_outL
def __init__(self, g_size=8, n_steps=6, n_scales=1, var=0.03,
             use_lstm=False):
    """Register the linear links and either an LSTM or a linear core.

    Args:
        g_size: Side length used to size the ``emb_x`` input
            (``g_size * g_size * n_scales``).
        n_steps: Value stored as ``self.n_steps``.
        n_scales: Number of scales; multiplies the ``emb_x`` input size.
        var: Value stored as ``self.var``.
        use_lstm: When true the core is an LSTM (``core_lstm``);
            otherwise two linear links (``core_hh``, ``core_gh``).
    """
    d_glm = 128
    d_core = 256
    glimpse_size = g_size * g_size * n_scales
    super(RAM, self).__init__(
        emb_l=L.Linear(2, d_glm),
        emb_x=L.Linear(glimpse_size, d_glm),
        fc_lg=L.Linear(d_glm, d_core),
        fc_xg=L.Linear(d_glm, d_core),
        fc_ha=L.Linear(d_core, 10),
        fc_hl=L.Linear(d_core, 2),
        fc_hb=L.Linear(d_core, 1),
    )

    # Choose the core; construction order matches the registration order.
    if not use_lstm:
        self.add_link(name='core_hh', link=L.Linear(d_core, d_core))
        self.add_link(name='core_gh', link=L.Linear(d_core, d_core))
    else:
        self.add_link(name='core_lstm', link=L.LSTM(d_core, d_core))

    self.use_lstm = use_lstm
    self.d_core = d_core
    self.g_size = g_size
    self.n_steps = n_steps
    self.n_scales = n_scales
    self.var = var
def __call__(self, opt):
    """Scale all gradients down when their norm exceeds ``self.threshold``.

    Records the pre-clipping norm in ``self.norm_orig``, the scaling
    ratio in ``self.rate`` and the resulting norm in ``self.norm``.

    Args:
        opt: Optimizer whose ``target.params()`` carry the gradients.
    """
    gradients = [p.grad for p in opt.target.params()]
    self.norm_orig = np.sqrt(chainer.optimizer._sum_sqnorm(gradients))
    self.norm = self.norm_orig
    self.rate = self.threshold / self.norm_orig

    # Gradients are rescaled only when the ratio is strictly below one.
    if not self.rate < 1:
        return

    for param in opt.target.params():
        grad = param.grad
        # Scale in place on the device that holds the gradient.
        with cuda.get_device(grad):
            grad *= self.rate
    self.norm = self.threshold
def reset_state(self):
    """Call ``reset_state()`` on every child yielded by iterating ``self``."""
    for child_layer in iter(self):
        child_layer.reset_state()
def __call__(self, hx, cx, xs, flag_train, args):
    """Run ``n_step_lstm`` over ``xs`` and stack the per-step outputs.

    Args:
        hx: Initial hidden state, or ``None`` to use ``self.init_hx(xs)``.
        cx: Initial cell state, or ``None`` to use ``self.init_hx(xs)``.
        xs: Input sequence handed to ``n_step_lstm``.
        flag_train: Training flag; only passed on when the Chainer major
            version recorded in ``args`` is not 2.
        args: Configuration; ``chainer_version_check[0]`` is read.

    Returns:
        Tuple ``(hy, cy, hlist)`` where ``hlist`` is the stacked output
        sequence.
    """
    if hx is None:
        hx = self.init_hx(xs)
    if cx is None:
        # The cell state uses the same initialiser as the hidden state.
        cx = self.init_hx(xs)

    # The version-2 call takes no ``train``/``use_cudnn`` keywords.
    if args.chainer_version_check[0] == 2:
        version_kwargs = {}
    else:
        version_kwargs = dict(train=flag_train, use_cudnn=self.use_cudnn)

    hy, cy, ys = chaFunc.n_step_lstm(
        self.n_layers, self.dropout_rate, hx, cx, self.ws, self.bs, xs,
        **version_kwargs)

    # ``ys`` is a sequence of per-step outputs; stack it into a single
    # variable.
    return hy, cy, chaFunc.stack(ys)
def __init__(self, input_num, action_num, max_buff_size, m, e):
    """Register the memory module, encoder, context LSTM and quality link.

    Args:
        input_num: Input size of the encoder.
        action_num: Number of actions passed to ``QualityPhi``.
        max_buff_size: Buffer size passed to ``MemoryModule``.
        m: Output size of the context LSTM.
        e: Encoder output size and LSTM input size.
    """
    print("RMQN Model", input_num, action_num)
    memory = MemoryModule(max_buff_size=max_buff_size, m=m, e=e)
    encoder = L.Linear(in_size=input_num, out_size=e)
    context = L.LSTM(in_size=e, out_size=m)
    quality = QualityPhi(m, action_num)
    super(RMQN, self).__init__(
        memory_module=memory,
        encoder=encoder,
        context=context,
        quality=quality,
    )
def __init__(self, input_num, action_num, max_buff_size, m, e):
    """Register the links; the context LSTM takes an ``e + m`` sized input.

    Args:
        input_num: Input size of the encoder.
        action_num: Number of actions passed to ``QualityPhi``.
        max_buff_size: Buffer size passed to ``MemoryModule``.
        m: Output size of the context LSTM; must equal ``e``.
        e: Encoder output size.
    """
    assert(m == e)
    print("FRMQN Model", input_num, action_num)
    memory = MemoryModule(max_buff_size=max_buff_size, m=m, e=e)
    encoder = L.Linear(in_size=input_num, out_size=e)
    # The LSTM input has size e + m.
    context = L.LSTM(in_size=(e + m), out_size=m)
    quality = QualityPhi(m, action_num)
    super(FRMQN, self).__init__(
        memory_module=memory,
        encoder=encoder,
        context=context,
        quality=quality,
    )
    self.o = None
def __init__(self, input_num, action_num):
    """Register a Linear -> LSTM -> Linear stack with 256 hidden units.

    Args:
        input_num: Input size of the first linear link.
        action_num: Output size of the last linear link.
    """
    print("DRQN Model", input_num, action_num)
    hidden = 256
    first = L.Linear(input_num, hidden)
    recurrent = L.LSTM(hidden, hidden)
    last = L.Linear(hidden, action_num)
    super(DRQN, self).__init__(
        fc1=first,
        lstm=recurrent,
        fc2=last,
    )
def __init__(self, n_vocab, n_units, train=True):
    """Register the links and re-initialise every parameter uniformly.

    Args:
        n_vocab: Vocabulary size, used for the embedding input and the
            output layer size.
        n_units: Embedding size and size of both LSTMs.
        train: Flag stored as ``self.train``.
    """
    super(charRNN, self).__init__(
        embed=L.EmbedID(n_vocab, n_units),
        l1=L.LSTM(n_units, n_units),
        l2=L.LSTM(n_units, n_units),
        l3=L.Linear(n_units, n_vocab),
    )
    # Overwrite every parameter in place with uniform noise in [-0.1, 0.1).
    for param in self.params():
        values = param.data
        values[...] = np.random.uniform(-0.1, 0.1, values.shape)
    self.train = train
def __init__(self, n_layer, n_unit, n_vocab):
    """Register three linear links, an LSTM and a linear decoder.

    Args:
        n_layer: Accepted but not used by this initialiser.
        n_unit: Hidden size shared by every link.
        n_vocab: Output size of the ``dec`` link.
    """
    first = L.Linear(n_unit, n_unit)
    second = L.Linear(n_unit, n_unit)
    scorer = L.Linear(n_unit, 1)
    recurrent = L.LSTM(n_unit, n_unit)
    decoder = L.Linear(n_unit, n_vocab)
    super(AttentionNet, self).__init__(
        l1=first,
        l2=second,
        fnn=scorer,
        lstm=recurrent,
        dec=decoder,
    )
def __init__(self, word_num, feature_num, hidden_num):
    """Register word/image projections, an LSTM and a word output link.

    Args:
        word_num: Vocabulary size, used for the embedding input and the
            output layer size.
        feature_num: Input size of the image projection.
        hidden_num: Hidden size shared by every link.
    """
    word_embedding = L.EmbedID(word_num, hidden_num)
    image_projection = L.Linear(feature_num, hidden_num)
    recurrent = L.LSTM(hidden_num, hidden_num)
    word_output = L.Linear(hidden_num, word_num)
    super(ImageCaption, self).__init__(
        word_vec=word_embedding,
        image_vec=image_projection,
        lstm=recurrent,
        out_word=word_output,
    )
def __init__(self, vocaburary_size, img_feature_dim=2048, hidden_dim=512,
             dropout_ratio=0.5, train=True):
    """Register word/image embeddings, an LSTM and a word decoder.

    Args:
        vocaburary_size: Vocabulary size, used for the embedding input
            and the decoder output size.
        img_feature_dim: Input size of the image embedding.
        hidden_dim: Hidden size shared by every link.
        dropout_ratio: Value stored as ``self.dropout_ratio``.
        train: Flag stored as ``self.train``.
    """
    self.dropout_ratio = dropout_ratio
    word_embedding = L.EmbedID(vocaburary_size, hidden_dim)
    image_embedding = L.Linear(img_feature_dim, hidden_dim)
    recurrent = L.LSTM(hidden_dim, hidden_dim)
    word_decoder = L.Linear(hidden_dim, vocaburary_size)
    super(Image2CaptionDecoderOld, self).__init__(
        embed_word=word_embedding,
        embed_image=image_embedding,
        lstm=recurrent,
        decode_word=word_decoder,
    )
    self.train = train
def encodeSentenceFWD(self, train_mode, sentence, args, dropout_rate):
    """Encode ``sentence`` with forward and backward LSTMs and pack the result.

    NOTE(review): the source had lost its indentation and its original
    comments were unreadable; the nesting and the comments below are
    reconstructed from the code and should be confirmed upstream.

    Args:
        train_mode: Training is enabled when this is greater than zero.
        sentence: Encoder input; ``len(sentence)`` is taken as the
            sequence length and ``len(sentence[0])`` as the minibatch size.
        args: Configuration; ``gpu_enc`` and ``gpu_dec`` are read here.
        dropout_rate: Passed to the encoder LSTMs only in the
            layer-by-layer merge mode.

    Returns:
        The object built by ``self.encInfoObject`` from the combined
        hidden states, the stacked LSTM states, the (possibly shortened)
        encoder length and the minibatch size.
    """
    if args.gpu_enc != args.gpu_dec:  # encoder and decoder devices differ
        chainer.cuda.get_device(args.gpu_enc).use()
    encLen = len(sentence)  # sequence length
    cMBSize = len(sentence[0])  # minibatch size
    # Input embeddings for the encoder.
    encEmbList = self.getEncoderInputEmbeddings(sentence, args)
    flag_train = (train_mode > 0)
    # Two slots per layer: index 2*z takes the cell-state sum and index
    # 2*z+1 the hidden-state sum (see the assignments below).
    lstmVars = [0] * self.n_layers * 2
    if self.flag_merge_encfwbw == 0:  # run both directions over all layers, then merge
        hyf, cyf, fwHout = self.model.encLSTM_f(
            None, None, encEmbList, flag_train, args)  # forward direction
        hyb, cyb, bkHout = self.model.encLSTM_b(
            None, None, encEmbList[::-1], flag_train, args)  # backward direction (reversed input)
        for z in six.moves.range(self.n_layers):
            lstmVars[2 * z] = cyf[z] + cyb[z]
            lstmVars[2 * z + 1] = hyf[z] + hyb[z]
    elif self.flag_merge_encfwbw == 1:  # merge the two directions one layer at a time
        # NOTE(review): encLSTM_f/encLSTM_b are called with a different
        # argument list here than in the branch above — confirm that the
        # callee matches this mode.
        sp = (cMBSize, self.hDim)
        for z in six.moves.range(self.n_layers):
            if z == 0:  # first layer: use the embeddings
                biH = encEmbList
            else:  # later layers: use the previous layer's outputs
                # Reverse bkHout back to input order before adding.
                biH = fwHout + bkHout[::-1]
            # Layer z, forward direction.
            hyf, cyf, fwHout = self.model.encLSTM_f(
                z, biH, flag_train, dropout_rate, args)
            # Layer z, backward direction.
            hyb, cyb, bkHout = self.model.encLSTM_b(
                z, biH[::-1], flag_train, dropout_rate, args)
            # Keep this layer's summed cell and hidden states, reshaped
            # to (minibatch size, hDim).
            lstmVars[2 * z] = chaFunc.reshape(cyf + cyb, sp)
            lstmVars[2 * z + 1] = chaFunc.reshape(hyf + hyb, sp)
    else:
        assert 0, "ERROR"

    # Combine the per-position outputs of the two directions.
    if self.flag_enc_boseos == 0:  # default
        # bkHout is reversed so it lines up with fwHout position by position.
        biHiddenStack = fwHout[:, ] + bkHout[::-1]
    elif self.flag_enc_boseos == 1:
        bkHout2 = bkHout[::-1]  # back to input order
        biHiddenStack = fwHout[1:encLen - 1, ] + bkHout2[1:encLen - 1, ]
        # The first and last positions were dropped above (presumably
        # BOS/EOS, going by the flag name), so shorten the length to match.
        encLen -= 2
    else:
        assert 0, "ERROR"
    # Swap the first two axes; presumably (length, minibatch, dim) ->
    # (minibatch, length, dim) — TODO confirm.
    biHiddenStackSW01 = chaFunc.swapaxes(biHiddenStack, 0, 1)
    # Stack the per-layer states into a single variable.
    lstmVars = chaFunc.stack(lstmVars)
    # Bundle the encoder results for the caller.
    retO = self.encInfoObject(biHiddenStackSW01, lstmVars, encLen, cMBSize)
    return retO