我们从 Python 开源项目中，提取了以下 50 个代码示例，用于说明如何使用 chainer.functions.concat()。
def encode(self, xs):
    """Encode a batch of token-id lists into per-layer VAE parameters.

    Each sequence gets the id ``2`` appended, is embedded once in each
    direction with ``self.makeEmbedBatch`` and is run through the forward
    (``enc_f``) and backward (``enc_b``) encoders. The states the encoders
    expose as ``hx`` / ``cx`` are then concatenated per entry and projected
    by ``le2_mu`` and ``le2_ln_var``.

    Args:
        xs: list of lists of token ids.

    Returns:
        tuple: ``(mu_arr, var_arr)``, two lists with one entry per
        ``(hx, cx)`` group of the encoders.
    """
    # Presumably 2 is an end-of-sequence id -- TODO confirm with the vocab.
    xs = [x + [2] for x in xs]
    xs_f = self.makeEmbedBatch(xs)
    xs_b = self.makeEmbedBatch(xs, True)
    self.enc_f.reset_state()
    self.enc_b.reset_state()
    # The encoder outputs are not used below; only hx / cx are read
    # afterwards, so the return values are deliberately discarded.
    self.enc_f(xs_f)
    self.enc_b(xs_b)
    # VAE: build each concatenated state once and reuse it for both heads.
    states = [
        F.concat(group)
        for group in zip(self.enc_f.hx, self.enc_f.cx,
                         self.enc_b.hx, self.enc_b.cx)
    ]
    mu_arr = [self.le2_mu(state) for state in states]
    var_arr = [self.le2_ln_var(state) for state in states]
    return mu_arr, var_arr
def __call__(self, x, train=False):
    """Run the parallel branches on ``x`` and join them on axis 1.

    With ``self.stride == 1`` four branches are produced (five stacked
    "double" convolutions, three convolutions, pooling followed by
    ``conv_pool``, and ``conv1x1``). Otherwise only three branches are
    produced and the first two are one layer shorter.
    """
    pool = Inception_A.max_or_ave(self.pool)
    unit_stride = self.stride == 1

    # Branch a: stacked double convolutions.
    branch_a = x
    for layer in (self.double_convnxn_1, self.double_convnxn_2,
                  self.double_convnxn_3, self.double_convnxn_4):
        branch_a = layer(branch_a, train)
    if unit_stride:
        branch_a = self.double_convnxn_5(branch_a, train)

    # Branch b: stacked single convolutions.
    branch_b = self.convnxn_2(self.convnxn_1(x, train), train)
    if unit_stride:
        branch_b = self.convnxn_3(branch_b, train)

    # Branch c: pooling, optionally followed by a convolution.
    branch_c = pool(x, ksize=3, stride=self.stride, pad=1)
    if not unit_stride:
        return F.concat((branch_a, branch_b, branch_c), axis=1)

    branch_c = self.conv_pool(branch_c, train)
    # Branch d only exists in the unit-stride configuration.
    branch_d = self.conv1x1(x, train)
    return F.concat((branch_a, branch_b, branch_c, branch_d), axis=1)
def convert(batch, device):
    """Split a batch of ``(x, y)`` pairs and move both halves to ``device``.

    Args:
        batch: list of ``(x, y)`` pairs.
        device: ``None`` to leave the data untouched, a negative value to
            send every array through ``chainer.dataset.to_device``
            individually, or a non-negative value to transfer all arrays
            in one concatenated copy and split them again afterwards.

    Returns:
        dict: ``{'xs': [...], 'ys': [...]}``.
    """
    def to_device_batch(batch):
        # An empty batch has nothing to transfer; without this guard the
        # concatenate call below would fail on an empty list.
        if device is None or not batch:
            return batch
        if device < 0:
            return [chainer.dataset.to_device(device, x) for x in batch]
        # One big transfer instead of many small ones: concatenate,
        # send, then cut at the cumulative lengths.
        xp = cuda.cupy.get_array_module(*batch)
        concat = xp.concatenate(batch, axis=0)
        sections = np.cumsum([len(x) for x in batch[:-1]], dtype='i')
        concat_dev = chainer.dataset.to_device(device, concat)
        return cuda.cupy.split(concat_dev, sections)

    return {'xs': to_device_batch([x for x, _ in batch]),
            'ys': to_device_batch([y for _, y in batch])}
def __call__(self, ht, xs, d_bar_s_1):
    """Run one attention-decoder step over the encoder states ``ht``.

    Args:
        ht: encoder states (the original note gives the layout as
            batch_size * n_words * in_size).
        xs: not used by this method.
        d_bar_s_1: previous attentional state, or ``None`` to start from
            a zero vector of length ``self.in_size``.

    Returns:
        tuple: ``(d_bar_s, d_s, c_s, z_s)``.
    """
    # Identity test: ``== None`` on an array compares element-wise and
    # cannot be used as a truth value.
    if d_bar_s_1 is None:
        d_bar_s_1 = np.zeros(self.in_size)
    # NOTE(review): W1, W2, W3, rnn and y_s_1 are not defined inside this
    # method; they have to come from an enclosing scope -- confirm.
    ht_T = list(map(F.transpose, ht))
    phi_ht = list(map(W1, ht_T))
    d_s = rnn(d_bar_s_1, y_s_1)
    phi_d = F.transpose_sequence(W2(F.transpose_sequence(d_s)))
    u_st = list(map(lambda x: phi_d * x, phi_ht))  # (4)
    sum_u = F.sum(u_st)
    alpha_st = list(map(lambda x: x / sum_u, u_st))  # (3)
    z_s = F.argmax(alpha_st, axis=0)
    c_s = F.sum(list(map(lambda x, y: x * y, alpha_st, ht)))  # (2)
    d_bar_s = F.relu(W3(F.concat([c_s, d_s])))
    return d_bar_s, d_s, c_s, z_s
def forward(self, pretrained_word_tokens, word_tokens, pos_tokens):
    """Compute arc and label logits for a batch of sentences.

    Per sentence, the pretrained and trainable word embeddings are summed,
    dropout is applied to the word and tag embeddings, and the two are
    concatenated. The BiLSTM output is padded and fed to the arc and label
    MLP / biaffine heads.

    Returns:
        tuple: ``(arc_logits, label_logits)``.
    """
    inputs = []
    for i, words in enumerate(word_tokens):
        pretrained_vecs = self.embed[0](
            self.xp.array(pretrained_word_tokens[i]))
        word_vecs = self.embed[1](self.xp.array(words))
        word_vecs += pretrained_vecs
        tag_vecs = self.embed[2](self.xp.array(pos_tokens[i]))
        inputs.append(F.concat([
            teras_F.dropout(word_vecs, self.embed._dropout_ratio),
            teras_F.dropout(tag_vecs, self.embed._dropout_ratio)]))

    encoded = F.pad_sequence(self.blstm(inputs))

    # Arc scores: dependent and head projections through the biaffine.
    arc_dep = self.mlp_arc_dep(encoded)
    arc_head = self.mlp_arc_head(encoded)
    arc_logits = F.squeeze(self.arc_biaffine(arc_dep, arc_head), axis=3)

    # Label scores use their own pair of projections.
    label_dep = self.mlp_label_dep(encoded)
    label_head = self.mlp_label_head(encoded)
    label_logits = self.label_biaffine(label_dep, label_head)
    return arc_logits, label_logits
def forward(self, ws, ss, ps):
    """Return per-sentence category scores and dependency scores.

    Word, suffix and prefix embeddings are concatenated per sentence, run
    through a forward and a backward LSTM, and the joined hidden states
    are fed to the category head and the biaffine dependency head.

    Returns:
        tuple: ``(cat_ys, dep_ys)``, two lists with one entry per sentence.
    """
    batchsize = len(ws)
    xp = chainer.cuda.get_array_module(ws[0])

    def embed_affix(embed, ids):
        # Flatten the affix embeddings of each token into one row.
        return F.reshape(embed(ids), (ids.shape[0], 4 * self.afix_dim))

    suffix_vecs = [embed_affix(self.emb_suf, s) for s in ss]
    prefix_vecs = [embed_affix(self.emb_prf, p) for p in ps]
    word_vecs = [self.emb_word(w) for w in ws]

    xs_f = [F.dropout(F.concat([w, s, p]),
                      self.dropout_ratio, train=self.train)
            for w, s, p in zip(word_vecs, suffix_vecs, prefix_vecs)]
    xs_b = [seq[::-1] for seq in xs_f]

    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    # Undo the reversal so both directions line up token by token.
    hs_b = [seq[::-1] for seq in hs_b]

    hs = [F.concat([fwd, bwd]) for fwd, bwd in zip(hs_f, hs_b)]

    # The two heads are evaluated in separate passes, category head first.
    cat_ys = []
    for h in hs:
        hidden = F.dropout(F.elu(self.linear_cat1(h)), 0.5,
                           train=self.train)
        cat_ys.append(self.linear_cat2(hidden))

    dep_ys = []
    for h in hs:
        dep = F.elu(F.dropout(self.linear_dep(h), 0.32, train=self.train))
        head = F.elu(F.dropout(self.linear_head(h), 0.32,
                               train=self.train))
        dep_ys.append(self.biaffine(dep, head))
    return cat_ys, dep_ys
def forward(self, ws, ss, ps):
    """Return one score matrix per sentence from a bidirectional LSTM.

    Word, suffix and prefix embeddings are concatenated per sentence, run
    through a forward and a backward LSTM, and each joined hidden state is
    passed through ``linear1`` -> ReLU -> ``linear2``.
    """
    batchsize = len(ws)
    xp = chainer.cuda.get_array_module(ws[0])

    def embed_affix(embed, ids):
        # Flatten the affix embeddings of each token into one row.
        return F.reshape(embed(ids), (ids.shape[0], 4 * self.afix_dim))

    suffix_vecs = [embed_affix(self.emb_suf, s) for s in ss]
    prefix_vecs = [embed_affix(self.emb_prf, p) for p in ps]
    word_vecs = [self.emb_word(w) for w in ws]

    # One matrix per sentence: word, suffix and prefix features side by side.
    xs_f = [F.dropout(F.concat([w, s, p]),
                      self.dropout_ratio, train=self.train)
            for w, s, p in zip(word_vecs, suffix_vecs, prefix_vecs)]
    xs_b = [seq[::-1] for seq in xs_f]

    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    # Undo the reversal so both directions line up token by token.
    hs_b = [seq[::-1] for seq in hs_b]

    ys = []
    for fwd, bwd in zip(hs_f, hs_b):
        hidden = F.relu(self.linear1(F.concat([fwd, bwd])))
        ys.append(self.linear2(hidden))
    return ys
def predict(self, xs):
    """Score a batch of split sentences.

    Each sentence goes through ``self.extractor.process``, which yields a
    ``(words, chars, lengths)`` triple. Word embeddings are joined with
    max-pooled character-convolution features, run through a bidirectional
    LSTM and scored by ``linear1`` -> ReLU -> ``linear2``.

    Returns:
        list: raw score arrays, with the first and last row of every
        sentence dropped.
    """
    features = [self.extractor.process(x) for x in xs]
    batchsize = len(features)
    ws, cs, ls = zip(*features)

    def char_features(chars, length):
        # Embed, add a channel axis, convolve, then pool with an
        # (length, 1) window.
        embedded = F.expand_dims(self.emb_char(chars), 1)
        pooled = F.max_pooling_2d(self.conv_char(embedded), (length, 1))
        return F.squeeze(pooled)

    word_vecs = [self.emb_word(w) for w in ws]
    char_vecs = [char_features(c, l) for c, l in zip(cs, ls)]

    xs_f = [F.dropout(F.concat([w, c]),
                      self.dropout_ratio, train=self.train)
            for w, c in zip(word_vecs, char_vecs)]
    xs_b = [seq[::-1] for seq in xs_f]

    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [seq[::-1] for seq in hs_b]

    scores = []
    for fwd, bwd in zip(hs_f, hs_b):
        hidden = F.relu(self.linear1(F.concat([fwd, bwd])))
        scores.append(self.linear2(hidden))
    return [y.data[1:-1] for y in scores]
def __call__(self, xs):
    """Advance the recurrent cell by one step and return the new hidden state.

    Args:
        xs: input batch (per the original note, (batchsize, hidden_dim)).

    The input gate sees ``[xs, h, c]``, the candidate sees ``[xs, h]`` and
    the output gate sees ``[xs, h, new c]``. ``self.c`` and ``self.h`` are
    overwritten with the new state.
    """
    if self.h is None:
        # No state yet: start from zero-filled hidden and cell states.
        xp = chainer.cuda.get_array_module(xs.data)
        batchsize = xs.shape[0]
        prev_h = Variable(xp.zeros((batchsize, self.outsize), 'f'),
                          volatile='AUTO')
        prev_c = Variable(xp.zeros((batchsize, self.outsize), 'f'),
                          volatile='AUTO')
    else:
        prev_h = self.h
        prev_c = self.c

    in_gate = F.sigmoid(self.linear_in(F.concat([xs, prev_h, prev_c])))
    candidate = F.tanh(self.linear_c(F.concat([xs, prev_h])))
    # The input gate also acts as the complement of the keep gate.
    self.c = in_gate * candidate + (1. - in_gate) * prev_c
    out_gate = F.sigmoid(self.linear_out(F.concat([xs, prev_h, self.c])))
    self.h = F.tanh(self.c) * out_gate
    return self.h
def __call__(self, xs, ts):
    """Compute the classification loss for a batch and report metrics.

    Args:
        xs: feature-id matrix; columns 0-6 are read as words, 7-13 as
            suffixes and 14 onwards as capitalisation features.
        ts: target labels, one per row.

    Returns:
        The softmax cross-entropy loss. Loss and accuracy are also sent
        to ``chainer.report``.
    """
    words = xs[:, :7]
    suffixes = xs[:, 7:14]
    caps = xs[:, 14:]

    word_vecs = self.emb_word(words)
    caps_vecs = self.emb_caps(caps)
    suffix_vecs = self.emb_suffix(suffixes)

    joined = F.concat([word_vecs, caps_vecs, suffix_vecs], 2)
    batchsize, ntokens, hidden = joined.data.shape
    # Flatten the token window into a single feature vector per example.
    flat = F.reshape(joined, (batchsize, ntokens * hidden))
    ys = self.linear(flat)

    loss = F.softmax_cross_entropy(ys, ts)
    acc = F.accuracy(ys, ts)
    chainer.report({"loss": loss, "accuracy": acc}, self)
    return loss
def __call__(self, ws, cs, ls, ts):
    """Compute the classification loss from word and averaged char features.

    Character embeddings are summed over axis 2 and divided by ``ls``,
    joined with the word embeddings, flattened per example and classified
    by ``linear1`` -> ReLU -> dropout -> ``linear2``.

    Returns:
        The softmax cross-entropy loss. Loss and accuracy are also sent
        to ``chainer.report``.
    """
    # Original shape notes: words (batchsize, windowsize, word_dim),
    # chars (batchsize, windowsize, max_char_len, char_dim).
    word_vecs = self.emb_word(ws)
    char_vecs = self.emb_char(cs)
    batchsize, windowsize, _, _ = char_vecs.data.shape

    # Sum over axis 2 and divide by ``ls`` (broadcast to match).
    char_sum = F.sum(char_vecs, 2)
    char_sum, lengths = F.broadcast(
        char_sum, F.reshape(ls, (batchsize, windowsize, 1)))
    char_mean = char_sum / lengths

    features = F.reshape(F.concat([word_vecs, char_mean], 2),
                         (batchsize, -1))
    hidden = F.dropout(F.relu(self.linear1(features)),
                       ratio=.5, train=self.train)
    ys = self.linear2(hidden)

    loss = F.softmax_cross_entropy(ys, ts)
    acc = F.accuracy(ys, ts)
    chainer.report({"loss": loss, "accuracy": acc}, self)
    return loss
def __call__(self, x):
    """Apply the gated recurrent layer to ``x`` and return the hidden state.

    ``self.pre(x)`` is split into three equal parts along the last axis:
    forget gate, candidate and output gate. The cell state ``self.c`` is
    updated (through ``strnn`` when the last axis index is 2, element-wise
    otherwise) and ``self.h`` is produced, optionally mixing in an
    attention context over ``self.encoding``.
    """
    # Lazily initialise on first use or after the encoding was cleared.
    if getattr(self, 'encoding', None) is None:
        self.batch_size = x.shape[0]
        self.init()

    last_axis = len(x.shape) - 1
    forget, candidate, output = F.split_axis(self.pre(x), 3, axis=last_axis)
    forget = F.sigmoid(forget)
    candidate = (1 - forget) * F.tanh(candidate)
    output = F.sigmoid(output)

    if last_axis == 2:
        self.c = strnn(forget, candidate, self.c[:self.batch_size])
    else:
        self.c = forget * self.c + candidate

    if self.attention:
        context = attention_sum(self.encoding, self.c)
        self.h = output * self.o(
            F.concat((self.c, context), axis=last_axis))
    else:
        self.h = self.c * output

    self.x = x
    return self.h
def forward(self, ws, ss, ps):
    """Return one score matrix per time step from stacked step-wise LSTMs.

    Word, suffix and prefix embeddings are concatenated, moved to
    time-major order, passed through dropout and then fed step by step to
    a two-layer forward stack and a two-layer backward stack.
    """
    batchsize, length = ws.shape
    xp = chainer.cuda.get_array_module(ws[0])

    word_vecs = self.emb_word(ws)  # original note: (batch, length, word_dim)
    suffix_vecs = F.reshape(self.emb_suf(ss), (batchsize, length, -1))
    prefix_vecs = F.reshape(self.emb_prf(ps), (batchsize, length, -1))

    # Swap the first two axes so the sequence can be split per position.
    steps = F.transpose(
        F.concat([word_vecs, suffix_vecs, prefix_vecs], 2), (1, 0, 2))
    steps = F.dropout(steps, self.dropout_ratio, train=self.train)
    steps = F.split_axis(steps, length, 0)

    forward_states = []
    backward_states = []
    self._init_state()
    for step_f, step_b in zip(steps, reversed(steps)):
        forward_states.append(
            self.lstm_f2(self.lstm_f1(F.squeeze(step_f, 0))))
        backward_states.append(
            self.lstm_b2(self.lstm_b1(F.squeeze(step_b, 0))))

    # Backward states were produced last-to-first; flip them to align.
    ys = []
    for fwd, bwd in zip(forward_states, reversed(backward_states)):
        ys.append(self.linear2(F.relu(self.linear1(F.concat([fwd, bwd])))))
    return ys
def __init__(self, src_vcb_num, trg_vcb_num, dim_emb, dim_hid,
             attention_type='concat'):
    """Build the base model and register the attention sub-links.

    The links returned by ``get_attention_components(attention_type,
    dim_hid)`` are added to this chain under their own names, and
    ``attention_type`` is stored on the instance.
    """
    super().__init__(src_vcb_num, trg_vcb_num, dim_emb, dim_hid)
    components = get_attention_components(attention_type, dim_hid)
    for link_name, link in components.items():
        self.add_link(link_name, link)
    self.attention_type = attention_type
def decode_once(self, x, state, train=True):
    """Run a single decoder step and return ``(output, state)``.

    ``state`` must hold ``'c'`` and ``'h'``. It may also hold
    ``'lengths'`` (defaults to ``self.lengths``) and ``'h_tilde'`` from the
    previous step. The same dict is updated in place with the new ``c``,
    ``h`` and ``h_tilde`` and is returned.
    """
    lengths = state.get('lengths', self.lengths)
    cell = state['c']
    hidden = state['h']
    prev_h_tilde = state.get('h_tilde', None)

    token_emb = self.trg_emb(x)
    length_emb = self.len_emb(lengths)
    lstm_in = self.eh(token_emb) + self.hh(hidden) + self.lh(length_emb)
    # Feed the previous attentional state back in when there is one.
    if prev_h_tilde is not None:
        lstm_in += self.ch(prev_h_tilde)
    cell, hidden = F.lstm(cell, lstm_in)

    attended = self.attender(hidden, train=train)
    h_tilde = F.tanh(self.w_c(F.concat([attended, hidden])))
    output = self.ho(h_tilde)

    state['c'] = cell
    state['h'] = hidden
    state['h_tilde'] = h_tilde
    return output, state
def __call__(self, x, train=True):
    """Classify ``x`` with parallel convolutions and a stack of dense layers.

    The embedding is detached from the graph unless ``self.non_static`` is
    set. Each filter size gets its own convolution, ReLU and max-pooling.
    The pooled maps are concatenated and passed through the ``'l<i>'``
    layers, with dropout after every layer except the last.

    NOTE(review): ``train`` is accepted but not read in this method.
    """
    embedded = self['embed'](x)
    if not self.non_static:
        # Re-wrap the raw data so no gradient reaches the embedding.
        embedded = Variable(embedded.data)
    shape = embedded.shape
    images = F.reshape(embedded, (shape[0], 1, shape[1], shape[2]))

    pooled = []
    for filter_h in self.filter_sizes:
        window = (self.doc_length - filter_h + 1, 1)
        feature_map = F.relu(self['conv' + str(filter_h)](images))
        pooled.append(F.max_pooling_2d(feature_map, window))
    h = F.concat(pooled)

    # All layers but the last get ReLU + dropout; the last gets ReLU only.
    last = max(len(self.hidden_units) - 1, 0)
    for index in range(last):
        h = F.dropout(F.relu(self['l' + str(index)](h)))
    return F.relu(self['l' + str(last)](h))
def decode(self, z):
    """Decode ``z`` together with the stored ``self.a_enc``.

    Returns:
        tuple: ``(self.pmu, self.pln_var)``, both of which are also stored
        on the instance.
    """
    aux = self.a_enc
    # When called from sampling, the batch sizes of ``z`` and ``a_enc`` can
    # differ; truncate ``a_enc`` in place to the batch size of ``z``.
    if aux.shape[0] != z.shape[0]:
        aux.volatile = 'ON'
        rows = z.shape[0]
        aux.data = aux.data[0:rows, :]

    hidden = F.crelu(self.plinx0(F.concat((z, aux), axis=1)))
    for index in range(1, self.num_layers):
        hidden = F.crelu(self['plinx' + str(index)](hidden))

    self.pmu = self.plinx_mu(hidden)
    self.pln_var = self.plinx_ln_var(hidden)
    return self.pmu, self.pln_var
def encode_z(self, x, a):
    """Encode ``x`` and ``a`` into ``(qmu_z, qln_var_z)``.

    ``a`` has to be supplied by the caller; it is not sampled here. Each
    layer is linear -> batch norm -> CReLU. Both results are also stored
    on the instance.
    """
    hidden = self.qlinz0(F.concat((x, a), axis=1))
    hidden = F.crelu(self.qlinz_batch_norm_0(hidden))

    for index in range(1, self.num_layers):
        hidden = self['qlinz' + str(index)](hidden)
        hidden = self['qlinz_batch_norm_' + str(index)](hidden)
        hidden = F.crelu(hidden)

    self.qmu_z = self.qlinz_mu(hidden)
    self.qln_var_z = self.qlinz_ln_var(hidden)
    return self.qmu_z, self.qln_var_z
def decode_a(self, z, x):
    """Decode ``x`` and ``z`` into ``(pmu_a, pln_var_a)``.

    The inputs are concatenated as ``(x, z)`` along axis 1. Each layer is
    linear -> batch norm -> CReLU. Both results are also stored on the
    instance.
    """
    hidden = self.plina0(F.concat((x, z), axis=1))
    hidden = F.crelu(self.plina_batch_norm_0(hidden))

    for index in range(1, self.num_layers):
        hidden = self['plina' + str(index)](hidden)
        hidden = self['plina_batch_norm_' + str(index)](hidden)
        hidden = F.crelu(hidden)

    self.pmu_a = self.plina_mu(hidden)
    self.pln_var_a = self.plina_ln_var(hidden)
    return self.pmu_a, self.pln_var_a
def decode_a(self, z):
    """Decode ``z`` alone into ``(pmu_a, pln_var_a)``.

    Each layer is linear -> batch norm -> CReLU. Both results are also
    stored on the instance.
    """
    hidden = F.crelu(self.plina_batch_norm_0(self.plina0(z)))

    for index in range(1, self.num_layers):
        hidden = self['plina' + str(index)](hidden)
        hidden = self['plina_batch_norm_' + str(index)](hidden)
        hidden = F.crelu(hidden)

    self.pmu_a = self.plina_mu(hidden)
    self.pln_var_a = self.plina_ln_var(hidden)
    return self.pmu_a, self.pln_var_a
def decode(self, z):
    """Decode ``z`` together with the stored ``self.a_enc`` into a logit.

    Returns:
        ``self.p_ber_prob_logit``, which is also stored on the instance.
    """
    aux = self.a_enc
    # When called from sampling, the batch sizes of ``z`` and ``a_enc`` can
    # differ; truncate ``a_enc`` in place to the batch size of ``z``.
    if aux.shape[0] != z.shape[0]:
        aux.volatile = 'ON'
        rows = z.shape[0]
        aux.data = aux.data[0:rows, :]

    hidden = F.crelu(self.plinx0(F.concat((z, aux), axis=1)))
    for index in range(1, self.num_layers):
        hidden = F.crelu(self['plinx' + str(index)](hidden))

    self.p_ber_prob_logit = self.plinx_ber_prob(hidden)
    return self.p_ber_prob_logit
def encode(self, X, skip_mask=None):
    """Run the encoder stack on ``X`` and return each layer's last hidden state.

    With ``self.densely_connected`` set, every layer after the first
    receives the concatenation of all previous layer outputs. Otherwise it
    receives only the previous layer's output.

    Returns:
        list: ``get_last_hidden_state()`` of every encoder layer, in order.
    """
    batchsize = X.shape[0]
    seq_length = X.shape[1]

    embedded = F.swapaxes(self.encoder_embed(X), 1, 2)

    def merged(outputs):
        # Dense connectivity: join all outputs so far; else take the latest.
        return F.concat(outputs) if self.densely_connected else outputs[-1]

    layer_outputs = [
        self._forward_encoder_layer(0, embedded, skip_mask=skip_mask)]
    for layer_index in range(1, self.num_layers):
        layer_outputs.append(self._forward_encoder_layer(
            layer_index, merged(layer_outputs), skip_mask=skip_mask))

    # NOTE(review): the merged (and optionally dropped-out) output below
    # is computed but is not part of the return value.
    out_data = merged(layer_outputs)
    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    return [self.get_encoder(layer_index).get_last_hidden_state()
            for layer_index in range(0, self.num_layers)]
def __call__(self, X, return_last=False):
    """Run the layer stack on ``X`` and return vocabulary scores.

    With ``self.densely_connected`` set, every layer after the first
    receives the concatenation of all previous layer outputs. When
    ``return_last`` is true only the final position along axis 2 is kept
    before the output projection.

    Returns:
        Scores reshaped to ``(-1, self.vocab_size)``.
    """
    batchsize = X.shape[0]
    seq_length = X.shape[1]

    embedded = F.swapaxes(self.embed(X), 1, 2)

    def merged(outputs):
        # Dense connectivity: join all outputs so far; else take the latest.
        return F.concat(outputs) if self.densely_connected else outputs[-1]

    layer_outputs = [self._forward_layer(0, embedded)]
    for layer_index in range(1, self.num_layers):
        layer_outputs.append(
            self._forward_layer(layer_index, merged(layer_outputs)))

    out_data = merged(layer_outputs)
    if return_last:
        # Keep axis 2 so the following layers still see three axes.
        out_data = out_data[:, :, -1, None]
    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    return F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))
def Q_func(self, state):
    """Compute Q-values from a combined market/agent state array.

    The last ``self.agent_state_dim`` entries of the final axis are the
    agent state and the first ``self.market_state_dim`` entries are the
    market state. Each part goes through its own three tanh layers; the
    results are concatenated on axis 1 and passed through ``fc4``, ``fc5``
    and ``q_value``.

    Args:
        state: array with 2 or 3 dimensions.

    Returns:
        The output of ``self.q_value``.

    Raises:
        ValueError: if ``state`` is neither 2- nor 3-dimensional.
    """
    if state.ndim not in (2, 3):
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(
            "state must be 2- or 3-dimensional, got ndim=%d" % state.ndim)

    # Ellipsis slicing covers both supported ranks with one expression.
    agent_state = state[..., -self.agent_state_dim:]
    market_state = state[..., :self.market_state_dim]

    a_state = Variable(agent_state)
    m_state = Variable(market_state)

    a = F.tanh(self.a1(a_state))
    a = F.tanh(self.a2(a))
    a = F.tanh(self.a3(a))

    m = F.tanh(self.s1(m_state))
    m = F.tanh(self.s2(m))
    m = F.tanh(self.s3(m))

    # NOTE(review): the join is on axis 1 for 3-D input as well -- confirm
    # that this is intended.
    new_state = F.concat((a, m), axis=1)
    h = F.tanh(self.fc4(new_state))
    h = F.tanh(self.fc5(h))
    return self.q_value(h)
def __call__(self, x, z, ze, mask, conv_mask):
    """Apply the stacked convolution + attention layers to ``x``.

    Every layer left-pads ``x`` along axis 2, runs its convolution,
    derives an attention query from the convolution output plus the
    original input, attends over ``z`` / ``ze`` and merges the result
    into ``x`` with residual scaling.

    Returns:
        The updated ``x`` after the last layer.
    """
    att_scale = self.xp.sum(
        mask, axis=2, keepdims=True)[:, None, :, :] ** 0.5
    left_pad = self.xp.zeros(
        (x.shape[0], x.shape[1], self.width - 1, 1), dtype=x.dtype)
    first_input = x
    z = F.squeeze(z, axis=3)

    # Note from the original authors: the handling of input, output and
    # attention result follows the reference code, which differs slightly
    # from the paper's description.
    for conv_name, preatt_name in zip(self.conv_names, self.preatt_names):
        conv = getattr(self, conv_name)
        # Output of the convolution (GLU) layer.
        conv_out = conv(F.concat([left_pad, x], axis=2), conv_mask)

        # Attention over the encoder, queried with the convolution output.
        pre_attention = seq_linear(getattr(self, preatt_name), conv_out)
        query = F.squeeze(first_input + pre_attention, axis=3)
        context = self.attend(query, z, ze, mask) * att_scale

        # Merge both into the residual stream, scaling at each sum.
        x = (x + (context + conv_out) * scale05) * scale05
    return x
def __call__(self, pmap, fmap, cmap):
    """Combine the three input maps and run them through the conv stack.

    ``fmap`` goes through ``conv0`` + ReLU, ``cmap`` is average-pooled
    with kernel and stride 8, and both are concatenated with ``pmap`` on
    axis 1. ``conv1`` .. ``conv4`` are each followed by ReLU; ``conv5``
    is not.
    """
    features = F.relu(self.conv0(fmap))
    pooled_cmap = F.average_pooling_2d(cmap, ksize=8, stride=8)
    h = F.concat((features, pmap, pooled_cmap), 1)
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        h = F.relu(conv(h))
    return self.conv5(h)
def __call__(self, xs):
    """Classify ``xs`` with three parallel convolutions over the embeddings.

    Each of ``conv3``, ``conv4`` and ``conv5`` is followed by ReLU and a
    max-pooling whose window equals the size of axis 2 of its own output.
    The pooled maps are concatenated on axis 1 and passed through tanh,
    dropout and ``l1``.
    """
    if self.freeze:
        self.embed.disable_update()

    embedded = self.embed(xs)
    batchsize, height, width = embedded.shape
    # Add a single-channel axis for the 2-D convolutions.
    images = F.reshape(embedded, (batchsize, 1, height, width))

    feature_maps = [conv(images)
                    for conv in (self.conv3, self.conv4, self.conv5)]
    pooled = [F.max_pooling_2d(F.relu(fm), fm.shape[2])
              for fm in feature_maps]
    concat_layer = F.concat(pooled, axis=1)

    # The final layer is always evaluated with the 'train' config set to True.
    with chainer.using_config('train', True):
        y = self.l1(F.dropout(F.tanh(concat_layer)))
    return y
def forward(self, data):
    """Score every item of ``data`` as a parent candidate and as a root.

    Every item is embedded three times (``p_embed``, ``c_embed``,
    ``r_embed``) from its first two fields and encoded by the matching
    encoder.

    Returns:
        tuple: ``(parent_scores, root_scores)`` with shapes
        ``(len(data), len(data))`` and ``(1, len(data))``.
    """
    n = len(data)

    def embed_all(embed):
        return [embed(d[0], d[1]) for d in data]

    ep_list = embed_all(self.p_embed)
    ec_list = embed_all(self.c_embed)
    er_list = embed_all(self.r_embed)

    p_list = self.p_encode(ep_list)
    c_list = self.c_encode(ec_list)
    r_list = self.r_encode(er_list)

    # Stack the encodings and add a leading batch axis of size one.
    parents = functions.reshape(
        functions.concat(p_list, 0), (1, n, self.hidden_size))
    children = functions.reshape(
        functions.concat(c_list, 0), (1, n, self.hidden_size))
    roots = functions.concat(r_list, 0)

    parent_scores = functions.reshape(
        functions.batch_matmul(children, parents, transb=True), (n, n))
    root_scores = functions.reshape(self.r_scorer(roots), (1, n))
    return parent_scores, root_scores
def __call__(self):
    """Produce the decoder output for the current hidden state.

    Attention over ``self.S`` yields a context vector, which is projected
    together with ``self.h`` into ``self.ht``. The vocabulary scores come
    from ``affine_vocab`` on ``tanh(self.ht)``, optionally adjusted by the
    lexicon model. The attention is included in the output only outside
    training.
    """
    mem_optimize = nmtrain.optimization.chainer_mem_optimize

    # Attention weights over the stored states, then the context vector.
    attention = self.attention(self.S, self.h)
    context = F.squeeze(
        F.batch_matmul(self.S, attention, transa=True), axis=2)

    # Project hidden state and context together.
    self.ht = self.context_project(F.concat((self.h, context), axis=1))

    # Word scores.
    y = mem_optimize(self.affine_vocab, F.tanh(self.ht), level=1)
    if self.use_lexicon:
        y = self.lexicon_model(y, attention, self.ht, self.lexicon_matrix)

    if nmtrain.environment.is_train():
        return nmtrain.models.decoders.Output(y=y)
    # Outside training the attention is returned as well.
    return nmtrain.models.decoders.Output(y=y, a=attention)
def convert(batch, device):
    """Split a batch of ``(x, y)`` pairs and move both halves to ``device``.

    Args:
        batch: list of ``(x, y)`` pairs.
        device: ``None`` to leave the data untouched, a negative value to
            send every array through ``chainer.dataset.to_device``
            individually, or a non-negative value to transfer all arrays
            in one concatenated copy and split them again afterwards.

    Returns:
        tuple: all converted ``x`` items followed by all converted ``y``
        items.
    """
    def to_device_batch(batch):
        # An empty batch has nothing to transfer; without this guard the
        # concatenate call below would fail on an empty list.
        if device is None or not batch:
            return batch
        if device < 0:
            return [chainer.dataset.to_device(device, x) for x in batch]
        # One big transfer instead of many small ones: concatenate,
        # send, then cut at the cumulative lengths.
        xp = cuda.cupy.get_array_module(*batch)
        concat = xp.concatenate(batch, axis=0)
        sections = numpy.cumsum([len(x) for x in batch[:-1]], dtype='i')
        concat_dev = chainer.dataset.to_device(device, concat)
        return cuda.cupy.split(concat_dev, sections)

    return tuple(
        to_device_batch([x for x, _ in batch]) +
        to_device_batch([y for _, y in batch]))
def update_core(self):
    """Run one update of the discriminator and then the generator.

    The next batch of ``(A, B)`` pairs is converted, a fake ``B`` is
    generated from ``A``, and the discriminator scores the real and fake
    pairs concatenated with ``A``.
    """
    batch = self.get_iterator('main').next()
    inputs = data_process(
        [a for a, b in batch], self.converter, self.device)
    targets = data_process(
        [b for a, b in batch], self.converter, self.device)

    real_pair = F.concat((inputs, targets))
    generated = self.G(inputs, test=False)
    fake_pair = F.concat((inputs, generated))

    real_score = self.D(real_pair, test=False)
    fake_score = self.D(fake_pair, test=False)

    gen_optimizer = self.get_optimizer('main')
    dis_optimizer = self.get_optimizer('D')
    # Discriminator first, then the generator.
    dis_optimizer.update(self.loss_D, real_score, fake_score)
    gen_optimizer.update(self.loss_G, targets, generated, fake_score)
def __call__(self, x, test=False, dropout=True):
    """Run the eight-level encoder/decoder with skip connections.

    Encoder level 1 is a bare convolution. Levels 2-8 are leaky ReLU ->
    convolution -> batch norm. Decoder levels 1-7 are ReLU ->
    deconvolution -> batch norm, concatenated with the mirrored encoder
    output; the first three also apply dropout (controlled by
    ``dropout``). The last level is ReLU -> ``dc8`` -> tanh.
    """
    # Encoder: keep every level's output for the skip connections.
    encoded = [self.c1(x)]
    for level in range(2, 9):
        norm = getattr(self, 'b%d' % level)
        conv = getattr(self, 'c%d' % level)
        encoded.append(norm(conv(F.leaky_relu(encoded[-1])), test=test))

    # Decoder: level k is joined with encoder level 8 - k.
    h = encoded[-1]
    for level in range(1, 8):
        norm = getattr(self, 'b%d_d' % level)
        deconv = getattr(self, 'dc%d' % level)
        up = norm(deconv(F.relu(h)), test=test)
        if level <= 3:
            up = F.dropout(up, train=dropout)
        h = F.concat((up, encoded[7 - level]))

    return F.tanh(self.dc8(F.relu(h)))
def __call__(self, x1, x2):
    """Compute biaffine scores between every position of ``x1`` and ``x2``.

    Unless disabled through ``self.nobias[0]`` / ``self.nobias[1]``, a
    constant 1 feature is appended to the respective input. Unless
    ``self.nobias[2]`` is set, ``self.b`` is added to the result.

    Returns:
        Scores with shape ``(batch_size, len1, len2, out_size)``.
    """
    xp = self.xp
    out_size = self.out_size
    batch_size, len1, dim1 = x1.shape
    if not self.nobias[0]:
        ones1 = xp.ones((batch_size, len1, 1), dtype=xp.float32)
        x1 = F.concat((x1, ones1), axis=2)
        dim1 += 1

    len2, dim2 = x2.shape[1:]
    if not self.nobias[1]:
        ones2 = xp.ones((batch_size, len2, 1), dtype=xp.float32)
        x2 = F.concat((x2, ones2), axis=2)
        dim2 += 1

    # First affine map: fold batch and length so one matmul handles all rows.
    x1_flat = F.reshape(x1, (batch_size * len1, dim1))
    w_flat = F.reshape(
        F.transpose(self.W, (0, 2, 1)), (dim1, out_size * dim2))
    affine = F.reshape(
        F.matmul(x1_flat, w_flat), (batch_size, len1 * out_size, dim2))

    # Second map against x2, then move the output axis to the end.
    scores = F.reshape(
        batch_matmul(affine, x2, transb=True),
        (batch_size, len1, out_size, len2))
    biaffine = F.transpose(scores, (0, 1, 3, 2))

    if not self.nobias[2]:
        biaffine += F.broadcast_to(self.b, biaffine.shape)
    return biaffine
def __call__(self, xs: List[Variable], ys: List[Variable]) -> Variable:
    """Return the cross-entropy loss summed over sequences, divided by batch size.

    Source sequences are reversed before encoding. Every target sequence
    is used twice: prefixed with ``EOS`` as decoder input and suffixed
    with ``EOS`` as the expected output.
    """
    batch_size = len(xs)
    reversed_xs = [x[::-1] for x in xs]

    eos = np.array([EOS], dtype=np.int32)
    decoder_inputs = [F.concat((eos, y), axis=0) for y in ys]
    decoder_targets = [F.concat((y, eos), axis=0) for y in ys]

    embedded_xs = [self._embed_input(x) for x in reversed_xs]
    embedded_ys = [self._embed_output(y) for y in decoder_inputs]

    hidden_states, cell_states, attentions = self._encoder(
        None, None, embedded_xs)
    _, _, embedded_outputs = self._decoder(
        hidden_states, cell_states, embedded_ys)

    total = 0
    triples = zip(embedded_outputs, decoder_targets, attentions)
    for embedded_output, target, attention in triples:
        if self._use_attention:
            output = self._calculate_attention_layer_output(
                embedded_output, attention)
        else:
            output = self._extract_output(embedded_output)
        total += F.softmax_cross_entropy(output, target)

    total /= batch_size
    return total
def __call__(self, x_0: chainer.Variable, x_1: chainer.Variable) -> typing.List[chainer.Variable]:
    """Return three outputs taken at different depths of the network.

    ``x_1`` is only used when ``self.will_concat`` is set, in which case
    its ``c0_1`` features are concatenated with the ``c0_0`` features of
    ``x_0``. The first two outputs are global average pools (window equal
    to the full size of axes 2 and 3) passed through ``out_1`` / ``out_2``.
    The third is the raw output of ``c4``.
    """
    def global_average(feature_map):
        return chainer.functions.average_pooling_2d(
            feature_map, (feature_map.shape[2], feature_map.shape[3]))

    outputs = []
    h = self.c0_0(x_0)
    if self.will_concat:
        h = F.concat([h, self.c0_1(x_1)])

    h = self.c1(h)
    outputs.append(self.out_1(global_average(h)))

    h = self.c2(h)
    outputs.append(self.out_2(global_average(h)))

    h = self.c4(self.c3(h))
    outputs.append(h)
    return outputs
def __call__(self, X):
    """Predict observation, reward and termination from ``X`` plus noise.

    ``self.rand_sz`` standard-normal values per row are prepended to
    ``X`` before the ``ipt`` and ``out`` layers. All output columns but
    the last two go through softmax, and the last two go through sigmoid
    individually.
    """
    # Fresh random values for every call, one row per input row.
    noise = np.random.randn(X.data.shape[0], self.rand_sz)
    noise = Variable(noise.astype("float32"))

    # Attach the random values to the inputs.
    hidden = self.ipt(F.concat([noise, X]))
    raw = self.out(hidden)

    # Prior knowledge: the environment observation is a one-hot vector.
    observation = F.softmax(raw[:, :-2])
    # Prior knowledge: the reward is in [0, 1].
    reward = F.sigmoid(raw[:, [-2]])
    finished = F.sigmoid(raw[:, [-1]])
    return F.concat([observation, reward, finished])
def processDecLSTMOneStep(self, decInputEmb, lstm_states_in, finalHS, args, dropout_rate):
    """Advance the decoder LSTM by one step.

    Args:
        decInputEmb: embedding of the current decoder input.
        lstm_states_in: LSTM states to restore before the step.
        finalHS: previous final hidden state, used for input feeding.
        args: passed through to ``processOneStepForward``.
        dropout_rate: passed through to ``processOneStepForward``.

    Returns:
        tuple: ``(h1, lstm_states_out)``.

    Raises:
        AssertionError: if ``self.flag_dec_ifeed`` is neither 0 nor 1.
    """
    # 1. Restore the LSTM states supplied by the caller.
    self.model.decLSTM.setAllLSTMStates(lstm_states_in)

    # 2. Build the LSTM input, with or without input feeding.
    if self.flag_dec_ifeed == 0:
        # No input feeding: use the embedding as is.
        wenbed = decInputEmb
    elif self.flag_dec_ifeed == 1:
        # Input feeding: prepend the previous final hidden state.
        wenbed = chaFunc.concat((finalHS, decInputEmb))
    else:
        # Raised explicitly so the check survives ``python -O``; the
        # exception type and message match the former ``assert``.
        raise AssertionError("ERROR")

    # 3. Run one forward step of the LSTM stack.
    h1 = self.model.decLSTM.processOneStepForward(
        wenbed, args, dropout_rate)

    # 4. Collect the states after the step.
    lstm_states_out = self.model.decLSTM.getAllLSTMStates()
    return h1, lstm_states_out
def __call__(self, x):
    """Append minibatch-discrimination features to ``x``.

    ``self.t(x)`` is reshaped to ``(-1, n_kernels, kernel_dim)``. Absolute
    differences between all pairs of samples are summed over the kernel
    dimension, an identity term is added, and the sum of ``exp(-d)`` per
    sample and kernel is concatenated to ``x`` on axis 1.
    """
    minibatch_size = x.shape[0]
    kernels = F.reshape(
        self.t(x), (-1, self.n_kernels, self.kernel_dim))

    # Broadcast the kernels against their own transpose to pair samples.
    lhs = F.expand_dims(kernels, 3)
    rhs = F.expand_dims(F.transpose(kernels, (1, 2, 0)), 0)
    lhs, rhs = F.broadcast(lhs, rhs)
    diff = lhs - rhs

    xp = chainer.cuda.get_array_module(x.data)
    identity = F.expand_dims(
        xp.eye(minibatch_size, dtype=xp.float32), 1)
    identity = F.broadcast_to(
        identity, (minibatch_size, self.n_kernels, minibatch_size))

    distance = F.sum(abs(diff), axis=2)
    distance = F.broadcast_to(distance, identity.shape)
    shifted = distance + identity

    minibatch_features = F.sum(F.exp(-shifted), 2)
    return F.concat((x, minibatch_features), axis=1)
def __call__(self, x):
    """Append minibatch-discrimination features to ``x``.

    When ``self.train_weights`` equals ``False`` the weights of ``self.T``
    are restored from ``self.initial_T`` before use (if a copy is stored)
    and saved back to ``self.initial_T`` afterwards.
    """
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]
    weights_frozen = self.train_weights == False

    if weights_frozen and self.initial_T is not None:
        self.T.W.data = self.initial_T

    kernels = F.reshape(
        self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    kernels = F.expand_dims(kernels, 3)
    kernels_t = F.transpose(kernels, (3, 1, 2, 0))
    kernels, kernels_t = F.broadcast(kernels, kernels_t)
    norm = F.sum(abs(kernels - kernels_t), axis=2)

    # A large value on the diagonal drives each sample's self-term to zero.
    diagonal = xp.eye(batchsize, dtype=x.dtype).reshape(
        (batchsize, 1, batchsize))
    eraser = F.broadcast_to(diagonal, norm.shape)
    closeness = F.exp(-(norm + 1e6 * eraser))
    features = F.sum(closeness, axis=2)

    if weights_frozen:
        self.initial_T = self.T.W.data
    return F.concat((x, features), axis=1)