The following code examples, extracted from open-source Python projects, illustrate how to use chainer.functions.split_axis().
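Before the project snippets, here is a minimal, self-contained sketch of what F.split_axis() does: it splits an array along one axis, either into a fixed number of equal sections (integer argument) or at explicit indices (sequence argument). The array shape and values below are illustrative only.

import numpy as np
import chainer.functions as F

x = np.arange(12, dtype=np.float32).reshape(3, 4)

# Integer argument: split into 2 equal sections along axis 1 -> two (3, 2) chunks
a, b = F.split_axis(x, 2, axis=1)

# Sequence argument: split at indices 1 and 3 along axis 1 -> shapes (3, 1), (3, 2), (3, 1)
head, mid, tail = F.split_axis(x, [1, 3], axis=1)

print(a.shape, b.shape)                    # (3, 2) (3, 2)
print(head.shape, mid.shape, tail.shape)   # (3, 1) (3, 2) (3, 1)

Each returned chunk is a chainer.Variable, so the split participates in backpropagation; this is the pattern the examples below rely on when they split embedded batches, gate pre-activations, or per-timestep RNN outputs.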
def predict(self, xs):
    """
    batch: list of split sentences
    """
    batchsize = len(xs)
    fs = [self.extractor.process(x)[:2] for x in xs]
    ws, cs = concat_examples(fs, padding=IGNORE)
    cat_ys, dep_ys = self.forward(ws, cs)
    cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
    # dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))

    cat_ys = [F.log_softmax(
        F.reshape(y, (y.shape[1], -1))[1:len(x) + 1]).data for x, y in \
            zip(xs, F.split_axis(cat_ys, batchsize, 0))]

    dep_ys = [F.log_softmax(y[1:len(x) + 1, :len(x) + 1]).data \
            for x, y in zip(xs, dep_ys)]
    assert len(cat_ys) == len(dep_ys)
    return zip(cat_ys, dep_ys)
def __call__(self, ws, ss, ps, ts):
    """
    xs [(w,s,p,y), ..., ]
    w: word, s: suffix, p: prefix, y: label
    """
    batchsize, length = ts.shape
    ys = self.forward(ws, ss, ps)[1:-1]
    ts = [F.squeeze(x, 0) for x in F.split_axis(F.transpose(ts), length, 0)]
    loss = reduce(lambda x, y: x + y,
        [F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)])
    acc = reduce(lambda x, y: x + y,
        [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)])
    acc /= length
    chainer.report({
        "loss": loss,
        "accuracy": acc
    }, self)
    return loss
def forward(self, ws, ss, ps):
    batchsize, length = ws.shape
    xp = chainer.cuda.get_array_module(ws[0])
    ws = self.emb_word(ws)  # (batch, length, word_dim)
    ss = F.reshape(self.emb_suf(ss), (batchsize, length, -1))
    ps = F.reshape(self.emb_prf(ps), (batchsize, length, -1))
    hs = F.transpose(F.concat([ws, ss, ps], 2), (1, 0, 2))
    hs = F.dropout(hs, self.dropout_ratio, train=self.train)
    hs = F.split_axis(hs, length, 0)
    hs_f = []
    hs_b = []
    self._init_state()
    for h_in_f, h_in_b in zip(hs, reversed(hs)):
        h_f = self.lstm_f2(self.lstm_f1(F.squeeze(h_in_f, 0)))
        hs_f.append(h_f)
        h_b = self.lstm_b2(self.lstm_b1(F.squeeze(h_in_b, 0)))
        hs_b.append(h_b)

    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, reversed(hs_b))]
    return ys
def predict(self, xs):
    """
    batch: list of split sentences
    """
    batchsize = len(xs)
    xs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(xs, padding=IGNORE)
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
    dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))

    cat_ys = [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in \
            zip(xs, F.split_axis(cat_ys, batchsize, 0))]

    dep_ys = [F.squeeze(F.log_softmax(y[1:len(x) + 1, :-1]), 0).data \
            for x, y in zip(xs, F.split_axis(dep_ys, batchsize, 0))]
    return cat_ys, dep_ys
def __call__(self, x, split_into_variables=True):
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    out_data = super(AcousticModel, self).__call__(x)
    assert out_data.shape[3] == seq_length

    # For the CTC loss, the RNN output has to be split into per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
def __call__(self, X, ht_enc):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[..., :-pad]
    Vh = self.V(ht_enc)

    # copy Vh
    # e.g.
    # WX = [[[ 0  1  2]
    #        [ 3  4  5]
    #        [ 6  7  8]
    # Vh = [[11, 12, 13]]
    #
    # Vh, WX = F.broadcast(F.expand_dims(Vh, axis=2), WX)
    #
    # WX = [[[ 0  1  2]
    #        [ 3  4  5]
    #        [ 6  7  8]
    # Vh = [[[ 11  11  11]
    #        [ 12  12  12]
    #        [ 13  13  13]
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def _setup_slice(self, layer):
    if layer.slice_param.HasField('axis'):
        axis = layer.slice_param.axis
    elif layer.slice_param.HasField('slice_dim'):
        axis = layer.slice_param.slice_dim
    else:
        axis = 1

    if layer.slice_param.slice_point:
        indices_or_sections = list(layer.slice_param.slice_point)
    else:
        indices_or_sections = len(list(layer.top))

    self.forwards[layer.name] = _SingleArgumentFunction(
        functions.split_axis,
        indices_or_sections=indices_or_sections,
        axis=axis
    )

    self._add_layer(layer)
def initialize_entities(self, entities, max_entnum, train=True):
    e2sD = {}
    old2newD = {}
    if train:
        news = self.xp.random.randint(0, max_entnum, len(entities))
    else:
        news = entities
    new_e_L = []
    for new, entity in zip(news, entities):
        old2newD[entity] = int(new)
        new_e_L.append(new)
    es_L = F.split_axis(
        self.embed(chainer.Variable(self.xp.array(new_e_L, dtype=np.int32),
                                    volatile=not train)),
        len(new_e_L), axis=0)
    if len(new_e_L) <= 1:
        es_L = [es_L]
    for new_e, es in zip(new_e_L, es_L):
        e2sD[new_e] = es
    return old2newD, e2sD
def __call__(self, chars):
    if not isinstance(chars, (tuple, list)):
        chars = [chars]
    char_ids, boundaries = self._create_sequence(chars)
    x = self.embed(self.xp.array(char_ids))
    x = F.dropout(x, self._dropout)
    length, dim = x.shape
    C = self.conv(F.reshape(x, (1, 1, length, dim)))
    # C.shape -> (1, out_size, length, 1)
    C = F.split_axis(F.transpose(F.reshape(C, (self.out_size, length))),
                     boundaries, axis=0)
    ys = F.max(F.pad_sequence(
        [matrix for i, matrix in enumerate(C) if i % 2 == 1],
        padding=-np.inf), axis=1)  # max over time pooling
    # assert len(chars) == ys.shape[0]
    return ys
def calc_log_posterior(theta, x, n=None):
    """Calculate unnormalized log posterior, ``log p(theta | x) + C``

    Args:
        theta(chainer.Variable): model parameters
        x(numpy.ndarray): sample data
        n(int): total data size
    Returns:
        chainer.Variable: Variable holding the unnormalized log posterior,
        ``log p(theta | x) + C`` of shape ``()``
    """
    theta1, theta2 = F.split_axis(theta, 2, 0)
    log_prior1 = F.sum(F.log(gaussian.gaussian_likelihood(theta1, 0, VAR1)))
    log_prior2 = F.sum(F.log(gaussian.gaussian_likelihood(theta2, 0, VAR2)))
    prob1 = gaussian.gaussian_likelihood(x, theta1, VAR_X)
    prob2 = gaussian.gaussian_likelihood(x, theta1 + theta2, VAR_X)
    log_likelihood = F.sum(F.log(prob1 / 2 + prob2 / 2))
    if n is not None:
        log_likelihood *= n / len(x)
    return log_prior1 + log_prior2 + log_likelihood
def __call__(self, X):
    # remove right paddings
    # e.g.
    # kernel_size = 3
    # pad = 2
    # input sequence with paddings:
    # [0, 0, x1, x2, x3, 0, 0]
    # |< t1 >|
    #    |< t2 >|
    #       |< t3 >|
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, :-pad]

    A, B = functions.split_axis(WX, 2, axis=1)
    self.H = A * functions.sigmoid(B)

    return self.H
def __call__(self, x):
    if not hasattr(self, 'encoding') or self.encoding is None:
        self.batch_size = x.shape[0]
        self.init()
    dims = len(x.shape) - 1
    f, z, o = F.split_axis(self.pre(x), 3, axis=dims)
    f = F.sigmoid(f)
    z = (1 - f) * F.tanh(z)
    o = F.sigmoid(o)

    if dims == 2:
        self.c = strnn(f, z, self.c[:self.batch_size])
    else:
        self.c = f * self.c + z

    if self.attention:
        context = attention_sum(self.encoding, self.c)
        self.h = o * self.o(F.concat((self.c, context), axis=dims))
    else:
        self.h = self.c * o

    self.x = x
    return self.h
def __call__(self, x, margin_factor=1.0, train=True):
    """
    Embed samples using the CNN, then calculate distances and triplet loss.

    x is a batch of size 3n following the form:

    | anchor_1   |
    | [...]      |
    | anchor_n   |
    | positive_1 |
    | [...]      |
    | positive_n |
    | negative_1 |
    | [...]      |
    | negative_n |
    """
    anc, pos, neg = (self.embed(h) for h in F.split_axis(x, 3, 0))
    dist_pos, dist_neg = self.squared_distance(anc, pos, neg)
    mf = margin_factor if train else 1.0  # no margin when testing
    return self.compute_loss(dist_pos, dist_neg, mf)
def __call__(self, s, xs):
    """Calculate all hidden states and cell states.

    Args:
        s (~chainer.Variable or None): Initial (hidden & cell) states.
            If ``None`` is specified zero-vector is used.
        xs (list of ~chainer.Variable): List of input sequences.
            Each element ``xs[i]`` is a :class:`chainer.Variable` holding
            a sequence.

    Return:
        (hy, cy): a pair of hidden and cell states at the end of the sequence,
        ys: a hidden state sequence at the last layer
    """
    if len(xs) > 1:
        sections = np.cumsum(np.array([len(x) for x in xs[:-1]], dtype=np.int32))
        xs = F.split_axis(self.embed(F.concat(xs, axis=0)), sections, axis=0)
    else:
        xs = [self.embed(xs[0])]
    if s is not None:
        hy, cy, ys = self.lstm(s[0], s[1], xs)
    else:
        hy, cy, ys = self.lstm(None, None, xs)
    return (hy, cy), ys
def predict(self, input_x):
    if isinstance(input_x, chainer.Variable):
        device = cuda.get_device(input_x.data)
    else:
        device = cuda.get_device(input_x)
    xp = self.predictor.xp
    with device:
        output = self.predictor(input_x)
        batch_size, input_channel, input_h, input_w = input_x.shape
        batch_size, _, grid_h, grid_w = output.shape
        x, y, w, h, conf, prob = F.split_axis(
            F.reshape(output, (batch_size, self.predictor.n_boxes,
                               self.predictor.n_classes + 5, grid_h, grid_w)),
            (1, 2, 3, 4, 5), axis=2)
        x = F.sigmoid(x)
        y = F.sigmoid(y)
        conf = F.sigmoid(conf)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))

        # convert coordinates to those on the image
        x_shift = xp.asarray(np.broadcast_to(
            np.arange(grid_w, dtype=np.float32), x.shape))
        y_shift = xp.asarray(np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape))
        w_anchor = xp.asarray(np.broadcast_to(np.reshape(
            np.array(self.anchors, dtype=np.float32)[:, 0],
            (self.predictor.n_boxes, 1, 1, 1)), w.shape))
        h_anchor = xp.asarray(np.broadcast_to(np.reshape(
            np.array(self.anchors, dtype=np.float32)[:, 1],
            (self.predictor.n_boxes, 1, 1, 1)), h.shape))
        box_x = (x + x_shift) / grid_w
        box_y = (y + y_shift) / grid_h
        box_w = F.exp(w) * w_anchor / grid_w
        box_h = F.exp(h) * h_anchor / grid_h

        return box_x, box_y, box_w, box_h, conf, prob
def makeEmbedBatch(self, xs, reverse=False):
    if reverse:
        xs = [xp.asarray(x[::-1], dtype=xp.int32) for x in xs]
    elif not reverse:
        # xs = xp.asarray(xs, dtype=xp.int32)
        xs = [xp.asarray(x, dtype=xp.int32) for x in xs]
    section_pre = np.array([len(x) for x in xs[:-1]], dtype=np.int32)
    sections = np.cumsum(section_pre)  # CuPy does not have cumsum()
    xs = F.split_axis(self.embed(F.concat(xs, axis=0)), sections, axis=0)
    return xs
def vectorize(args, encdec, sent_arr):
    tt_batch = [[encdec.vocab.stoi(char) for char in word_arr.split(" ")]
                for word_arr in sent_arr]
    mu_arr, var_arr = encdec.encode(tt_batch)
    mu_arr = mu_arr[0]
    mu_arr = F.split_axis(mu_arr, len(sent_arr), axis=0)
    var_arr = var_arr[0]
    var_arr = F.split_axis(var_arr, len(sent_arr), axis=0)
    # print("cossim:{}".format(cosSim(mu_arr[0].data[0], mu_arr[1].data[0])))
    return mu_arr, var_arr
def weighted_cross_entropy(p, t, weight_arr, sec_arr, weigh_flag=True):
    print("p:{}".format(p.data.shape))
    b = np.zeros(p.shape, dtype=np.float32)
    b[np.arange(p.shape[0]), t] = 1
    soft_arr = F.softmax(p)
    log_arr = -F.log(soft_arr)
    xent = b * log_arr
    # print("sec_arr:{}".format(sec_arr))
    # print("xent_shape:{}".format(xent.data.shape))
    xent = F.split_axis(xent, sec_arr, axis=0)
    print([xent_e.data.shape[0] for xent_e in xent])
    x_sum = [F.reshape(F.sum(xent_e) / xent_e.data.shape[0], (1, 1))
             for xent_e in xent]
    # print("x_sum:{}".format([x_e.data for x_e in x_sum]))
    xent = F.concat(x_sum, axis=0)
    # print("xent1:{}".format(xent.data))
    xent = F.max(xent, axis=1) / p.shape[0]
    # print("xent2:{}".format(xent.data))
    if not weigh_flag:
        return F.sum(xent)
    # print("wei_arr:{}".format(weight_arr))
    # print("wei_arr:{}".format(weight_arr.data.shape))
    print("xent3:{}".format(xent.data.shape))
    wxent = F.matmul(weight_arr, xent, transa=True)
    wxent = F.sum(F.sum(wxent, axis=0), axis=0)
    print("wxent:{}".format(wxent.data))
    return wxent
def sequence_embed(embed, xs):
    x_len = [len(x) for x in xs]
    x_section = np.cumsum(x_len[:-1])
    ex = embed(F.concat(xs, axis=0))
    exs = F.split_axis(ex, x_section, 0)
    return exs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, xs)
        h, _ = self.encoder(None, exs)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def translate(self, xs, max_length=100):
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs_f = xs
        xs_b = [x[::-1] for x in xs]
        exs_f = sequence_embed(self.embed_x, xs_f)
        exs_b = sequence_embed(self.embed_x, xs_b)
        fx, _ = self.encoder_f(None, exs_f)
        bx, _ = self.encoder_b(None, exs_b)
        h = F.concat([fx, bx], axis=2)
        ys = self.xp.full(batch, EOS, 'i')
        result = []
        for i in range(max_length):
            eys = self.embed_y(ys)
            eys = chainer.functions.split_axis(eys, batch, 0)
            h, ys = self.decoder(h, eys)
            cys = chainer.functions.concat(ys, axis=0)
            wy = self.W(cys)
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    result = cuda.to_cpu(self.xp.stack(result).T)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = np.argwhere(y == EOS)
        if len(inds) > 0:
            y = y[:inds[0, 0]]
        outs.append(y)
    return outs
def compute_vecs(self, word_ids, word_boundaries, phrase_num,
                 char_vecs=None):
    word_ids = my_variable(word_ids, volatile=not self.train)
    word_embs = self.emb(word_ids)  # total_len x dim
    word_embs_reshape = F.reshape(word_embs, (1, 1, -1, self.emb_dim))

    if self.word_level_flag and char_vecs is not None:
        # print(char_vecs.data.shape)
        # print(word_embs.data.shape)
        word_embs = F.concat([word_embs, char_vecs], axis=1)
        # print(word_embs.data.shape)
        dim = self.emb_dim + self.add_dim
        word_embs_reshape = F.reshape(word_embs, (1, 1, -1, dim))

    # 1 x 1 x total_len x dim
    # convolution
    word_emb_conv = self.conv(word_embs_reshape)
    # 1 x dim x total_len x 1
    word_emb_conv_reshape = F.reshape(word_emb_conv,
                                      (self.hidden_dim, -1))
    # max
    word_emb_conv_reshape = F.split_axis(word_emb_conv_reshape,
                                         word_boundaries, axis=1)

    embs = [F.max(word_emb_conv_word, axis=1) for i, word_emb_conv_word
            in enumerate(word_emb_conv_reshape) if i % 2 == 1]
    embs = F.concat(embs, axis=0)
    phrase_emb_conv = F.reshape(embs, (phrase_num, self.hidden_dim))
    return phrase_emb_conv
def forward(self, ws, cs):
    batchsize, length, max_word_len = cs.shape
    ws = self.emb_word(ws)  # (batch, length, word_dim)
    cs = F.reshape(
        F.max_pooling_2d(
            self.conv_char(
                F.reshape(
                    self.emb_char(cs),
                    (batchsize * length, 1, max_word_len, 50))),
            (max_word_len, 1)),
        (batchsize, length, self.char_dim))
    hs = F.transpose(F.concat([ws, cs], 2), (1, 0, 2))
    hs = F.dropout(hs, self.dropout_ratio, train=self.train)
    hs = F.split_axis(hs, length, 0)
    hs_f = []
    hs_b = []
    self._init_state()
    for h_in_f, h_in_b in zip(hs, reversed(hs)):
        h_f = self.lstm_f2(self.lstm_f1(F.reshape(h_in_f, (batchsize, -1))))
        hs_f.append(h_f)
        h_b = self.lstm_b2(self.lstm_b1(F.reshape(h_in_b, (batchsize, -1))))
        hs_b.append(h_b)

    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, reversed(hs_b))]

    cat_ys = [self.linear_cat2(F.dropout(
        F.elu(self.linear_cat1(h)), 0.5, train=self.train)) for h in hs]

    hs = [F.reshape(h, (length, -1)) for h in \
            F.split_axis(F.transpose(F.stack(hs, 2), (0, 2, 1)), batchsize, 0)]

    dep_ys = [self.biaffine(
        F.relu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
        F.relu(F.dropout(self.linear_head(h), 0.32, train=self.train)))
        for h in hs]

    return cat_ys, dep_ys
def __call__(self, ws, cs, cat_ts, dep_ts):
    batchsize, length = cat_ts.shape
    cat_ys, dep_ys = self.forward(ws, cs)

    cat_ys = cat_ys[1:-1]
    cat_ts = [F.reshape(x, (batchsize,)) for x \
            in F.split_axis(F.transpose(cat_ts), length, 0)]
    assert len(cat_ys) == len(cat_ts)

    cat_loss = reduce(lambda x, y: x + y,
        [F.softmax_cross_entropy(y, t) for y, t in zip(cat_ys, cat_ts)])
    cat_acc = reduce(lambda x, y: x + y,
        [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(cat_ys, cat_ts)])

    # hs [(length, hidden_dim), ...]
    dep_ys = [x[1:-1] for x in dep_ys]
    dep_ts = [F.reshape(x, (length,)) for x in F.split_axis(dep_ts, batchsize, 0)]
    dep_loss = reduce(lambda x, y: x + y,
        [F.softmax_cross_entropy(y, t) for y, t in zip(dep_ys, dep_ts)])
    dep_acc = reduce(lambda x, y: x + y,
        [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(dep_ys, dep_ts)])

    cat_acc /= length
    dep_acc /= batchsize
    chainer.report({
        "tagging_loss": cat_loss,
        "tagging_accuracy": cat_acc,
        "parsing_loss": dep_loss,
        "parsing_accuracy": dep_acc
    }, self)
    return cat_loss + dep_loss
def forward(self, ws, ss, ps, dep_ts=None):
    batchsize = len(ws)
    xp = chainer.cuda.get_array_module(ws[0])
    split = scanl(lambda x, y: x + y, 0, [w.shape[0] for w in ws])[1:-1]

    wss = self.emb_word(F.hstack(ws))
    sss = F.reshape(self.emb_suf(F.vstack(ss)), (-1, 4 * self.afix_dim))
    pss = F.reshape(self.emb_prf(F.vstack(ps)), (-1, 4 * self.afix_dim))
    ins = F.dropout(F.concat([wss, sss, pss]), self.dropout_ratio, train=self.train)

    xs_f = list(F.split_axis(ins, split, 0))
    xs_b = [x[::-1] for x in xs_f]

    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of category)]
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]

    dep_ys = [self.biaffine_arc(
        F.elu(F.dropout(self.arc_dep(h), 0.32, train=self.train)),
        F.elu(F.dropout(self.arc_head(h), 0.32, train=self.train))) for h in hs]

    # if dep_ts is not None and random.random >= 0.5:
    if dep_ts is not None:
        heads = dep_ts
    else:
        heads = [F.argmax(y, axis=1) for y in dep_ys]

    heads = F.elu(F.dropout(
        self.rel_head(
            F.vstack([F.embed_id(t, h, ignore_label=IGNORE) \
                    for h, t in zip(hs, heads)])),
        0.32, train=self.train))

    childs = F.elu(F.dropout(self.rel_dep(F.vstack(hs)), 0.32, train=self.train))
    cat_ys = self.biaffine_tag(childs, heads)

    cat_ys = list(F.split_axis(cat_ys, split, 0))

    return cat_ys, dep_ys
def set_state(self, state):
    self.x, self.c, self.h = F.split_axis(
        state, (self.in_size, self.in_size + self.size), axis=1)
def predict(self, xs):
    """
    batch: list of split sentences
    """
    batchsize = len(xs)
    fs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(fs, padding=-1)
    ys = self.forward(ws, ss, ps)
    ys = F.transpose(F.stack(ys, 2), (0, 2, 1))
    return [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in \
            zip(xs, F.split_axis(ys, batchsize, 0))]
def __call__(self, ws, ss, ps, cat_ts, dep_ts):
    """
    xs [(w,s,p,y), ..., ]
    w: word, s: suffix, p: prefix, y: label
    """
    batchsize, length = ws.shape
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_ys, dep_ys = cat_ys[1:-1], dep_ys[1:-1]

    cat_ts = [F.reshape(x, (batchsize,)) for x \
            in F.split_axis(F.transpose(cat_ts), length, 0)]
    dep_ts = [F.reshape(x, (batchsize,)) for x \
            in F.split_axis(F.transpose(dep_ts), length, 0)]

    cat_loss = reduce(lambda x, y: x + y,
        [F.softmax_cross_entropy(y, t) for y, t in zip(cat_ys, cat_ts)])
    cat_acc = reduce(lambda x, y: x + y,
        [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(cat_ys, cat_ts)])

    dep_loss = reduce(lambda x, y: x + y,
        [F.softmax_cross_entropy(y, t) for y, t in zip(dep_ys, dep_ts)])
    dep_acc = reduce(lambda x, y: x + y,
        [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(dep_ys, dep_ts)])

    cat_acc /= length
    dep_acc /= length
    chainer.report({
        "tagging_loss": cat_loss,
        "tagging_accuracy": cat_acc,
        "parsing_loss": dep_loss,
        "parsing_accuracy": dep_acc
    }, self)
    return cat_loss + dep_loss
def embed(self, source, train=True):
    xp = self.xp
    mask = xp.expand_dims(source != -1, -1)
    self.mask = chainer.Variable(mask, volatile=not train)
    x = chainer.Variable(source, volatile=not train)
    embs = self.src_emb(x)
    embs = F.split_axis(embs, embs.data.shape[1], 1)
    return embs
def __call__(self, source, target, lengths=None, train=True):
    self.batchsize, self.source_length = source.shape
    state = self.encode(source, train=train)
    state = self.prepare_decoding(state, lengths, train=train)
    y = None
    if target is not None:
        y = chainer.Variable(target, volatile=not train)
        y = F.split_axis(y, y.data.shape[1], 1)
    outs, loss = self.decode(state, y, train=train)
    return outs, loss
def iaf(self, z, h, lin1, lin2):
    ms = F.crelu(lin1(F.concat((z, h), axis=1)))
    ms = lin2(ms)
    m, s = F.split_axis(ms, 2, axis=1)
    s = F.sigmoid(s)
    z = s * z + (1 - s) * m
    # pdb.set_trace()
    return z, -F.sum(F.log(s), axis=1)
def __call__(self, X):
    pad = self._kernel_size[1] - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[..., :-pad]

    A, B = functions.split_axis(WX, 2, axis=1)
    H = A * functions.sigmoid(B)

    return H

# Connections
def __call__(self, x, split_into_variables=True, discard_context=False):
    batchsize = x.shape[0]
    seq_length = x.shape[3]

    # conv
    out_data = self.conv_blocks(x)
    out_data = functions.reshape(out_data, (batchsize, -1, seq_length))

    # rnn
    for index, blocks in enumerate(self.rnn_blocks.blocks):
        sru = blocks[0]
        dropout = blocks[1] if len(blocks) == 2 else None
        hidden, cell, context = sru(out_data, self.contexts[index])
        if discard_context is False:
            self.contexts[index] = context
        if dropout is not None:
            out_data = dropout(out_data)

    # fc
    out_data = self.dense_blocks(out_data)
    assert out_data.shape[2] == seq_length

    # For the CTC loss, the RNN output has to be split into per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
def __call__(self, X, skip_mask=None):
    # remove right paddings
    # e.g.
    # kernel_size = 3
    # pad = 2
    # input sequence with paddings:
    # [0, 0, x1, x2, x3, 0, 0]
    # |< t1 >|
    #    |< t2 >|
    #       |< t3 >|
    pad = self._kernel_size - 1
    WX = self.W(X)[..., :-pad]

    return self.pool(functions.split_axis(WX, self.num_split, axis=1),
                     skip_mask=skip_mask)
def forward_one_step(self, X, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad-1, None]
    return self.pool(functions.split_axis(WX, self.num_split, axis=1),
                     skip_mask=skip_mask)
def forward_one_step(self, X, ht_enc):
    pad = self._kernel_size - 1
    WX = self.W(X)[..., -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
    return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def check_forward(self, x_data, ys_data, indices_or_sections, axis):
    x = chainer.Variable(x_data)
    ys = functions.split_axis(x, indices_or_sections, axis)
    for yd, y in zip(ys_data, ys):
        self.assertEqual(y.data.dtype, self.dtype)
        self.assertIsInstance(y.data.shape, tuple)
        gradient_check.assert_allclose(yd, y.data, atol=0, rtol=0)