The following 19 code examples, extracted from open source Python projects, illustrate how to use chainer.functions.squeeze().
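Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what chainer.functions.squeeze() does: it removes dimensions of size one from an array or Variable, either all of them or only the axes named by the axis argument. The shapes used here are arbitrary illustration values.

import numpy as np
import chainer.functions as F

x = np.zeros((2, 1, 5, 1), dtype=np.float32)

# Remove every size-1 axis: (2, 1, 5, 1) -> (2, 5)
y_all = F.squeeze(x)
print(y_all.shape)   # (2, 5)

# Remove only axis 1: (2, 1, 5, 1) -> (2, 5, 1)
y_axis = F.squeeze(x, axis=1)
print(y_axis.shape)  # (2, 5, 1)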
def forward(self, pretrained_word_tokens, word_tokens, pos_tokens):
    X = []
    batch = len(word_tokens)
    for i in range(batch):
        xs_words_pretrained = \
            self.embed[0](self.xp.array(pretrained_word_tokens[i]))
        xs_words = self.embed[1](self.xp.array(word_tokens[i]))
        xs_words += xs_words_pretrained
        xs_tags = self.embed[2](self.xp.array(pos_tokens[i]))
        xs = F.concat([
            teras_F.dropout(xs_words, self.embed._dropout_ratio),
            teras_F.dropout(xs_tags, self.embed._dropout_ratio)])
        X.append(xs)
    R = self.blstm(X)
    R = F.pad_sequence(R)
    H_arc_dep = self.mlp_arc_dep(R)
    H_arc_head = self.mlp_arc_head(R)
    arc_logits = self.arc_biaffine(H_arc_dep, H_arc_head)
    arc_logits = F.squeeze(arc_logits, axis=3)
    H_label_dep = self.mlp_label_dep(R)
    H_label_head = self.mlp_label_head(R)
    label_logits = self.label_biaffine(H_label_dep, H_label_head)
    return arc_logits, label_logits
def predict(self, xs):
    """
    batch: list of splitted sentences
    """
    xs = [self.extractor.process(x) for x in xs]
    batchsize = len(xs)
    ws, cs, ls = zip(*xs)
    ws = map(self.emb_word, ws)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
          for c, l in zip(cs, ls)]
    xs_f = [F.dropout(F.concat([w, c]), self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, hs_b)]
    return [y.data[1:-1] for y in ys]
def __call__(self, ws, ss, ps, ts):
    """
    xs [(w,s,p,y), ..., ]
    w: word, s: suffix, p: prefix, y: label
    """
    batchsize, length = ts.shape
    ys = self.forward(ws, ss, ps)[1:-1]
    ts = [F.squeeze(x, 0) for x in F.split_axis(F.transpose(ts), length, 0)]
    loss = reduce(lambda x, y: x + y,
                  [F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)])
    acc = reduce(lambda x, y: x + y,
                 [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)])
    acc /= length
    chainer.report({
        "loss": loss, "accuracy": acc
    }, self)
    return loss
def predict(self, xs):
    """
    batch: list of splitted sentences
    """
    batchsize = len(xs)
    xs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(xs, padding=IGNORE)
    cat_ys, dep_ys = self.forward(ws, ss, ps)
    cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
    dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))
    cat_ys = [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in
              zip(xs, F.split_axis(cat_ys, batchsize, 0))]
    dep_ys = [F.squeeze(F.log_softmax(y[1:len(x) + 1, :-1]), 0).data
              for x, y in zip(xs, F.split_axis(dep_ys, batchsize, 0))]
    return cat_ys, dep_ys
def __call__(self, x, split_into_variables=True):
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    out_data = super(AcousticModel, self).__call__(x)
    assert out_data.shape[3] == seq_length

    # For CTC, split the output along the time axis into a list of
    # per-timestep Variables; otherwise return a single squeezed array.
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
def __call__(self, x, z, ze, mask, conv_mask):
    att_scale = self.xp.sum(
        mask, axis=2, keepdims=True)[:, None, :, :] ** 0.5
    pad = self.xp.zeros(
        (x.shape[0], x.shape[1], self.width - 1, 1), dtype=x.dtype)
    base_x = x
    z = F.squeeze(z, axis=3)
    # Note: the handling of the input, output, and attention result here
    # follows the authors' code, which looks slightly different from what
    # the paper describes.
    for conv_name, preatt_name in zip(self.conv_names, self.preatt_names):
        # Calculate output of GLU
        out = getattr(self, conv_name)(
            F.concat([pad, x], axis=2), conv_mask)
        # Calculate output of attention using output of GLU
        preatt = seq_linear(getattr(self, preatt_name), out)
        query = base_x + preatt
        query = F.squeeze(query, axis=3)
        c = self.attend(query, z, ze, mask) * att_scale
        # Merge them in residual calculation and scaling
        x = (x + (c + out) * scale05) * scale05
    return x
def __call__(self):
    mem_optimize = nmtrain.optimization.chainer_mem_optimize
    # Calculate attention vector
    a = self.attention(self.S, self.h)
    # Calculate context vector
    c = F.squeeze(F.batch_matmul(self.S, a, transa=True), axis=2)
    # Calculate hidden vector + context
    self.ht = self.context_project(F.concat((self.h, c), axis=1))
    # Calculate word probability distribution
    y = mem_optimize(self.affine_vocab, F.tanh(self.ht), level=1)
    if self.use_lexicon:
        y = self.lexicon_model(y, a, self.ht, self.lexicon_matrix)

    if nmtrain.environment.is_train():
        return nmtrain.models.decoders.Output(y=y)
    else:
        # Return the vocabulary size output projection
        return nmtrain.models.decoders.Output(y=y, a=a)
def __call__(self, y, a, ht, y_lex):
    y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
    return (y + F.log(y_dict + self.alpha))

# class LinearInterpolationLexicon(chainer.Chain):
#     def __init__(self, hidden_size):
#         super(LinearInterpolationLexicon, self).__init__(
#             perceptron = chainer.links.Linear(hidden_size, 1)
#         )
#
#     def __call__(self, y, a, ht, y_lex):
#         y = F.softmax(y)
#         y_dict = F.squeeze(F.batch_matmul(y_lex, a, transa=True), axis=2)
#         gamma = F.broadcast_to(F.sigmoid(self.perceptron(ht)), y_dict.data.shape)
#         return (gamma * y_dict + (1-gamma) * y)
def forward(self, ws, cs, ls, dep_ts=None):
    batchsize = len(ws)
    xp = chainer.cuda.get_array_module(ws[0])
    ws = map(self.emb_word, ws)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (int(l[0]), 1)))
          for c, l in zip(cs, ls)]
    xs_f = [F.dropout(F.concat([w, c]), self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(xp, batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]

    dep_ys = [self.biaffine_arc(
        F.elu(F.dropout(self.arc_dep(h), 0.32, train=self.train)),
        F.elu(F.dropout(self.arc_head(h), 0.32, train=self.train)))
        for h in hs]

    if dep_ts is not None:
        heads = dep_ts
    else:
        heads = [F.argmax(y, axis=1) for y in dep_ys]

    cat_ys = [self.biaffine_tag(
        F.elu(F.dropout(self.rel_dep(h), 0.32, train=self.train)),
        F.elu(F.dropout(self.rel_head(
            F.embed_id(t, h, ignore_label=IGNORE)), 0.32, train=self.train)))
        for h, t in zip(hs, heads)]

    return cat_ys, dep_ys
def forward(self, ws, cs, ls):
    """
    xs [(w,s,p,y), ..., ]
    w: word, c: char, l: length, y: label
    """
    batchsize = len(ws)
    # cs: [(sentence length, max word length)]
    ws = map(self.emb_word, ws)
    # ls: [(sentence length, char dim)]
    # before conv: (sent len, 1, max word len, char_size)
    # after conv: (sent len, char_size, max word len, 1)
    # after max_pool: (sent len, char_size, 1, 1)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
          for c, l in zip(cs, ls)]
    # [(sentence length, (word_dim + char_dim))]
    xs_f = [F.dropout(F.concat([w, c]), self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of category)]
    hs = [F.concat([h_f, h_b]) for h_f, h_b in zip(hs_f, hs_b)]
    cat_ys = [self.linear_cat2(F.relu(self.linear_cat1(h))) for h in hs]
    dep_ys = [self.biaffine(
        F.relu(F.dropout(self.linear_dep(h), 0.32, train=self.train)),
        F.relu(F.dropout(self.linear_head(h), 0.32, train=self.train)))
        for h in hs]
    return cat_ys, dep_ys
def predict(self, xs):
    """
    batch: list of splitted sentences
    """
    batchsize = len(xs)
    fs = [self.extractor.process(x) for x in xs]
    ws, ss, ps = concat_examples(fs, padding=-1)
    ys = self.forward(ws, ss, ps)
    ys = F.transpose(F.stack(ys, 2), (0, 2, 1))
    return [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in
            zip(xs, F.split_axis(ys, batchsize, 0))]
def __call__(self, x):
    return functions.squeeze(x, self.axis)
def __call__(self, x, split_into_variables=True, discard_context=False):
    batchsize = x.shape[0]
    seq_length = x.shape[3]

    # conv
    out_data = self.conv_blocks(x)
    out_data = functions.reshape(out_data, (batchsize, -1, seq_length))

    # rnn
    for index, blocks in enumerate(self.rnn_blocks.blocks):
        sru = blocks[0]
        dropout = blocks[1] if len(blocks) == 2 else None
        hidden, cell, context = sru(out_data, self.contexts[index])
        if discard_context is False:
            self.contexts[index] = context
        if dropout is not None:
            out_data = dropout(out_data)

    # fc
    out_data = self.dense_blocks(out_data)
    assert out_data.shape[2] == seq_length

    # For CTC, split the output along the time axis into a list of
    # per-timestep Variables; otherwise return a single squeezed array.
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
def __call__(self, S, h):
    return F.squeeze(F.softmax(F.batch_matmul(S, h)), axis=2)
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    S = self.inner_weight(F.reshape(S, (batch_size * src_len, hidden_size)))
    S = F.reshape(S, (batch_size, src_len, hidden_size))
    a = F.softmax(F.squeeze(F.batch_matmul(S, h), axis=2))
    return a

# MLP layer, as of Bahdanau+ 15
def __call__(self, id, x):
    W = self.W_embedding(id)
    b = F.squeeze(self.b_embedding(id))
    # Reshape the vector to be the right dimensions for 2D conv
    W = F.reshape(W, (self.out_channels, self.in_channels, self.kh, self.kw))
    return F.convolution_2d(x, W, b, self.stride, self.pad)
def __call__(self, xs):
    """
    xs [(w,s,p,y), ..., ]
    w: word, c: char, l: length, y: label
    """
    batchsize = len(xs)
    ws, cs, ls, ts = zip(*xs)
    # cs: [(sentence length, max word length)]
    ws = map(self.emb_word, ws)
    # ls: [(sentence length, char dim)]
    # cs = map(lambda (c, l): F.sum(self.emb_char(c), 1) / l, zip(cs, ls))
    # cs = [F.reshape(F.average_pooling_2d(
    #         F.expand_dims(self.emb_char(c), 0), (l, 1)), (-1, self.char_dim))
    #       for c, l in zip(cs, ls)]
    # before conv: (sent len, 1, max word len, char_size)
    # after conv: (sent len, char_size, max word len, 1)
    # after max_pool: (sent len, char_size, 1, 1)
    cs = [F.squeeze(
        F.max_pooling_2d(
            self.conv_char(
                F.expand_dims(
                    self.emb_char(c), 1)), (l, 1)))
          for c, l in zip(cs, ls)]
    # [(sentence length, (word_dim + char_dim))]
    xs_f = [F.dropout(F.concat([w, c]), self.dropout_ratio, train=self.train)
            for w, c in zip(ws, cs)]
    xs_b = [x[::-1] for x in xs_f]
    cx_f, hx_f, cx_b, hx_b = self._init_state(batchsize)
    _, _, hs_f = self.lstm_f(hx_f, cx_f, xs_f, train=self.train)
    _, _, hs_b = self.lstm_b(hx_b, cx_b, xs_b, train=self.train)
    hs_b = [x[::-1] for x in hs_b]
    # ys: [(sentence length, number of category)]
    ys = [self.linear2(F.relu(self.linear1(F.concat([h_f, h_b]))))
          for h_f, h_b in zip(hs_f, hs_b)]
    # ys = [self.linear2(F.relu(
    #     self.linear1(
    #         F.squeeze(
    #             F.transpose(
    #                 F.relu(self.conv1(
    #                     F.reshape(
    #                         F.concat([h_f, h_b]),
    #                         (1, 1, -1, 2 * self.hidden_dim))), (0, 3, 2, 1))
    #             )))))
    #     for h_f, h_b in zip(hs_f, hs_b)]
    loss = reduce(lambda x, y: x + y,
                  [F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)])
    acc = reduce(lambda x, y: x + y,
                 [F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)])
    acc /= batchsize
    chainer.report({
        "loss": loss, "accuracy": acc
    }, self)
    return loss