The following 28 code examples, extracted from open-source Python projects, illustrate how to use chainer.functions.swapaxes().
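Before the project snippets, here is a minimal sketch (not drawn from any of the projects below; the array sizes are arbitrary) of what F.swapaxes does to an input's shape and how it relates to NumPy's ndarray.swapaxes:

import numpy as np
import chainer.functions as F

# A batch of 2 sequences with 3 channels and 5 timesteps: shape (2, 3, 5).
x = np.arange(30, dtype=np.float32).reshape(2, 3, 5)

# Swap the channel and time axes, giving a Variable of shape (2, 5, 3).
y = F.swapaxes(x, 1, 2)
print(y.shape)  # (2, 5, 3)

# The forward result matches NumPy's swapaxes on the raw array.
assert (y.data == x.swapaxes(1, 2)).all()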
def __call__(self, x, split_into_variables=True):
    batchsize = x.shape[0]
    seq_length = x.shape[3]
    out_data = super(AcousticModel, self).__call__(x)
    assert out_data.shape[3] == seq_length

    # When training with CTC, split the RNN output along the time axis into a list of per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 3)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
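The split_into_variables branch above turns the network output into the list of per-timestep variables that Chainer's CTC loss consumes. A stripped-down sketch of just that reshaping, with the output shape (batchsize, vocab, 1, seq_length) assumed from the asserts in the method and the sizes made up:

import numpy as np
import chainer.functions as F

batchsize, vocab, seq_length = 2, 5, 7
out_data = np.random.randn(batchsize, vocab, 1, seq_length).astype(np.float32)

out_data = F.swapaxes(out_data, 1, 3)                  # (batch, time, 1, vocab)
out_data = F.reshape(out_data, (batchsize, -1))        # (batch, time * vocab)
out_list = F.split_axis(out_data, seq_length, axis=1)  # seq_length chunks of (batch, vocab)

print(len(out_list), out_list[0].shape)  # 7 (2, 5)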
def encode(self, X, skip_mask=None):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    enmbedding = self.encoder_embed(X)
    enmbedding = F.swapaxes(enmbedding, 1, 2)

    out_data = self._forward_encoder_layer(0, enmbedding, skip_mask=skip_mask)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_encoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], skip_mask=skip_mask)
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    last_hidden_states = []
    for layer_index in range(0, self.num_layers):
        encoder = self.get_encoder(layer_index)
        last_hidden_states.append(encoder.get_last_hidden_state())

    return last_hidden_states
def __call__(self, X, return_last=False):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    enmbedding = self.embed(X)
    enmbedding = F.swapaxes(enmbedding, 1, 2)

    out_data = self._forward_layer(0, enmbedding)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1])  # dense conv
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else out_data  # dense conv

    if return_last:
        out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

    return out_data
def __call__(self, X, return_last=False):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    enmbedding = self.embed(X)
    enmbedding = F.swapaxes(enmbedding, 1, 2)
    residual_input = enmbedding if self.ndim_h == self.ndim_embedding else 0

    out_data = self._forward_layer(0, enmbedding)
    for layer_index in xrange(1, self.num_blocks * self.num_layers_per_block):
        out_data = self._forward_layer(layer_index, out_data)
        if (layer_index + 1) % self.num_layers_per_block == 0:
            if self.using_dropout:
                out_data = F.dropout(out_data, ratio=self.dropout)
            out_data += residual_input
            residual_input = out_data

    if return_last:
        out_data = out_data[:, :, -1, None]

    out_data = self.dense(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

    return out_data
def propdown(self, hid):
    """ This function propagates the hidden units activation downwards to the visible units
    :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)  - given h_sample
    :return: Variable Matrix(batch_size, in_channels, image_height, image_width)  - probability for each visible unit to be v_j = 1
    """
    batch_size = hid.data.shape[0]
    if self.real == 0:
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
        # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
        v_mean = F.sigmoid(pre_sigmoid_activation)
        # print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
        # print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
        # print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
        # print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
        # print('pre_sigmoidactivation', F.sum(pre_sigmoid_activation).data)
        # print('v_mean', v_mean.data.shape)
        # print('v_mean sum', F.sum(v_mean).data)
        # print('hid', hid.data.shape)
    else:
        # TODO: check
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
    return v_mean
def reconstruct(self, v):
    """
    :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
    """
    batch_size = v.data.shape[0]
    xp = cuda.get_array_module(v.data)
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize-1))
    # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
    return reconstructed_v
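Both RBM methods above build a "transposed" filter by flipping the kernel spatially and then exchanging the output- and input-channel axes with F.swapaxes. A standalone sketch of that weight manipulation (the filter sizes are arbitrary, and the spatial flip is done with array slicing instead of the projects' CF.flip helper):

import numpy as np
import chainer.functions as F

# A convolution weight of shape (out_channels, in_channels, kh, kw).
W = np.random.randn(8, 3, 5, 5).astype(np.float32)

# Flip both spatial axes, then swap the channel axes so the filter becomes
# (in_channels, out_channels, kh, kw) and can map hidden activations back
# to the visible space with convolution_2d.
W_flipped = F.swapaxes(np.ascontiguousarray(W[:, :, ::-1, ::-1]), axis1=0, axis2=1)
print(W_flipped.shape)  # (3, 8, 5, 5)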
def encode(self, x_input, x_query, answer):
    m = self.encode_input(x_input)
    u = self.encode_query(x_query)
    # print "m.data.shape", m.data.shape
    # print "u.data.shape", u.data.shape
    mu = functions.matmul(m, u, transb=True)
    # print "mu.data.shape", mu.data.shape
    # print "mu.data", mu.data
    p = functions.softmax(mu)
    c = self.encode_output(x_input)
    # print "p.data.shape:", p.data.shape
    # print "c.data.shape:", c.data.shape
    # print "functions.swapaxes(c, 2, 1):", functions.swapaxes(c, 2, 1).data.shape
    o = functions.matmul(functions.swapaxes(c, 1, 0), p)  # (2, 50, 1)
    o = functions.swapaxes(o, 1, 0)  # (2, 50)
    # print "u.data.shape:", u.data.shape
    # print "o.data.shape:", o.data.shape
    # print "u.data.shape:", u.data
    # print "o.data.shape:", o.data
    # print (u+o).data.shape
    predict = self.W(u + o)
    # print predict.data.shape
    loss = functions.softmax_cross_entropy(predict, answer)
    return loss
def pre(self, x):
    dims = len(x.shape) - 1
    if self.kernel_size == 1:
        ret = self.W(x)
    elif self.kernel_size == 2:
        if dims == 2:
            xprev = Variable(
                self.xp.zeros((self.batch_size, 1, self.in_size), dtype=np.float32),
                volatile='AUTO')
            xtminus1 = F.concat((xprev, x[:, :-1, :]), axis=1)
        else:
            xtminus1 = self.x
        ret = self.W(x) + self.V(xtminus1)
    else:
        ret = F.swapaxes(self.conv(
            F.swapaxes(x, 1, 2))[:, :, :x.shape[2]], 1, 2)

    if not self.attention:
        return ret

    if dims == 1:
        enc = self.encoding[:, -1, :]
    else:
        enc = self.encoding[:, -1:, :]
    return sum(F.broadcast(self.U(enc), ret))
def __call__(self, x):
    return functions.swapaxes(x, self.axis1, self.axis2)
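The one-liner above wraps functions.swapaxes in a callable layer whose axes are fixed when the object is constructed. A hypothetical self-contained version of such a wrapper (the class name and constructor are assumptions, not code from the original project):

import numpy as np
import chainer
import chainer.functions as F

class SwapAxes(chainer.Link):
    """Parameter-free layer that swaps two fixed axes of its input."""

    def __init__(self, axis1, axis2):
        super(SwapAxes, self).__init__()
        self.axis1 = axis1
        self.axis2 = axis2

    def __call__(self, x):
        return F.swapaxes(x, self.axis1, self.axis2)

swap = SwapAxes(1, 2)
x = np.zeros((4, 16, 100), dtype=np.float32)  # (batch, channels, time)
print(swap(x).shape)  # (4, 100, 16)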
def __call__(self, x, split_into_variables=True, discard_context=False):
    batchsize = x.shape[0]
    seq_length = x.shape[3]

    # conv
    out_data = self.conv_blocks(x)
    out_data = functions.reshape(out_data, (batchsize, -1, seq_length))

    # rnn
    for index, blocks in enumerate(self.rnn_blocks.blocks):
        sru = blocks[0]
        dropout = blocks[1] if len(blocks) == 2 else None

        hidden, cell, context = sru(out_data, self.contexts[index])
        if discard_context is False:
            self.contexts[index] = context

        if dropout is not None:
            out_data = dropout(out_data)

    # fc
    out_data = self.dense_blocks(out_data)
    assert out_data.shape[2] == seq_length

    # When training with CTC, split the RNN output along the time axis into a list of per-timestep Variables
    if split_into_variables:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)
    else:
        out_data = F.swapaxes(out_data, 1, 2)
        out_data = F.squeeze(out_data, axis=2)

    return out_data
def decode(self, X, encoder_last_hidden_states, return_last=False):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    enmbedding = self.decoder_embed(X)
    enmbedding = F.swapaxes(enmbedding, 1, 2)

    out_data = self._forward_decoder_layer(0, enmbedding, encoder_last_hidden_states[0])
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_decoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index])
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if return_last:
        out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))

    return out_data
def decode_one_step(self, X, encoder_last_hidden_states):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    ksize = self.decoder_kernel_size

    if seq_length < ksize:
        self.reset_state()
        return self.decode(X, encoder_last_hidden_states, return_last=True)

    xt = X[:, -ksize:]
    enmbedding = self.decoder_embed(xt)
    enmbedding = F.swapaxes(enmbedding, 1, 2)

    out_data = self._forward_decoder_layer_one_step(0, enmbedding, encoder_last_hidden_states[0])
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_decoder_layer_one_step(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index])
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv
    out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))

    return out_data
def encode(self, X, skip_mask=None):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    enmbedding = self.encoder_embed(X)
    enmbedding = F.swapaxes(enmbedding, 1, 2)

    out_data = self._forward_encoder_layer(0, enmbedding, skip_mask=skip_mask)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_encoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], skip_mask=skip_mask)
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    last_hidden_states = []
    last_layer_outputs = None
    for layer_index in range(0, self.num_layers):
        encoder = self.get_encoder(layer_index)
        last_hidden_states.append(encoder.get_last_hidden_state())
        last_layer_outputs = encoder.get_all_hidden_states()

    return last_hidden_states, last_layer_outputs
def decode(self, X, encoder_last_hidden_states, encoder_last_layer_outputs, encoder_skip_mask=None, return_last=False):
    assert len(encoder_last_hidden_states) == self.num_layers
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    enmbedding = self.decoder_embed(X)
    enmbedding = F.swapaxes(enmbedding, 1, 2)

    out_data = self._forward_decoder_layer(0, enmbedding, encoder_last_hidden_states[0], encoder_last_layer_outputs, encoder_skip_mask)
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_decoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index], encoder_last_layer_outputs, encoder_skip_mask)
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else in_data[-1]  # dense conv

    if return_last:
        out_data = out_data[:, :, -1, None]

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))

    return out_data
def forward_one_step(self, X):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    ksize = self.kernel_size

    if seq_length < ksize:
        self.reset_state()
        return self.__call__(X, return_last=True)

    xt = X[:, -ksize:]
    enmbedding = self.embed(xt)
    enmbedding = F.swapaxes(enmbedding, 1, 2)

    out_data = self._forward_layer_one_step(0, enmbedding)[:, :, -ksize:]
    in_data = [out_data]

    for layer_index in range(1, self.num_layers):
        out_data = self._forward_layer_one_step(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1])[:, :, -ksize:]  # dense conv
        in_data.append(out_data)

    out_data = F.concat(in_data) if self.densely_connected else out_data  # dense conv

    if self.using_dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = out_data[..., -1, None]
    out_data = self.fc(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

    return out_data
def check_forward(self, x_data):
    axis1, axis2 = self.axis1, self.axis2
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, axis1, axis2)

    self.assertEqual(y.data.dtype, self.dtype)
    self.assertTrue((self.x.swapaxes(axis1, axis2) == cuda.to_cpu(y.data)).all())
def check_backward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, self.axis1, self.axis2)
    y.grad = y.data
    y.backward()
    gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
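The two test helpers above exercise both directions of swapaxes: the forward pass must match NumPy's ndarray.swapaxes exactly, and the backward pass simply swaps the same two axes of the upstream gradient. A minimal sketch of the same checks outside a test class (shape and dtype are arbitrary):

import numpy as np
import chainer
import chainer.functions as F

x_data = np.random.randn(2, 3, 4).astype(np.float32)
x = chainer.Variable(x_data)
y = F.swapaxes(x, 1, 2)

# Forward: identical to NumPy's swapaxes on the raw array.
assert (y.data == x_data.swapaxes(1, 2)).all()

# Backward: feeding y.data back in as the gradient reproduces x_data exactly,
# because the backward pass is just another swap of the same two axes.
y.grad = y.data
y.backward()
assert (x.grad == x_data).all()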
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    h = F.broadcast_to(F.expand_dims(h, axis=2), (batch_size, hidden_size, src_len))
    h = F.swapaxes(h, 1, 2)
    S = F.reshape(F.concat((S, h), axis=2), (batch_size * src_len, 2 * hidden_size))
    a = F.softmax(F.reshape(self.second_layer(F.tanh(self.first_layer(S))), (batch_size, src_len)))
    return a
def read(self, h):
    # M_key = F.swapaxes(F.stack(self.key_buff, axis=0), axis1=0, axis2=1)  # (B, M, m)
    M_key = F.stack(self.key_buff, axis=1)  # (B, M, m)
    self.p = F.softmax(F.reshape(F.batch_matmul(M_key, h, transa=False, transb=False), (h.shape[0], M_key.shape[1])))  # (B, M)
    # p = F.reshape(p, (h.shape[0], 1, M_key.shape[1]))  # (B, 1, M)
    # print("p", p.shape)

    # M_val = F.swapaxes(F.stack(self.val_buff, axis=0), axis1=0, axis2=1)  # (B, M, m)
    M_val = F.stack(self.val_buff, axis=1)  # (B, M, m)
    # print("M_val", M_val.shape)
    o = F.batch_matmul(self.p, M_val, transa=True, transb=False)  # (B, 1, m)
    o = F.reshape(o, (o.shape[0], o.shape[2]))  # (B, m)
    # print("o", o.shape)
    return o, self.p
def forward_one_step(self, X):
    batchsize = X.shape[0]
    seq_length = X.shape[1]
    ksize = self.kernel_size

    if seq_length < ksize:
        self.reset_state()
        return self.__call__(X, return_last=True)

    xt = X[:, -ksize:]
    enmbedding = self.embed(xt)
    enmbedding = F.swapaxes(enmbedding, 1, 2)
    residual_input = enmbedding if self.ndim_h == self.ndim_embedding else 0

    out_data = self._forward_layer_one_step(0, enmbedding)[:, :, -ksize:]
    for layer_index in xrange(1, self.num_blocks * self.num_layers_per_block):
        out_data = self._forward_layer_one_step(layer_index, out_data)[:, :, -ksize:]
        if (layer_index + 1) % self.num_layers_per_block == 0:
            if self.using_dropout:
                out_data = F.dropout(out_data, ratio=self.dropout)
            out_data += residual_input
            residual_input = out_data

    out_data = out_data[..., -1, None]
    out_data = self.dense(out_data)
    out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

    return out_data
def __array(array, dtype, is_volatile, transposition):
    volatile = "ON" if is_volatile else "OFF"
    if not transposition:
        return Variable(XP.__lib.array(array, dtype=dtype), volatile=volatile)
    else:
        return F.swapaxes(Variable(XP.__lib.array(array, dtype=dtype), volatile=volatile), 0, 1)
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[:, :, :-pad]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    self.contexts = []
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if t == 0:
            ct = (1 - f) * z
            self.contexts.append(ct)
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if t == 0:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if self.contexts is None:
            ct = (1 - f) * z
            self.contexts = [ct]
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t - T]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
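In both attention methods above, H_enc arrives channel-first as (batch, hidden, enc_length) and is swapped to (batch, enc_length, hidden) so that batch_matmul against the context vector yields one score per encoder position. A small sketch of that scoring step on its own (the shapes are made up; this is not the projects' full attention):

import numpy as np
import chainer.functions as F

B, H, T = 4, 64, 10
H_enc = np.random.randn(B, H, T).astype(np.float32)  # encoder states, channel-first
ct = np.random.randn(B, H).astype(np.float32)        # current context vector

H_enc_t = F.swapaxes(H_enc, 1, 2)     # (B, T, H)
scores = F.batch_matmul(H_enc_t, ct)  # (B, T, 1): one score per encoder position
alpha = F.softmax(scores)             # attention weights over the T positions
print(alpha.shape)  # (4, 10, 1)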
def encodeSentenceFWD(self, train_mode, sentence, args, dropout_rate):
    if args.gpu_enc != args.gpu_dec:  # the encoder and decoder may live on different GPUs
        chainer.cuda.get_device(args.gpu_enc).use()
    encLen = len(sentence)  # sentence length
    cMBSize = len(sentence[0])  # minibatch size

    # look up the embeddings for the encoder input
    encEmbList = self.getEncoderInputEmbeddings(sentence, args)

    flag_train = (train_mode > 0)
    lstmVars = [0] * self.n_layers * 2
    if self.flag_merge_encfwbw == 0:  # run the forward and backward LSTM stacks independently
        hyf, cyf, fwHout = self.model.encLSTM_f(
            None, None, encEmbList, flag_train, args)  # forward
        hyb, cyb, bkHout = self.model.encLSTM_b(
            None, None, encEmbList[::-1], flag_train, args)  # backward
        for z in six.moves.range(self.n_layers):
            lstmVars[2 * z] = cyf[z] + cyb[z]
            lstmVars[2 * z + 1] = hyf[z] + hyb[z]
    elif self.flag_merge_encfwbw == 1:  # merge the forward and backward outputs layer by layer
        sp = (cMBSize, self.hDim)
        for z in six.moves.range(self.n_layers):
            if z == 0:  # first layer: feed the embeddings
                biH = encEmbList
            else:  # deeper layers: feed the merged outputs of the previous layer
                # bkHout was computed in reverse order, so flip it back before merging
                biH = fwHout + bkHout[::-1]
            # forward LSTM of layer z
            hyf, cyf, fwHout = self.model.encLSTM_f(
                z, biH, flag_train, dropout_rate, args)
            # backward LSTM of layer z
            hyb, cyb, bkHout = self.model.encLSTM_b(
                z, biH[::-1], flag_train, dropout_rate, args)
            # keep the final cell and hidden states of this layer
            # to initialize the corresponding decoder layer
            lstmVars[2 * z] = chaFunc.reshape(cyf + cyb, sp)
            lstmVars[2 * z + 1] = chaFunc.reshape(hyf + hyb, sp)
    else:
        assert 0, "ERROR"

    # stack the per-position hidden states
    if self.flag_enc_boseos == 0:  # default
        biHiddenStack = fwHout[:, ] + bkHout[::-1]
    elif self.flag_enc_boseos == 1:
        bkHout2 = bkHout[::-1]  # flip back to the original order
        biHiddenStack = fwHout[1:encLen - 1, ] + bkHout2[1:encLen - 1, ]
        # drop BOS and EOS  # TODO
        encLen -= 2
    else:
        assert 0, "ERROR"
    # (enc length, minibatch size, hidden dim)
    # => (minibatch size, enc length, hidden dim)
    biHiddenStackSW01 = chaFunc.swapaxes(biHiddenStack, 0, 1)
    # stack each layer's LSTM states so they can initialize the decoder LSTM
    lstmVars = chaFunc.stack(lstmVars)
    # pack everything the decoder needs into an encInfoObject
    retO = self.encInfoObject(biHiddenStackSW01, lstmVars, encLen, cMBSize)
    return retO
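The chaFunc.swapaxes(biHiddenStack, 0, 1) call near the end is the common time-major to batch-major transposition: the stacked encoder hidden states come out as (enc length, minibatch size, hidden dim) and are reordered to (minibatch size, enc length, hidden dim) before being handed to the decoder. The same transposition in isolation (sizes are illustrative only):

import numpy as np
import chainer.functions as F

enc_len, batch_size, hidden_dim = 7, 32, 256
stack = np.zeros((enc_len, batch_size, hidden_dim), dtype=np.float32)

batch_major = F.swapaxes(stack, 0, 1)
print(batch_major.shape)  # (32, 7, 256)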