Python chainer.functions module: swapaxes() example source code

The following code examples, extracted from open-source Python projects, illustrate how to use chainer.functions.swapaxes().
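Before the project excerpts, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic call: chainer.functions.swapaxes(x, axis1, axis2) returns a Variable whose two named axes are exchanged.

import numpy as np
import chainer.functions as F

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)  # e.g. (batch, channels, time)
y = F.swapaxes(x, 1, 2)                               # now (batch, time, channels)
print(y.shape)  # (2, 4, 3)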

Project: chainer-speech-recognition    Author: musyoku
def __call__(self, x, split_into_variables=True):
        batchsize = x.shape[0]
        seq_length = x.shape[3]

        out_data = super(AcousticModel, self).__call__(x)
        assert out_data.shape[3] == seq_length

        # For CTC, split the network output along the time axis into a list of per-timestep Variables
        if split_into_variables:
            out_data = F.swapaxes(out_data, 1, 3)
            out_data = F.reshape(out_data, (batchsize, -1))
            out_data = F.split_axis(out_data, seq_length, axis=1)
        else:
            out_data = F.swapaxes(out_data, 1, 3)
            out_data = F.squeeze(out_data, axis=2)

        return out_data
Project: chainer-qrnn    Author: musyoku
def encode(self, X, skip_mask=None):
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        enmbedding = self.encoder_embed(X)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_encoder_layer(0, enmbedding, skip_mask=skip_mask)
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_encoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], skip_mask=skip_mask)
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else in_data[-1] # dense conv

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        last_hidden_states = []
        for layer_index in range(0, self.num_layers):
            encoder = self.get_encoder(layer_index)
            last_hidden_states.append(encoder.get_last_hidden_state())

        return last_hidden_states
Project: chainer-qrnn    Author: musyoku
def __call__(self, X, return_last=False):
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        enmbedding = self.embed(X)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_layer(0, enmbedding)
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1])   # dense conv
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else out_data    # dense conv

        if return_last:
            out_data = out_data[:, :, -1, None]

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        out_data = self.fc(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

        return out_data
Project: chainer-glu    Author: musyoku
def __call__(self, X, return_last=False):
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        enmbedding = self.embed(X)
        enmbedding = F.swapaxes(enmbedding, 1, 2)
        residual_input = enmbedding if self.ndim_h == self.ndim_embedding else 0

        out_data = self._forward_layer(0, enmbedding)
        for layer_index in xrange(1, self.num_blocks * self.num_layers_per_block):
            out_data = self._forward_layer(layer_index, out_data)
            if (layer_index + 1) % self.num_layers_per_block == 0:
                if self.using_dropout:
                    out_data = F.dropout(out_data, ratio=self.dropout)
                out_data += residual_input
                residual_input = out_data

        if return_last:
            out_data = out_data[:, :, -1, None]

        out_data = self.dense(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

        return out_data
Project: SeRanet    Author: corochann
def propdown(self, hid):
        """ This function propagates the hidden units activation downwords to the visible units
        :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)  - given h_sample
        :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible units to be v_j = 1
        """
        batch_size = hid.data.shape[0]
        if self.real == 0:
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
                # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
            v_mean = F.sigmoid(pre_sigmoid_activation)
            #print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
            #print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
            #print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
            #print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
            #print('pre_sigmoidactivation', F.sum(pre_sigmoid_activation).data)
            #print('v_mean', v_mean.data.shape)
            #print('v_mean sum', F.sum(v_mean).data)
            #print('hid', hid.data.shape)

        else:
            # TODO: check
            W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
            v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize-1)
        return v_mean
Project: SeRanet    Author: corochann
def reconstruct(self, v):
        """

        :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
        :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
        """
        batch_size = v.data.shape[0]
        xp = cuda.get_array_module(v.data)
        if self.real == 0:
            h = F.sigmoid(self.conv(v))
        else:
            std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
            h = F.sigmoid(self.conv(v / std_ch))
        # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize-1))
            # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
        return reconstructed_v
Project: MemoryNetwork    Author: aonotas
def encode(self, x_input, x_query, answer):
        m = self.encode_input(x_input)
        u = self.encode_query(x_query)

        # print "m.data.shape", m.data.shape
        # print "u.data.shape", u.data.shape
        mu = functions.matmul(m, u, transb=True)
        # print "mu.data.shape", mu.data.shape
        # print "mu.data",  mu.data
        p = functions.softmax(mu)
        c = self.encode_output(x_input)
        # print "p.data.shape:", p.data.shape
        # print "c.data.shape:", c.data.shape
        # print "functions.swapaxes(c ,2, 1):", functions.swapaxes(c ,2, 1).data.shape
        o = functions.matmul(functions.swapaxes(c, 1, 0), p)  # (2, 50, 1)
        o = functions.swapaxes(o, 1, 0)  # (2, 50)
        # print "u.data.shape:", u.data.shape
        # print "o.data.shape:", o.data.shape
        # print "u.data.shape:", u.data
        # print "o.data.shape:", o.data
        # print (u+o).data.shape
        predict = self.W(u + o)
        # print predict.data.shape
        loss = functions.softmax_cross_entropy(predict, answer)
        return loss
Project: depccg    Author: masashi-y
def pre(self, x):
        dims = len(x.shape) - 1

        if self.kernel_size == 1:
            ret = self.W(x)
        elif self.kernel_size == 2:
            if dims == 2:
                xprev = Variable(
                    self.xp.zeros((self.batch_size, 1, self.in_size),
                                  dtype=np.float32), volatile='AUTO')
                xtminus1 = F.concat((xprev, x[:, :-1, :]), axis=1)
            else:
                xtminus1 = self.x
            ret = self.W(x) + self.V(xtminus1)
        else:
            ret = F.swapaxes(self.conv(
                F.swapaxes(x, 1, 2))[:, :, :x.shape[2]], 1, 2)

        if not self.attention:
            return ret

        if dims == 1:
            enc = self.encoding[:, -1, :]
        else:
            enc = self.encoding[:, -1:, :]
        return sum(F.broadcast(self.U(enc), ret))
Project: chainer-speech-recognition    Author: musyoku
def __call__(self, x):
        return functions.swapaxes(x, self.axis1, self.axis2)
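For context, the one-line __call__ above presumably belongs to a small wrapper layer that stores the two axes to exchange; a hypothetical minimal version (class name and constructor are assumptions, not taken from the project) might look like:

from chainer import functions

class Swapaxes(object):
    # Hypothetical wrapper: remembers two axes and swaps them on every call,
    # exactly as the __call__ above does.
    def __init__(self, axis1, axis2):
        self.axis1 = axis1
        self.axis2 = axis2

    def __call__(self, x):
        return functions.swapaxes(x, self.axis1, self.axis2)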
Project: chainer-speech-recognition    Author: musyoku
def __call__(self, x, split_into_variables=True, discard_context=False):
        batchsize = x.shape[0]
        seq_length = x.shape[3]

        # conv
        out_data = self.conv_blocks(x)
        out_data = functions.reshape(out_data, (batchsize, -1, seq_length))

        # rnn
        for index, blocks in enumerate(self.rnn_blocks.blocks):
            sru = blocks[0]
            dropout = blocks[1] if len(blocks) == 2 else None
            hidden, cell, context = sru(out_data, self.contexts[index])
            if discard_context is False:
                self.contexts[index] = context
            if dropout is not None:
                out_data = dropout(out_data)

        # fc
        out_data = self.dense_blocks(out_data)
        assert out_data.shape[2] == seq_length

        # For CTC, split the network output along the time axis into a list of per-timestep Variables
        if split_into_variables:
            out_data = F.swapaxes(out_data, 1, 2)
            out_data = F.reshape(out_data, (batchsize, -1))
            out_data = F.split_axis(out_data, seq_length, axis=1)
        else:
            out_data = F.swapaxes(out_data, 1, 2)
            out_data = F.squeeze(out_data, axis=2)

        return out_data
Project: chainer-qrnn    Author: musyoku
def decode(self, X, encoder_last_hidden_states, return_last=False):
        assert len(encoder_last_hidden_states) == self.num_layers
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        enmbedding = self.decoder_embed(X)
        enmbedding = F.swapaxes(enmbedding, 1, 2)


        out_data = self._forward_decoder_layer(0, enmbedding, encoder_last_hidden_states[0])
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_decoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index])
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else in_data[-1] # dense conv

        if return_last:
            out_data = out_data[:, :, -1, None]

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        out_data = self.fc(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))

        return out_data
Project: chainer-qrnn    Author: musyoku
def decode_one_step(self, X, encoder_last_hidden_states):
        assert len(encoder_last_hidden_states) == self.num_layers
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        ksize = self.decoder_kernel_size

        if seq_length < ksize:
            self.reset_state()
            return self.decode(X, encoder_last_hidden_states, return_last=True)

        xt = X[:, -ksize:]
        enmbedding = self.decoder_embed(xt)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_decoder_layer_one_step(0, enmbedding, encoder_last_hidden_states[0])
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_decoder_layer_one_step(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index])
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else in_data[-1] # dense conv
        out_data = out_data[:, :, -1, None]

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        out_data = self.fc(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))

        return out_data
Project: chainer-qrnn    Author: musyoku
def encode(self, X, skip_mask=None):
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        enmbedding = self.encoder_embed(X)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_encoder_layer(0, enmbedding, skip_mask=skip_mask)
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_encoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], skip_mask=skip_mask)
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else in_data[-1] # dense conv

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        last_hidden_states = []
        last_layer_outputs = None
        for layer_index in range(0, self.num_layers):
            encoder = self.get_encoder(layer_index)
            last_hidden_states.append(encoder.get_last_hidden_state())
            last_layer_outputs = encoder.get_all_hidden_states()

        return last_hidden_states, last_layer_outputs
Project: chainer-qrnn    Author: musyoku
def decode(self, X, encoder_last_hidden_states, encoder_last_layer_outputs, encoder_skip_mask=None, return_last=False):
        assert len(encoder_last_hidden_states) == self.num_layers
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        enmbedding = self.decoder_embed(X)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_decoder_layer(0, enmbedding, encoder_last_hidden_states[0], encoder_last_layer_outputs, encoder_skip_mask)
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_decoder_layer(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1], encoder_last_hidden_states[layer_index], encoder_last_layer_outputs, encoder_skip_mask)
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else in_data[-1] # dense conv

        if return_last:
            out_data = out_data[:, :, -1, None]

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        out_data = self.fc(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size_dec))

        return out_data
Project: chainer-qrnn    Author: musyoku
def forward_one_step(self, X):
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        ksize = self.kernel_size

        if seq_length < ksize:
            self.reset_state()
            return self.__call__(X, return_last=True)

        xt = X[:, -ksize:]
        enmbedding = self.embed(xt)
        enmbedding = F.swapaxes(enmbedding, 1, 2)

        out_data = self._forward_layer_one_step(0, enmbedding)[:, :, -ksize:]
        in_data = [out_data]

        for layer_index in range(1, self.num_layers):
            out_data = self._forward_layer_one_step(layer_index, F.concat(in_data) if self.densely_connected else in_data[-1])[:, :, -ksize:]   # dense conv
            in_data.append(out_data)

        out_data = F.concat(in_data) if self.densely_connected else out_data    # dense conv

        if self.using_dropout:
            out_data = F.dropout(out_data, ratio=self.dropout)

        out_data = out_data[..., -1, None]
        out_data = self.fc(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

        return out_data
Project: chainer-deconv    Author: germanRos
def check_forward(self, x_data):
        axis1, axis2 = self.axis1, self.axis2
        x = chainer.Variable(x_data)
        y = functions.swapaxes(x, axis1, axis2)
        self.assertEqual(y.data.dtype, self.dtype)
        self.assertTrue((self.x.swapaxes(axis1, axis2) ==
                         cuda.to_cpu(y.data)).all())
Project: chainer-deconv    Author: germanRos
def check_backward(self, x_data):
        x = chainer.Variable(x_data)
        y = functions.swapaxes(x, self.axis1, self.axis2)
        y.grad = y.data
        y.backward()
        gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
Project: adversarial-autoencoder    Author: musyoku
def __call__(self, x):
        return functions.swapaxes(x, self.axis1, self.axis2)
Project: nmtrain    Author: philip30
def __call__(self, S, h):
    batch_size, src_len, hidden_size = S.data.shape
    h = F.broadcast_to(F.expand_dims(h, axis=2), (batch_size, hidden_size, src_len))
    h = F.swapaxes(h, 1, 2)
    S = F.reshape(F.concat((S, h), axis=2), (batch_size * src_len, 2 * hidden_size))
    a = F.softmax(F.reshape(self.second_layer(F.tanh(self.first_layer(S))), (batch_size, src_len)))
    return a
Project: chainer_frmqn    Author: okdshin
def read(self, h):
        #M_key = F.swapaxes(F.stack(self.key_buff, axis=0), axis1=0, axis2=1) # (B, M, m)
        M_key = F.stack(self.key_buff, axis=1) # (B, M, m)

        self.p = F.softmax(F.reshape(F.batch_matmul(M_key, h, transa=False, transb=False), (h.shape[0], M_key.shape[1]))) # (B, M)
        #p = F.reshape(p, (h.shape[0], 1, M_key.shape[1])) # (B, 1, M)
        #print("p", p.shape)
        #M_val = F.swapaxes(F.stack(self.val_buff, axis=0), axis1=0, axis2=1) # (B, M, m)
        M_val = F.stack(self.val_buff, axis=1) # (B, M, m)
        #print("M_val", M_val.shape)
        o = F.batch_matmul(self.p, M_val, transa=True, transb=False) # (B, 1, m)
        o = F.reshape(o, (o.shape[0], o.shape[2])) # (B, m)
        #print("o", o.shape)
        return o, self.p
Project: chainer-glu    Author: musyoku
def forward_one_step(self, X):
        batchsize = X.shape[0]
        seq_length = X.shape[1]
        ksize = self.kernel_size

        if seq_length < ksize:
            self.reset_state()
            return self.__call__(X, return_last=True)

        xt = X[:, -ksize:]
        enmbedding = self.embed(xt)
        enmbedding = F.swapaxes(enmbedding, 1, 2)
        residual_input = enmbedding if self.ndim_h == self.ndim_embedding else 0

        out_data = self._forward_layer_one_step(0, enmbedding)[:, :, -ksize:]
        for layer_index in xrange(1, self.num_blocks * self.num_layers_per_block):
            out_data = self._forward_layer_one_step(layer_index, out_data)[:, :, -ksize:]
            if (layer_index + 1) % self.num_layers_per_block == 0:
                if self.using_dropout:
                    out_data = F.dropout(out_data, ratio=self.dropout)
                out_data += residual_input
                residual_input = out_data

        out_data = out_data[..., -1, None]
        out_data = self.dense(out_data)
        out_data = F.reshape(F.swapaxes(out_data, 1, 2), (-1, self.vocab_size))

        return out_data
Project: NlpUtil    Author: trtd56
def pre(self, x):
        dims = len(x.shape) - 1

        if self.kernel_size == 1:
            ret = self.W(x)
        elif self.kernel_size == 2:
            if dims == 2:
                xprev = Variable(
                    self.xp.zeros((self.batch_size, 1, self.in_size),
                                  dtype=np.float32), volatile='AUTO')
                xtminus1 = F.concat((xprev, x[:, :-1, :]), axis=1)
            else:
                xtminus1 = self.x
            ret = self.W(x) + self.V(xtminus1)
        else:
            ret = F.swapaxes(self.conv(
                F.swapaxes(x, 1, 2))[:, :, :x.shape[2]], 1, 2)

        if not self.attention:
            return ret

        if dims == 1:
            enc = self.encoding[:, -1, :]
        else:
            enc = self.encoding[:, -1:, :]
        return sum(F.broadcast(self.U(enc), ret))
Project: NlpUtil    Author: trtd56
def __array(array, dtype, is_volatile, transposition):
        volatile = "ON" if is_volatile else "OFF"
        if not transposition:
            return Variable(XP.__lib.array(array, dtype=dtype), volatile=volatile)
        else:
            return F.swapaxes(Variable(XP.__lib.array(array, dtype=dtype), volatile=volatile), 0, 1)
Project: chainer-qrnn    Author: musyoku
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
        pad = self._kernel_size - 1
        WX = self.W(X)
        if pad > 0:
            WX = WX[:, :, :-pad]
        Vh = self.V(ht_enc)
        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        self.contexts = []
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if t == 0:
                ct = (1 - f) * z
                self.contexts.append(ct)
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                self.contexts.append(ct)

        if skip_mask is not None:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t]
            bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
            mask = 1 if skip_mask is None else skip_mask[..., None]     # to skip PAD
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            if t == 0:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

        return self.H
Project: chainer-qrnn    Author: musyoku
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
        pad = self._kernel_size - 1
        WX = self.W(X)[:, :, -pad-1, None]
        Vh = self.V(ht_enc)

        Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

        # f-pooling
        Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
        Z = functions.tanh(Z)
        F = self.zoneout(F)
        O = functions.sigmoid(O)
        T = Z.shape[2]

        # compute ungated hidden states
        for t in xrange(T):
            z = Z[..., t]
            f = F[..., t]
            if self.contexts is None:
                ct = (1 - f) * z
                self.contexts = [ct]
            else:
                ct = f * self.contexts[-1] + (1 - f) * z
                self.contexts.append(ct)

        if skip_mask is not None:
            assert skip_mask.shape[1] == H_enc.shape[2]
            softmax_bias = (skip_mask == 0) * -1e6

        # compute attention weights (eq.8)
        H_enc = functions.swapaxes(H_enc, 1, 2)
        for t in xrange(T):
            ct = self.contexts[t - T]
            bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
            mask = 1 if skip_mask is None else skip_mask[..., None]     # to skip PAD
            alpha = functions.batch_matmul(H_enc, ct) + bias
            alpha = functions.softmax(alpha) * mask
            alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
            kt = functions.sum(alpha * H_enc, axis=1)
            ot = O[..., t]
            self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

            if self.H is None:
                self.H = functions.expand_dims(self.ht, 2)
            else:
                self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

        return self.H
Project: mlpnlp-nmt    Author: mlpnlp
def encodeSentenceFWD(self, train_mode, sentence, args, dropout_rate):
        if args.gpu_enc != args.gpu_dec:  # encoder and decoder live on different GPUs
            chainer.cuda.get_device(args.gpu_enc).use()
        encLen = len(sentence)  # sentence length
        cMBSize = len(sentence[0])  # minibatch size

        # look up the embeddings for the whole input sentence at once
        encEmbList = self.getEncoderInputEmbeddings(sentence, args)

        flag_train = (train_mode > 0)
        lstmVars = [0] * self.n_layers * 2
        if self.flag_merge_encfwbw == 0:  # run the forward and backward LSTMs independently
            hyf, cyf, fwHout = self.model.encLSTM_f(
                None, None, encEmbList, flag_train, args)  # forward
            hyb, cyb, bkHout = self.model.encLSTM_b(
                None, None, encEmbList[::-1], flag_train, args)  # backward
            for z in six.moves.range(self.n_layers):
                lstmVars[2 * z] = cyf[z] + cyb[z]
                lstmVars[2 * z + 1] = hyf[z] + hyb[z]
        elif self.flag_merge_encfwbw == 1:  # merge forward and backward outputs at every layer
            sp = (cMBSize, self.hDim)
            for z in six.moves.range(self.n_layers):
                if z == 0:  # first layer: feed the embeddings
                    biH = encEmbList
                else:  # upper layers: feed the previous layer's outputs
                    # bkHout from the backward LSTM is in reverse order, so flip it back
                    biH = fwHout + bkHout[::-1]
                # forward LSTM of layer z
                hyf, cyf, fwHout = self.model.encLSTM_f(
                    z, biH, flag_train, dropout_rate, args)
                # backward LSTM of layer z
                hyb, cyb, bkHout = self.model.encLSTM_b(
                    z, biH[::-1], flag_train, dropout_rate, args)
                # reshape the cell/hidden states so they can later serve as the
                # decoder LSTM's initial states
                lstmVars[2 * z] = chaFunc.reshape(cyf + cyb, sp)
                lstmVars[2 * z + 1] = chaFunc.reshape(hyf + hyb, sp)
        else:
            assert 0, "ERROR"

        # final encoder hidden states
        if self.flag_enc_boseos == 0:  # default
            # sum the forward states and the (re-reversed) backward states
            biHiddenStack = fwHout[:, ] + bkHout[::-1]
        elif self.flag_enc_boseos == 1:
            bkHout2 = bkHout[::-1]  # restore the original (forward) order
            biHiddenStack = fwHout[1:encLen - 1, ] + bkHout2[1:encLen - 1, ]
            # drop the BOS and EOS positions (TODO)
            encLen -= 2
        else:
            assert 0, "ERROR"
        # (enc length, minibatch size, hidden dim)
        #    => (minibatch size, enc length, hidden dim)
        biHiddenStackSW01 = chaFunc.swapaxes(biHiddenStack, 0, 1)
        # stack the per-layer LSTM states to initialize the decoder LSTM
        lstmVars = chaFunc.stack(lstmVars)
        # pack the encoder outputs into an encInfoObject and return it
        retO = self.encInfoObject(biHiddenStackSW01, lstmVars, encLen, cMBSize)
        return retO