def __call__(self, w, train=True, dpratio=0.5):
    """Embed ``w`` and advance every stacked recurrent layer by one step.

    For each layer the stored cell state, the stored hidden state and the
    layer input are passed through dropout, fed to the layer returned by
    ``self.get_l(i)``, and the results are written back with ``set_c`` /
    ``set_h``.  The hidden state of layer ``i`` is the input of layer
    ``i + 1``.

    When ``self.ignore_label`` is set, state entries whose corresponding
    input entry equals zero keep their previous value.
    NOTE(review): this presumably relies on the embedding producing zeros
    for ignored labels and on the input and state shapes matching --
    confirm against the embedding/layer definitions.

    Args:
        w: Input passed to ``self.embed``.
        train: Forwarded to ``F.dropout``.
        dpratio: Dropout ratio used for states and inputs.

    NOTE(review): as visible here the method has no ``return`` statement;
    its effect is the state update.  The definition may be truncated.
    """
    x = self.embed(w)
    self.maybe_init_state(len(x.data), x.data.dtype)

    # Evaluated once; uses an identity test against None in both places
    # where the original mixed ``is not None`` and ``!= None``.
    masked = self.ignore_label is not None

    for i in range(self.num_layers):
        if masked:
            # Taken from the layer input *before* dropout is applied.
            enable = (x.data != 0)

        c = F.dropout(self.get_c(i), train=train, ratio=dpratio)
        h = F.dropout(self.get_h(i), train=train, ratio=dpratio)
        x = F.dropout(x, train=train, ratio=dpratio)
        c, h = self.get_l(i)(c, h, x)

        if masked:
            # Keep the old state wherever the input entry was zero.
            self.set_c(i, F.where(enable, c, self.get_c(i)))
            self.set_h(i, F.where(enable, h, self.get_h(i)))
        else:
            self.set_c(i, c)
            self.set_h(i, h)

        # The stored hidden state feeds the next layer.
        x = self.get_h(i)
def attend(self, query, key, value, mask, minfs=None):
    """Masked dot-product attention; returns one context per decoder step.

    Input shapes:
        q=(b, units, dec_l), k=(b, units, enc_l),
        v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)

    ``minfs`` is the fill value used where ``mask`` is false; when omitted
    an array of ``-inf`` with the shape and dtype of the scores is built.
    """
    # Attention scores for every (decoder, encoder) position pair.
    scores = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)

    # Replace scores of masked-out (zero-padded) positions with the fill.
    if minfs is None:
        minfs = self.xp.full(scores.shape, -np.inf, scores.dtype)
    scores = F.where(mask, scores, minfs)
    weights = F.softmax(scores, axis=2)

    # A row along axis 2 that is entirely -inf becomes NaN after softmax,
    # so those entries are re-masked to zero.
    nan_positions = self.xp.isnan(weights.data)
    zero_fill = self.xp.zeros(weights.shape, dtype=weights.dtype)
    weights = F.where(nan_positions, zero_fill, weights)

    # Weighted sum of the values over the encoder axis.
    weights = weights[:, None]  # (b, 1, dec_l, enc_l)
    weighted = F.broadcast_to(weights, value.shape) * value
    return F.sum(weighted, axis=3, keepdims=True)  # (b, units, dec_l, 1)
def __call__(self, w, train=True, dpratio=0.5):
    """Embed ``w``, run one step of every stacked layer, project the output.

    For each layer the stored cell state, the stored hidden state and the
    layer input are passed through dropout, fed to the layer returned by
    ``self.get_l(i)``, and the results are written back with ``set_c`` /
    ``set_h``.  The hidden state of layer ``i`` is the input of layer
    ``i + 1``.

    When ``self.ignore_label`` is set, state entries whose corresponding
    input entry equals zero keep their previous value.
    NOTE(review): this presumably relies on the embedding producing zeros
    for ignored labels and on the input and state shapes matching --
    confirm against the embedding/layer definitions.

    Args:
        w: Input passed to ``self.embed``.
        train: Forwarded to ``F.dropout``.
        dpratio: Dropout ratio used for states, inputs and the final output.

    Returns:
        ``self.hy`` applied to the (dropped-out) hidden state of the last
        layer.
    """
    x = self.embed(w)
    self.maybe_init_state(len(x.data), x.data.dtype)

    # Evaluated once; uses an identity test against None in both places
    # where the original mixed ``is not None`` and ``!= None``.
    masked = self.ignore_label is not None

    for i in range(self.num_layers):
        if masked:
            # Taken from the layer input *before* dropout is applied.
            enable = (x.data != 0)

        c = F.dropout(self.get_c(i), train=train, ratio=dpratio)
        h = F.dropout(self.get_h(i), train=train, ratio=dpratio)
        x = F.dropout(x, train=train, ratio=dpratio)
        c, h = self.get_l(i)(c, h, x)

        if masked:
            # Keep the old state wherever the input entry was zero.
            self.set_c(i, F.where(enable, c, self.get_c(i)))
            self.set_h(i, F.where(enable, h, self.get_h(i)))
        else:
            self.set_c(i, c)
            self.set_h(i, h)

        # The stored hidden state feeds the next layer.
        x = self.get_h(i)

    x = F.dropout(x, train=train, ratio=dpratio)
    return self.hy(x)
def post_decode_once(self, output, state, train=True):
    """Decrease the remaining lengths in ``state`` after one decode step.

    When ``self.byte`` is truthy, each entry of ``output`` consumes the
    length of ``self.vocab.itos(entry)`` plus one; otherwise every entry
    consumes exactly 1.  Entries that drop below zero are replaced by the
    corresponding entries of ``self.zeros``.

    Args:
        output: Array of emitted ids (must support ``tolist()``).
        state: Dict holding the ``'lengths'`` variable; updated in place.
        train: When false the mask variable is created as volatile.

    Returns:
        The same ``state`` dict with ``'lengths'`` updated.
    """
    remaining = state['lengths']

    if not self.byte:
        remaining -= 1
    else:
        to_text = self.vocab.itos
        # One column per output entry: its text length plus one.
        used = self.xp.array(
            [[len(to_text(token)) + 1] for token in output.tolist()])
        remaining -= used

    # Keep non-negative lengths; substitute self.zeros everywhere else.
    still_valid = chainer.Variable(remaining.data >= 0, volatile=not train)
    state['lengths'] = F.where(still_valid, remaining, self.zeros)
    return state
def _attend(self, p):
    """Return softmax attention from the product of source states and ``p``.

    Scores are ``F.batch_matmul(self.source_hiddens, p)``; positions where
    ``self.mask`` is false take the value from ``self.minf`` before the
    softmax is applied.
    """
    scores = F.batch_matmul(self.source_hiddens, p)
    masked_scores = F.where(self.mask, scores, self.minf)
    return F.softmax(masked_scores)
def _attend(self, p):
    """Return softmax attention computed by a small feed-forward scorer.

    ``p`` is transformed by ``self.xh``, broadcast to ``self.shape2`` and
    added to ``self.h``; after ``tanh`` the result is scored by ``self.hw``
    and reshaped to ``(batchsize, src_len, 1)``.  Positions where
    ``self.mask`` is false take the value from ``self.minf`` before the
    softmax is applied.
    """
    # Project the query and replicate it along a new axis 1.
    query = F.expand_dims(self.xh(p), 1)
    query = F.broadcast_to(query, self.shape2)
    hidden = F.tanh(self.h + query)

    # Flatten to 2-D so self.hw can be applied to every position at once.
    flat_shape = (self.batchsize * self.src_len, self.dim_hid)
    flat_scores = self.hw(F.reshape(hidden, flat_shape))
    scores = F.reshape(flat_scores, (self.batchsize, self.src_len, 1))

    scores = F.where(self.mask, scores, self.minf)
    return F.softmax(scores)
def check_forward(self, c_data, x_data, y_data):
    """Check that ``functions.where`` picks from ``x`` or ``y`` per element.

    The output must have the shape of ``x``; for every index of the
    condition, the output equals ``x`` where the condition is truthy and
    ``y`` otherwise.
    """
    cond = chainer.Variable(c_data)
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)

    result = functions.where(cond, x, y)

    self.assertEqual(x.data.shape, result.data.shape)
    for index in numpy.ndindex(cond.data.shape):
        # The condition decides which input the output element must match.
        source = x if cond.data[index] else y
        self.assertEqual(source.data[index], result.data[index])
def __call__(self, x, mask=None):
    """Apply dropout, a convolution and a gated linear unit to ``x``.

    The convolution output is split into two halves along axis 1; the
    first half is multiplied by the sigmoid of the second.  When ``mask``
    is given, the gated result is additionally multiplied by it.
    """
    dropped = F.dropout(x, ratio=self.dropout)
    linear, gate = F.split_axis(self.conv(dropped), 2, axis=1)
    gated = linear * F.sigmoid(gate)
    if mask is None:
        return gated
    gated *= mask
    return gated
    # TODO: For layers whose output is not directly fed to a gated linear
    # unit, we initialize weights from N(0, sqrt(1 / n_l)) where n_l is the
    # number of input connections for each neuron.
def __accuracy(self, y, t):
    """Return the summed squared error between expected and target ratings.

    The expected rating index is the softmax-weighted mean of the class
    indices along axis 1 of ``y``.  Positions whose target is negative are
    excluded by substituting the target itself for the prediction, so they
    contribute zero.  Differences are scaled by ``self.rating_unit`` before
    squaring.  All intermediate variables are volatile.

    Args:
        y: Variable whose data has three axes; axis 1 indexes the classes.
        t: Variable of targets; negative entries mark positions to skip.
    """
    xp = self.xp
    batch, n_class, n_item = y.data.shape

    # Class indices 0..n_class-1 replicated to the full shape of y.
    class_ids = np.arange(n_class, dtype=np.float32).reshape((1, -1, 1))
    class_ids = class_ids.repeat(batch, axis=0).repeat(n_item, axis=2)
    class_ids = Variable(xp.asarray(class_ids), volatile=True)

    probs = F.softmax(Variable(y.data, volatile=True))
    expected = F.sum(class_ids * probs, axis=1)

    has_target = Variable(t.data >= 0, volatile=True)
    target = Variable(t.data.astype(np.float32), volatile=True)
    # Where no target exists, use the target itself so the error is zero.
    expected = F.where(has_target, expected, target)

    return F.sum(((expected - target) * self.rating_unit) ** 2)