The following 25 code examples, extracted from open-source Python projects, illustrate how to use chainer.functions.broadcast().
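Before the project examples, a minimal sketch of the API itself may help: chainer.functions.broadcast() takes several Variables and returns each of them broadcast to their common shape (unlike F.broadcast_to, which expands a single Variable to an explicitly given shape). The arrays and shapes below are illustrative only and do not come from any of the projects listed here.

import numpy as np
import chainer
import chainer.functions as F

# Two Variables with broadcast-compatible shapes (toy data for illustration).
x = chainer.Variable(np.zeros((4, 1, 3), dtype=np.float32))  # shape (4, 1, 3)
y = chainer.Variable(np.ones((1, 2, 3), dtype=np.float32))   # shape (1, 2, 3)

# F.broadcast returns a tuple with each input expanded to the common shape.
bx, by = F.broadcast(x, y)
print(bx.shape, by.shape)  # both (4, 2, 3)

With a single argument, F.broadcast returns a lone Variable rather than a tuple, which is why the check_forward test example below wraps the result in a tuple before checking shapes.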
def __call__(self, x):
    h = x
    for l in self.conv_layers:
        h = self.activation(l(h))

    # Advantage
    batch_size = x.shape[0]
    ya = self.a_stream(h)
    mean = F.reshape(
        F.sum(ya, axis=1) / self.n_actions, (batch_size, 1))
    ya, mean = F.broadcast(ya, mean)
    ya -= mean

    # State value
    ys = self.v_stream(h)

    ya, ys = F.broadcast(ya, ys)
    q = ya + ys
    return action_value.DiscreteActionValue(q)
def __call__(self, ws, cs, ls, ts):
    h_w = self.emb_word(ws)  # (batchsize, windowsize, word_dim)
    h_c = self.emb_char(cs)  # (batchsize, windowsize, max_char_len, char_dim)
    batchsize, windowsize, _, _ = h_c.data.shape
    # (batchsize, windowsize, char_dim)
    h_c = F.sum(h_c, 2)
    h_c, ls = F.broadcast(h_c, F.reshape(ls, (batchsize, windowsize, 1)))
    h_c = h_c / ls
    h = F.concat([h_w, h_c], 2)
    h = F.reshape(h, (batchsize, -1))
    # ys = self.linear1(h)
    h = F.relu(self.linear1(h))
    h = F.dropout(h, ratio=.5, train=self.train)
    ys = self.linear2(h)
    loss = F.softmax_cross_entropy(ys, ts)
    acc = F.accuracy(ys, ts)
    chainer.report({
        "loss": loss,
        "accuracy": acc
        }, self)
    return loss
def __call__(self, X, ht_enc):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[..., :-pad]
    Vh = self.V(ht_enc)

    # copy Vh
    # e.g.
    # WX = [[[ 0  1  2]
    #        [ 3  4  5]
    #        [ 6  7  8]
    # Vh = [[11, 12, 13]]
    #
    # Vh, WX = F.broadcast(F.expand_dims(Vh, axis=2), WX)
    #
    # WX = [[[ 0  1  2]
    #        [ 3  4  5]
    #        [ 6  7  8]
    # Vh = [[[11 11 11]
    #        [12 12 12]
    #        [13 13 13]
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def gaussian_likelihood(x, mu, var):
    """Returns likelihood of ``x``, or ``N(x; mu, var)``

    Args:
        x(float, numpy.ndarray or chainer.Variable): sample data
        mu(float or chainer.Variable): mean of Gaussian
        var(float): variance of Gaussian

    Returns:
        chainer.Variable: Variable holding likelihood ``N(x; mu, var)``
        whose shape is same as that of ``x``
    """
    if numpy.isscalar(x):
        x = numpy.array(x)
    if isinstance(x, numpy.ndarray):
        x = chainer.Variable(x.astype(numpy.float32))

    if numpy.isscalar(mu):
        mu = numpy.array(mu)
    if isinstance(mu, numpy.ndarray):
        mu = chainer.Variable(mu.astype(numpy.float32))

    x, mu = F.broadcast(x, mu)
    return F.exp(-(x - mu) ** 2 / var / 2) / numpy.sqrt(2 * numpy.pi * var)
def __call__(self, x):
    minibatch_size = x.shape[0]
    activation = F.reshape(self.t(x), (-1, self.n_kernels, self.kernel_dim))
    activation_ex = F.expand_dims(activation, 3)
    activation_ex_t = F.expand_dims(F.transpose(activation, (1, 2, 0)), 0)
    activation_ex, activation_ex_t = F.broadcast(activation_ex, activation_ex_t)
    diff = activation_ex - activation_ex_t
    xp = chainer.cuda.get_array_module(x.data)
    eps = F.expand_dims(xp.eye(minibatch_size, dtype=xp.float32), 1)
    eps = F.broadcast_to(eps, (minibatch_size, self.n_kernels, minibatch_size))
    sum_diff = F.sum(abs(diff), axis=2)
    sum_diff = F.broadcast_to(sum_diff, eps.shape)
    abs_diff = sum_diff + eps
    minibatch_features = F.sum(F.exp(-abs_diff), 2)
    return F.concat((x, minibatch_features), axis=1)
def __call__(self, x):
    xp = chainer.cuda.get_array_module(x.data)
    batchsize = x.shape[0]

    if self.train_weights == False and self.initial_T is not None:
        self.T.W.data = self.initial_T

    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)

    norm = F.sum(abs(M - M_T), axis=2)
    eraser = F.broadcast_to(xp.eye(batchsize, dtype=x.dtype).reshape((batchsize, 1, batchsize)), norm.shape)
    c_b = F.exp(-(norm + 1e6 * eraser))
    o_b = F.sum(c_b, axis=2)

    if self.train_weights == False:
        self.initial_T = self.T.W.data

    return F.concat((x, o_b), axis=1)
def convert_unk(embed, cs):
    cs = F.broadcast(cs)
    cexs = embed(cs)
    return (cexs,)
def __call__(self, e1, e2):
    ele2 = F.reshape(
        F.batch_matmul(e1[:, :, None], e2[:, None, :]),
        (-1, self.in_size1 * self.in_size2))
    res = F.matmul(ele2, F.reshape(self.W, (self.in_size1 * self.in_size2, self.out_size))) + \
        F.matmul(e1, self.V1) + \
        F.matmul(e2, self.V2)
    res, bias = F.broadcast(res, self.b)
    return res + bias
def predict(self, tokens):
    self.train = False
    contexts = self.feature_extract(tokens) \
        if isinstance(tokens[0], unicode) else tokens

    # contexts [(w, c, l), (w, c, l)]
    ws, cs, ls = zip(*contexts)

    max_cs_size = max(c.shape[1] for c in cs)
    new_cs = []
    for c in cs:
        c = np.pad(c, ((0, 0), (0, max_cs_size - c.shape[1])),
                   mode='constant', constant_values=-1)
        new_cs.append(c)
    ws = np.asarray(ws, 'i')
    cs = np.asarray(new_cs, 'i')
    ls = np.asarray(ls, 'f')
    h_w = self.emb_word(ws)  # (batchsize, windowsize, word_dim)
    h_c = self.emb_char(cs)  # (batchsize, windowsize, max_char_len, char_dim)
    batchsize, windowsize, _, _ = h_c.data.shape
    # (batchsize, windowsize, char_dim)
    h_c = F.sum(h_c, 2)
    h_c, ls = F.broadcast(h_c, F.reshape(ls, (batchsize, windowsize, 1)))
    h_c = h_c / ls
    h = F.concat([h_w, h_c], 2)
    h = F.reshape(h, (batchsize, -1))
    # ys = self.linear(h)
    h = F.relu(self.linear1(h))
    h = F.dropout(h, ratio=.5, train=self.train)
    ys = self.linear2(h)
    return ys.data
def attention_sum(encoding, query):
    alpha = F.softmax(F.batch_matmul(encoding, query, transb=True))
    alpha, encoding = F.broadcast(alpha[:, :, :, None],
                                  encoding[:, :, None, :])
    return F.sum(alpha * encoding, axis=1)
def pre(self, x):
    dims = len(x.shape) - 1
    if self.kernel_size == 1:
        ret = self.W(x)
    elif self.kernel_size == 2:
        if dims == 2:
            xprev = Variable(
                self.xp.zeros((self.batch_size, 1, self.in_size),
                              dtype=np.float32), volatile='AUTO')
            xtminus1 = F.concat((xprev, x[:, :-1, :]), axis=1)
        else:
            xtminus1 = self.x
        ret = self.W(x) + self.V(xtminus1)
    else:
        ret = F.swapaxes(self.conv(
            F.swapaxes(x, 1, 2))[:, :, :x.shape[2]], 1, 2)

    if not self.attention:
        return ret

    if dims == 1:
        enc = self.encoding[:, -1, :]
    else:
        enc = self.encoding[:, -1:, :]
    return sum(F.broadcast(self.U(enc), ret))
def forward_one_step(self, X, ht_enc):
    pad = self._kernel_size - 1
    WX = self.W(X)[..., -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)
    return self.pool(functions.split_axis(WX + Vh, self.num_split, axis=1))
def check_forward(self, data):
    xs = [chainer.Variable(x) for x in data]
    bxs = functions.broadcast(*xs)

    # When len(xs) == 1, function returns a Variable object
    if isinstance(bxs, chainer.Variable):
        bxs = (bxs,)

    for bx in bxs:
        self.assertEqual(bx.data.shape, self.out_shape)
        self.assertEqual(bx.data.dtype, self.dtype)
def test_invalid_shape(self):
    x_data = numpy.zeros((3, 2, 5), dtype=numpy.int32)
    y_data = numpy.zeros((1, 3, 4), dtype=numpy.float32)
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)

    with self.assertRaises(type_check.InvalidType):
        functions.broadcast(x, y)
def test_invalid_shape_fill(self):
    x_data = numpy.zeros((3, 2, 5), dtype=numpy.int32)
    y_data = numpy.zeros(4, dtype=numpy.float32)
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)

    with self.assertRaises(type_check.InvalidType):
        functions.broadcast(x, y)
def test_no_args(self):
    with self.assertRaises(type_check.InvalidType):
        functions.broadcast()
def attention_history(self, dL, cue, train=True):
    D = F.concat(dL, axis=0)
    D, Cue = F.broadcast(D, cue)
    S = self.m(F.tanh(self.W_dm(D) + Cue))
    S = F.softmax(F.reshape(S, (1, len(dL))))
    pre_v = F.matmul(S, D)
    return pre_v
def __call__(self, x):
    # Apply a mask to the filters (optional)
    if self.filter_mask is not None:
        w, m = F.broadcast(self.W, Variable(self.filter_mask))
        w = w * m
        # w = self.W * Variable(self.filter_mask)
    else:
        w = self.W

    # Transform the filters
    # w.shape  == (out_channels, in_channels, input_stabilizer_size, ksize, ksize)
    # tw.shape == (out_channels, output_stabilizer_size, in_channels, input_stabilizer_size, ksize, ksize)
    tw = TransformGFilter(self.inds)(w)

    # Fold the transformed filters
    tw_shape = (self.out_channels * self.output_stabilizer_size,
                self.in_channels * self.input_stabilizer_size,
                self.ksize, self.ksize)
    tw = F.Reshape(tw_shape)(tw)

    # If flat_channels is False, we need to flatten the input feature maps to have a single 1d feature dimension.
    if not self.flat_channels:
        batch_size = x.data.shape[0]
        in_ny, in_nx = x.data.shape[-2:]
        x = F.reshape(x, (batch_size, self.in_channels * self.input_stabilizer_size, in_ny, in_nx))

    # Perform the 2D convolution
    y = F.convolution_2d(x, tw, b=None, stride=self.stride, pad=self.pad,
                         use_cudnn=self.use_cudnn)

    # Unfold the output feature maps
    # We do this even if flat_channels is True, because we need to add the same bias to each G-feature map
    batch_size, _, ny_out, nx_out = y.data.shape
    y = F.reshape(y, (batch_size, self.out_channels, self.output_stabilizer_size, ny_out, nx_out))

    # Add a bias to each G-feature map
    if self.usebias:
        bb = F.Reshape((1, self.out_channels, 1, 1, 1))(self.b)
        y, b = F.broadcast(y, bb)
        y = y + b

    # Flatten feature channels if needed
    if self.flat_channels:
        n, nc, ng, nx, ny = y.data.shape
        y = F.reshape(y, (n, nc * ng, nx, ny))

    return y
def __call__(self, X, ht_enc, H_enc, skip_mask=None):
    pad = self._kernel_size - 1
    WX = self.W(X)
    if pad > 0:
        WX = WX[:, :, :-pad]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    self.contexts = []
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if t == 0:
            ct = (1 - f) * z
            self.contexts.append(ct)
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if t == 0:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H
def forward_one_step(self, X, ht_enc, H_enc, skip_mask):
    pad = self._kernel_size - 1
    WX = self.W(X)[:, :, -pad-1, None]
    Vh = self.V(ht_enc)
    Vh, WX = functions.broadcast(functions.expand_dims(Vh, axis=2), WX)

    # f-pooling
    Z, F, O = functions.split_axis(WX + Vh, 3, axis=1)
    Z = functions.tanh(Z)
    F = self.zoneout(F)
    O = functions.sigmoid(O)
    T = Z.shape[2]

    # compute ungated hidden states
    for t in xrange(T):
        z = Z[..., t]
        f = F[..., t]
        if self.contexts is None:
            ct = (1 - f) * z
            self.contexts = [ct]
        else:
            ct = f * self.contexts[-1] + (1 - f) * z
            self.contexts.append(ct)

    if skip_mask is not None:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_bias = (skip_mask == 0) * -1e6

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for t in xrange(T):
        ct = self.contexts[t - T]
        bias = 0 if skip_mask is None else softmax_bias[..., None]  # to skip PAD
        mask = 1 if skip_mask is None else skip_mask[..., None]  # to skip PAD
        alpha = functions.batch_matmul(H_enc, ct) + bias
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)
        ot = O[..., t]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))

        if self.H is None:
            self.H = functions.expand_dims(self.ht, 2)
        else:
            self.H = functions.concat((self.H, functions.expand_dims(self.ht, 2)), axis=2)

    return self.H