The following 50 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.python.ops.math_ops.matmul().
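Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the tensor values are arbitrary and only illustrative) showing the basic call, including the transpose_a/transpose_b keyword arguments that recur throughout the examples:

import tensorflow as tf
from tensorflow.python.ops import math_ops

# Two small constant matrices; values are for illustration only.
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])   # shape [2, 2]
b = tf.constant([[5.0, 6.0], [7.0, 8.0]])   # shape [2, 2]

product = math_ops.matmul(a, b)                        # a * b
product_bt = math_ops.matmul(a, b, transpose_b=True)   # a * b^T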
def _CplxMatMulGrad(op, grad):
  inp0 = tf.conj(op.inputs[0])
  inp1 = tf.conj(op.inputs[1])
  t_a = op.get_attr("transpose_a")
  t_b = op.get_attr("transpose_b")
  if not t_a and not t_b:
    return (math_ops.matmul(grad, inp1, transpose_b=True),
            math_ops.matmul(inp0, grad, transpose_a=True))
  elif not t_a and t_b:
    return (math_ops.matmul(grad, inp1),
            math_ops.matmul(grad, inp0, transpose_a=True))
  elif t_a and not t_b:
    return (math_ops.matmul(inp1, grad, transpose_b=True),
            math_ops.matmul(inp0, grad))
  elif t_a and t_b:
    return (math_ops.matmul(inp1, grad, transpose_a=True, transpose_b=True),
            math_ops.matmul(grad, inp0, transpose_a=True, transpose_b=True))
def _linear_predictions(self, examples):
  """Returns predictions of the form w*x."""
  with name_scope('sdca/prediction'):
    sparse_variables = self._convert_n_to_tensor(self._variables[
        'sparse_features_weights'])
    result = 0.0
    for sfc, sv in zip(examples['sparse_features'], sparse_variables):
      # TODO(sibyl-Aix6ihai): following does not take care of missing features.
      result += math_ops.segment_sum(
          math_ops.mul(
              array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
          sfc.example_indices)
    dense_features = self._convert_n_to_tensor(examples['dense_features'])
    dense_variables = self._convert_n_to_tensor(self._variables[
        'dense_features_weights'])
    for i in range(len(dense_variables)):
      result += math_ops.matmul(dense_features[i],
                                array_ops.expand_dims(dense_variables[i], -1))
  # Reshaping to allow shape inference at graph construction time.
  return array_ops.reshape(result, [-1])
def to_weighted_sum(self,
                    transformed_input_tensor,
                    num_outputs=1,
                    weight_collections=None,
                    trainable=True):
  """Returns a Tensor as linear predictions and a list of created Variable."""

  def _weight(name):
    return variable_scope.get_variable(
        name,
        shape=[self.dimension, num_outputs],
        initializer=init_ops.zeros_initializer,
        collections=_add_variable_collection(weight_collections))

  if self.name:
    weight = _weight("weight")
  else:
    # Old behavior to support a subset of old checkpoints.
    weight = _weight("_weight")

  # The _RealValuedColumn has the shape of [batch_size, column.dimension].
  log_odds_by_dim = math_ops.matmul(
      transformed_input_tensor, weight, name="matmul")
  return log_odds_by_dim, [weight]
def to_weighted_sum(self,
                    input_tensor,
                    num_outputs=1,
                    weight_collections=None,
                    trainable=True):

  def _weight(name):
    return variable_scope.get_variable(
        name,
        shape=[self.dimension, num_outputs],
        initializer=init_ops.zeros_initializer,
        collections=_add_variable_collection(weight_collections))

  if self.name:
    weight = _weight("weight")
  else:
    # Old behavior to support a subset of old checkpoints.
    weight = _weight("_weight")

  # The _RealValuedColumn has the shape of [batch_size, column.dimension].
  log_odds_by_dim = math_ops.matmul(input_tensor, weight, name="matmul")
  return log_odds_by_dim, [weight]
def _chol_capacitance(self, batch_mode):
  """Cholesky factorization of the capacitance term."""
  # Cholesky factor for (D^{-1} + V^T M^{-1} V), which is sometimes
  # known as the "capacitance" matrix.

  # self._operator will use batch if need be. Automatically.  We cannot force
  # that here.
  # M^{-1} V
  minv_v = self._operator.solve(self._v)
  # V^T M^{-1} V
  if batch_mode:
    vt_minv_v = math_ops.batch_matmul(self._v, minv_v, adj_x=True)
  else:
    vt_minv_v = math_ops.matmul(self._v, minv_v, transpose_a=True)

  # D^{-1} + V^T M^{-1} V
  capacitance = self._diag_inv_operator.add_to_tensor(vt_minv_v)
  # Cholesky[D^{-1} + V^T M^{-1} V]
  return linalg_ops.cholesky(capacitance)
def _chol_capacitance(self, batch_mode):
  """Cholesky factorization of the capacitance term."""
  # Cholesky factor for (D^{-1} + V^T M^{-1} V), which is sometimes
  # known as the "capacitance" matrix.

  # self._operator will use batch if need be. Automatically.  We cannot force
  # that here.
  # M^{-1} V
  minv_v = self._operator.solve(self._v)
  # V^T M^{-1} V
  vt_minv_v = math_ops.matmul(self._v, minv_v, adjoint_a=True)

  # D^{-1} + V^T M^{-1} V
  capacitance = self._diag_inv_operator.add_to_tensor(vt_minv_v)
  # Cholesky[D^{-1} + V^T M^{-1} V]
  return linalg_ops.cholesky(capacitance)
def __call__(self, inputs, state, scope=None):
  with vs.variable_scope(scope or "eunn_cell"):
    state = _eunn_loop(state, self._capacity, self.diag_vec, self.off_vec,
                       self.diag, self._fft)

    input_matrix_init = init_ops.random_uniform_initializer(-0.01, 0.01)
    if self._comp:
      input_matrix_re = vs.get_variable(
          "U_re", [inputs.get_shape()[-1], self._hidden_size],
          initializer=input_matrix_init)
      input_matrix_im = vs.get_variable(
          "U_im", [inputs.get_shape()[-1], self._hidden_size],
          initializer=input_matrix_init)
      inputs_re = math_ops.matmul(inputs, input_matrix_re)
      inputs_im = math_ops.matmul(inputs, input_matrix_im)
      inputs = math_ops.complex(inputs_re, inputs_im)
    else:
      input_matrix = vs.get_variable(
          "U", [inputs.get_shape()[-1], self._hidden_size],
          initializer=input_matrix_init)
      inputs = math_ops.matmul(inputs, input_matrix)

    bias = vs.get_variable(
        "modReLUBias", [self._hidden_size],
        initializer=init_ops.constant_initializer())
    output = self._activation((inputs + state), bias, self._comp)

  return output, output
def testStochasticVariablesWithPrior(self):
  shape = (10, 20)
  prior = dist.Normal(0., 1.)
  with variable_scope.variable_scope(
      "stochastic_variables",
      custom_getter=sv.make_stochastic_variable_getter(
          dist_cls=dist.NormalWithSoftplusScale, prior=prior)):
    w = variable_scope.get_variable("weights", shape)

  x = random_ops.random_uniform((8, 10))
  y = math_ops.matmul(x, w)

  prior_map = vi._find_variational_and_priors(y, None)
  self.assertEqual(prior_map[w], prior)
  elbo = vi.elbo(y, keep_batch_dim=False)

  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    sess.run(elbo)
def testStochasticVariablesWithCallablePriorInitializer(self):

  def prior_init(shape, dtype):
    return dist.Normal(
        array_ops.zeros(shape, dtype), array_ops.ones(shape, dtype))

  with variable_scope.variable_scope(
      "stochastic_variables",
      custom_getter=sv.make_stochastic_variable_getter(
          dist_cls=dist.NormalWithSoftplusScale, prior=prior_init)):
    w = variable_scope.get_variable("weights", (10, 20))

  x = random_ops.random_uniform((8, 10))
  y = math_ops.matmul(x, w)

  prior_map = vi._find_variational_and_priors(y, None)
  self.assertTrue(isinstance(prior_map[w], dist.Normal))
  elbo = vi.elbo(y, keep_batch_dim=False)

  with self.test_session() as sess:
    sess.run(variables.global_variables_initializer())
    sess.run(elbo)
def testGradientWithZeroWeight(self):
  with ops.Graph().as_default():
    random_seed.set_random_seed(0)

    inputs = array_ops.ones((2, 3))
    weights = variable_scope.get_variable(
        'weights',
        shape=[3, 4],
        initializer=init_ops.truncated_normal_initializer())
    predictions = math_ops.matmul(inputs, weights)

    optimizer = momentum_lib.MomentumOptimizer(
        learning_rate=0.001, momentum=0.9)
    loss = loss_ops.mean_pairwise_squared_error(predictions, predictions, 0)

    gradients_to_variables = optimizer.compute_gradients(loss)

    init_op = variables.global_variables_initializer()

    with self.test_session() as sess:
      sess.run(init_op)
      for grad, _ in gradients_to_variables:
        np_grad = sess.run(grad)
        self.assertFalse(np.isnan(np_grad).any())
def _linear_predictions(self, examples):
  """Returns predictions of the form w*x."""
  with name_scope('sdca/prediction'):
    sparse_variables = self._convert_n_to_tensor(self._variables[
        'sparse_features_weights'])
    result = 0.0
    for sfc, sv in zip(examples['sparse_features'], sparse_variables):
      # TODO(sibyl-Aix6ihai): following does not take care of missing features.
      result += math_ops.segment_sum(
          math_ops.multiply(
              array_ops.gather(sv, sfc.feature_indices), sfc.feature_values),
          sfc.example_indices)
    dense_features = self._convert_n_to_tensor(examples['dense_features'])
    dense_variables = self._convert_n_to_tensor(self._variables[
        'dense_features_weights'])
    for i in range(len(dense_variables)):
      result += math_ops.matmul(dense_features[i],
                                array_ops.expand_dims(dense_variables[i], -1))
  # Reshaping to allow shape inference at graph construction time.
  return array_ops.reshape(result, [-1])
def test_matrix_matrix(self):
  xy_lt = core.LabeledTensor(
      array_ops.reshape(math_ops.range(6), (2, 3)), ['x', 'y'])
  yz_lt = core.LabeledTensor(
      array_ops.reshape(math_ops.range(12), (3, 4)), ['y', 'z'])

  matmul_lt = ops.matmul(xy_lt, yz_lt)
  golden_lt = core.LabeledTensor(
      math_ops.matmul(xy_lt.tensor, yz_lt.tensor), ['x', 'z'])
  self.assertLabeledTensorsEqual(matmul_lt, golden_lt)

  transpose = lambda x: core.transpose(x, list(x.axes.keys())[::-1])

  matmul_lt = ops.matmul(xy_lt, transpose(yz_lt))
  self.assertLabeledTensorsEqual(matmul_lt, golden_lt)

  matmul_lt = ops.matmul(transpose(xy_lt), yz_lt)
  self.assertLabeledTensorsEqual(matmul_lt, golden_lt)

  matmul_lt = ops.matmul(transpose(xy_lt), transpose(yz_lt))
  self.assertLabeledTensorsEqual(matmul_lt, golden_lt)

  matmul_lt = ops.matmul(yz_lt, xy_lt)
  self.assertLabeledTensorsEqual(matmul_lt, transpose(golden_lt))
def test_invalid(self):
  scalar_lt = core.LabeledTensor(array_ops.ones(()), [])
  x_lt = core.LabeledTensor(array_ops.ones((2,)), ['x'])
  x2_lt = core.LabeledTensor(array_ops.ones((3,)), ['x'])
  y_lt = core.LabeledTensor(array_ops.ones((3,)), ['y'])
  xy_lt = core.LabeledTensor(array_ops.ones((2, 3)), ['x', 'y'])
  xyz_lt = core.LabeledTensor(array_ops.ones((2, 3, 1)), ['x', 'y', 'z'])

  with self.assertRaisesRegexp(ValueError, 'inputs with at least rank'):
    ops.matmul(x_lt, scalar_lt)

  with self.assertRaises(NotImplementedError):
    ops.matmul(x_lt, xyz_lt)

  with self.assertRaisesRegexp(ValueError, 'exactly one axis in common'):
    ops.matmul(x_lt, y_lt)

  with self.assertRaises(NotImplementedError):
    ops.matmul(xy_lt, xy_lt)

  with self.assertRaisesRegexp(ValueError, 'does not match'):
    ops.matmul(x_lt, x2_lt)
def _compute_euclidean_distance(cls, inputs, clusters):
  """Computes Euclidean distance between each input and each cluster center.

  Args:
    inputs: list of input Tensors.
    clusters: cluster Tensor.

  Returns:
    list of Tensors, where each element corresponds to each element in inputs.
    The value is the distance of each row to all the cluster centers.
  """
  output = []
  for inp in inputs:
    with ops.colocate_with(inp):
      # Computes Euclidean distance. Note the first and third terms are
      # broadcast additions.
      squared_distance = (math_ops.reduce_sum(
          math_ops.square(inp), 1, keep_dims=True) - 2 * math_ops.matmul(
              inp, clusters, transpose_b=True) + array_ops.transpose(
                  math_ops.reduce_sum(
                      math_ops.square(clusters), 1, keep_dims=True)))
      output.append(squared_distance)
  return output
def _compute_cosine_distance(cls, inputs, clusters, inputs_normalized=True):
  """Computes cosine distance between each input and each cluster center.

  Args:
    inputs: list of input Tensors.
    clusters: cluster Tensor.
    inputs_normalized: if True, it assumes that inp and clusters are
      normalized and computes the dot product which is equivalent to the
      cosine distance. Else it L2 normalizes the inputs first.

  Returns:
    list of Tensors, where each element corresponds to each element in inp.
    The value is the distance of each row to all the cluster centers.
  """
  output = []
  if not inputs_normalized:
    with ops.colocate_with(clusters):
      clusters = nn_impl.l2_normalize(clusters, dim=1)
  for inp in inputs:
    with ops.colocate_with(inp):
      if not inputs_normalized:
        inp = nn_impl.l2_normalize(inp, dim=1)
      output.append(1 - math_ops.matmul(inp, clusters, transpose_b=True))
  return output
def _covariance(x, diag):
  """Defines the covariance operation of a matrix.

  Args:
    x: a matrix Tensor. Dimension 0 should contain the number of examples.
    diag: if True, it computes the diagonal covariance.

  Returns:
    A Tensor representing the covariance of x. In the case of
    diagonal matrix just the diagonal is returned.
  """
  num_points = math_ops.to_float(array_ops.shape(x)[0])
  x -= math_ops.reduce_mean(x, 0, keep_dims=True)
  if diag:
    cov = math_ops.reduce_sum(
        math_ops.square(x), 0, keep_dims=True) / (num_points - 1)
  else:
    cov = math_ops.matmul(x, x, transpose_a=True) / (num_points - 1)
  return cov
def _define_diag_covariance_probs(self, shard_id, shard):
  """Defines the diagonal covariance probabilities per example in a class.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.

  Returns a matrix num_examples * num_classes.
  """
  # num_classes X 1
  # TODO(xavigonzalvo): look into alternatives to log for
  # reparametrization of variance parameters.
  det_expanded = math_ops.reduce_sum(
      math_ops.log(self._covs + 1e-3), 1, keep_dims=True)
  diff = shard - self._means
  x2 = math_ops.square(diff)
  cov_expanded = array_ops.expand_dims(1.0 / (self._covs + 1e-3), 2)
  # num_classes X num_examples
  x2_cov = math_ops.matmul(x2, cov_expanded)
  x2_cov = array_ops.transpose(array_ops.squeeze(x2_cov, [2]))
  self._probs[shard_id] = -0.5 * (
      math_ops.to_float(self._dimensions) * math_ops.log(2.0 * np.pi) +
      array_ops.transpose(det_expanded) + x2_cov)
def _prepare_gramian(self, factors, gramian):
  """Helper function to create ops to prepare/calculate gramian.

  Args:
    factors: Variable or list of Variable representing (sharded) factors.
      Used to compute the updated corresponding gramian value.
    gramian: Variable storing the gramian calculated from the factors.

  Returns:
    An op that updates the gramian with the calculated value from the factors.
  """
  partial_gramians = []
  for f in factors:
    with ops.colocate_with(f):
      partial_gramians.append(math_ops.matmul(f, f, transpose_a=True))

  with ops.colocate_with(gramian):
    prep_gramian = state_ops.assign(gramian,
                                    math_ops.add_n(partial_gramians)).op

  return prep_gramian
def test_apply(self):
  self._maybe_skip("apply")
  for use_placeholder in False, True:
    for shape in self._shapes_to_test:
      for dtype in self._dtypes_to_test:
        for adjoint in False, True:
          with self.test_session(graph=ops.Graph()) as sess:
            sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
            operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                shape, dtype, use_placeholder=use_placeholder)
            x = self._make_x(operator, adjoint=adjoint)
            op_apply = operator.apply(x, adjoint=adjoint)
            mat_apply = math_ops.matmul(mat, x, adjoint_a=adjoint)
            if not use_placeholder:
              self.assertAllEqual(op_apply.get_shape(),
                                  mat_apply.get_shape())
            op_apply_v, mat_apply_v = sess.run([op_apply, mat_apply],
                                               feed_dict=feed_dict)
            self.assertAC(op_apply_v, mat_apply_v)
def _apply(self, x, adjoint=False):
  u = self.u
  v = self.v
  l = self.base_operator
  d = self.diag_operator

  leading_term = l.apply(x, adjoint=adjoint)

  if adjoint:
    uh_x = math_ops.matmul(u, x, adjoint_a=True)
    d_uh_x = d.apply(uh_x, adjoint=adjoint)
    v_d_uh_x = math_ops.matmul(v, d_uh_x)
    return leading_term + v_d_uh_x
  else:
    vh_x = math_ops.matmul(v, x, adjoint_a=True)
    d_vh_x = d.apply(vh_x, adjoint=adjoint)
    u_d_vh_x = math_ops.matmul(u, d_vh_x)
    return leading_term + u_d_vh_x
def _chol_capacitance(self, batch_mode):
  """Cholesky factorization of the capacitance term."""
  # Cholesky factor for (D^{-1} + V^T M^{-1} V), which is sometimes
  # known as the "capacitance" matrix.

  # We can do a Cholesky decomposition, since a priori M is a
  # positive-definite Hermitian matrix, which causes the "capacitance" to
  # also be positive-definite Hermitian, and thus have a Cholesky
  # decomposition.

  # self._operator will use batch if need be. Automatically.  We cannot force
  # that here.
  # M^{-1} V
  minv_v = self._operator.solve(self._v)
  # V^T M^{-1} V
  vt_minv_v = math_ops.matmul(self._v, minv_v, adjoint_a=True)

  # D^{-1} + V^T M^{-1} V
  capacitance = self._diag_inv_operator.add_to_tensor(vt_minv_v)
  # Cholesky[D^{-1} + V^T M^{-1} V]
  return linalg_ops.cholesky(capacitance)
def sqrt_matmul(self, x):
  """Computes `matmul(self, x)`.

  Doesn't actually do the sqrt! Named as such to agree with API.

  Args:
    x: `Tensor`

  Returns:
    self_times_x: `Tensor`
  """
  m_x = math_ops.matmul(self._m, x)
  vt_x = math_ops.matmul(self._v, x, adjoint_a=True)
  d_vt_x = self._d.matmul(vt_x)
  v_d_vt_x = math_ops.matmul(self._v, d_vt_x)
  return m_x + v_d_vt_x
def sqrt_solve(self, x):
  """Computes `solve(self, x)`.

  Doesn't actually do the sqrt! Named as such to agree with API.

  To compute (M + V D V.T), we use the Woodbury matrix identity:
    inv(M + V D V.T) = inv(M) - inv(M) V inv(C) V.T inv(M)
  where,
    C = inv(D) + V.T inv(M) V.
  See: https://en.wikipedia.org/wiki/Woodbury_matrix_identity

  Args:
    x: `Tensor`

  Returns:
    inv_of_self_times_x: `Tensor`
  """
  minv_x = linalg_ops.matrix_triangular_solve(self._m, x)
  vt_minv_x = math_ops.matmul(self._v, minv_x, transpose_a=True)
  cinv_vt_minv_x = linalg_ops.matrix_solve(
      self._woodbury_sandwiched_term(), vt_minv_x)
  v_cinv_vt_minv_x = math_ops.matmul(self._v, cinv_vt_minv_x)
  minv_v_cinv_vt_minv_x = linalg_ops.matrix_triangular_solve(
      self._m, v_cinv_vt_minv_x)
  return minv_x - minv_v_cinv_vt_minv_x
def _woodbury_sandwiched_term(self):
  """Computes the sandwiched term in the Woodbury identity.

  Computes the "`C`" in the identity:
    inv(M + V D V.T) = inv(M) - inv(M) V inv(C) V.T inv(M)
  where,
    C = inv(D) + V.T inv(M) V.
  See: https://en.wikipedia.org/wiki/Woodbury_matrix_identity

  Returns:
    woodbury_sandwich_term: A `Tensor` to be used like `C`, above.
  """
  minv_v = linalg_ops.matrix_triangular_solve(self._m, self._v)
  vt_minv_v = math_ops.matmul(self._v, minv_v, adjoint_a=True)
  return self._d_inv.add_to_tensor(vt_minv_v)
def _process_matrix(self, matrix, min_rank, event_ndims):
  """Helper to __init__ which gets matrix in batch-ready form."""
  # Pad the matrix so that matmul works in the case of a matrix and vector
  # input.  Keep track if the matrix was padded, to distinguish between a
  # rank 3 tensor and a padded rank 2 tensor.
  # TODO(srvasude): Remove side-effects from functions. It's currently
  # unbroken but error-prone since the function call order may change in
  # the future.
  self._rank_two_event_ndims_one = math_ops.logical_and(
      math_ops.equal(array_ops.rank(matrix), min_rank),
      math_ops.equal(event_ndims, 1))
  left = array_ops.where(self._rank_two_event_ndims_one, 1, 0)
  pad = array_ops.concat(
      [array_ops.ones(
          [left], dtype=dtypes.int32), array_ops.shape(matrix)],
      0)
  return array_ops.reshape(matrix, pad)
def testSqrtMatmul(self):
  # Square roots are not unique, but we should have SS^T x = Ax, and in our
  # case, we should have S = S^T, so SSx = Ax.
  with self.test_session():
    for batch_shape in [(), (2, 3,)]:
      for k in [1, 4]:
        operator, mat = self._build_operator_and_mat(batch_shape, k)

        # Work with 5 simultaneous systems.  5 is arbitrary.
        x = self._rng.randn(*(batch_shape + (k, 5)))

        self._compare_results(
            expected=math_ops.matmul(mat, x).eval(),
            actual=operator.sqrt_matmul(operator.sqrt_matmul(x)))
def testSqrtMatmulSingleMatrix(self):
  with self.test_session():
    batch_shape = ()
    for k in [1, 4]:
      x_shape = batch_shape + (k, 3)
      x = self._rng.rand(*x_shape)
      chol_shape = batch_shape + (k, k)
      chol = self._random_cholesky_array(chol_shape)
      operator = operator_pd_cholesky.OperatorPDCholesky(chol)
      sqrt_operator_times_x = operator.sqrt_matmul(x)
      expected = math_ops.matmul(chol, x)
      self.assertEqual(expected.get_shape(),
                       sqrt_operator_times_x.get_shape())
      self.assertAllClose(expected.eval(), sqrt_operator_times_x.eval())
def testSqrtMatmulBatchMatrixWithTranspose(self):
  with self.test_session():
    batch_shape = (2, 3)
    for k in [1, 4]:
      x_shape = batch_shape + (5, k)
      x = self._rng.rand(*x_shape)
      chol_shape = batch_shape + (k, k)
      chol = self._random_cholesky_array(chol_shape)
      operator = operator_pd_cholesky.OperatorPDCholesky(chol)
      sqrt_operator_times_x = operator.sqrt_matmul(x, transpose_x=True)
      # tf.batch_matmul is defined x * y, so "y" is on the right, not "x".
      expected = math_ops.matmul(chol, x, adjoint_b=True)
      self.assertEqual(expected.get_shape(),
                       sqrt_operator_times_x.get_shape())
      self.assertAllClose(expected.eval(), sqrt_operator_times_x.eval())
def testMatmulSingleMatrix(self):
  with self.test_session():
    batch_shape = ()
    for k in [1, 4]:
      x_shape = batch_shape + (k, 5)
      x = self._rng.rand(*x_shape)
      chol_shape = batch_shape + (k, k)
      chol = self._random_cholesky_array(chol_shape)
      matrix = math_ops.matmul(chol, chol, adjoint_b=True)
      operator = operator_pd_cholesky.OperatorPDCholesky(chol)
      expected = math_ops.matmul(matrix, x)
      self.assertEqual(expected.get_shape(), operator.matmul(x).get_shape())
      self.assertAllClose(expected.eval(), operator.matmul(x).eval())
def testMatmulBatchMatrix(self):
  with self.test_session():
    batch_shape = (2, 3)
    for k in [1, 4]:
      x_shape = batch_shape + (k, 5)
      x = self._rng.rand(*x_shape)
      chol_shape = batch_shape + (k, k)
      chol = self._random_cholesky_array(chol_shape)
      matrix = math_ops.matmul(chol, chol, adjoint_b=True)
      operator = operator_pd_cholesky.OperatorPDCholesky(chol)
      expected = math_ops.matmul(matrix, x)
      self.assertEqual(expected.get_shape(), operator.matmul(x).get_shape())
      self.assertAllClose(expected.eval(), operator.matmul(x).eval())
def testMatmulBatchMatrixWithTranspose(self):
  with self.test_session():
    batch_shape = (2, 3)
    for k in [1, 4]:
      x_shape = batch_shape + (5, k)
      x = self._rng.rand(*x_shape)
      chol_shape = batch_shape + (k, k)
      chol = self._random_cholesky_array(chol_shape)
      matrix = math_ops.matmul(chol, chol, adjoint_b=True)
      operator = operator_pd_cholesky.OperatorPDCholesky(chol)
      operator_times_x = operator.matmul(x, transpose_x=True)
      # tf.batch_matmul is defined x * y, so "y" is on the right, not "x".
      expected = math_ops.matmul(matrix, x, adjoint_b=True)
      self.assertEqual(expected.get_shape(), operator_times_x.get_shape())
      self.assertAllClose(expected.eval(), operator_times_x.eval())
def _updated_mat(self, mat, v, diag):
  # Get dense matrix defined by its square root, which is an update of `mat`:
  #   A = (mat + v D v^T) (mat + v D v^T)^T
  # D is the diagonal matrix with `diag` on the diagonal.

  # If diag is None, then it defaults to the identity matrix, so DV^T = V^T.
  if diag is None:
    diag_vt = array_ops.matrix_transpose(v)
  else:
    diag_mat = array_ops.matrix_diag(diag)
    diag_vt = math_ops.matmul(diag_mat, v, adjoint_b=True)

  v_diag_vt = math_ops.matmul(v, diag_vt)
  sqrt = mat + v_diag_vt
  a = math_ops.matmul(sqrt, sqrt, adjoint_b=True)
  return a.eval()
def _sqrt_matmul(self, x, transpose_x=False):
  v = self._v
  m = self._operator
  d = self._diag_operator
  # The operators call the appropriate matmul/batch_matmul automatically.
  # We cannot override.
  # matmul is defined as: a * b, so transpose_a, transpose_b are used to
  # transpose the left and right.
  mx = m.matmul(x, transpose_x=transpose_x)
  vt_x = math_ops.matmul(v, x, transpose_a=True, transpose_b=transpose_x)
  d_vt_x = d.matmul(vt_x)
  v_d_vt_x = math_ops.matmul(v, d_vt_x)
  return mx + v_d_vt_x
def _sqrt_solve(self, rhs):
  # Recall the square root of this operator is M + VDV^T.
  # The Woodbury formula gives:
  # (M + VDV^T)^{-1}
  # = M^{-1} - M^{-1} V (D^{-1} + V^T M^{-1} V)^{-1} V^T M^{-1}
  # = M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
  # where C is the capacitance matrix.
  # TODO(jvdillon) Determine if recursively applying rank-1 updates is more
  # efficient.  May not be possible because a general n x n matrix can be
  # represented as n rank-1 updates, and solving with this matrix is always
  # done in O(n^3) time.
  m = self._operator
  v = self._v
  cchol = self._chol_capacitance(batch_mode=False)

  # The operators will use batch/singleton mode automatically.  We don't
  # override.
  # M^{-1} rhs
  minv_rhs = m.solve(rhs)
  # V^T M^{-1} rhs
  vt_minv_rhs = math_ops.matmul(v, minv_rhs, transpose_a=True)
  # C^{-1} V^T M^{-1} rhs
  cinv_vt_minv_rhs = linalg_ops.cholesky_solve(cchol, vt_minv_rhs)
  # V C^{-1} V^T M^{-1} rhs
  v_cinv_vt_minv_rhs = math_ops.matmul(v, cinv_vt_minv_rhs)
  # M^{-1} V C^{-1} V^T M^{-1} rhs
  minv_v_cinv_vt_minv_rhs = m.solve(v_cinv_vt_minv_rhs)

  # M^{-1} - M^{-1} V C^{-1} V^T M^{-1}
  return minv_rhs - minv_v_cinv_vt_minv_rhs
def _sqrt_to_dense(self):
  v = self._v
  d = self._diag_operator
  m = self._operator

  d_vt = d.matmul(v, transpose_x=True)
  # Batch op won't be efficient for singletons.  Currently we don't break
  # to_dense into batch/singleton methods.
  v_d_vt = math_ops.batch_matmul(v, d_vt)
  m_plus_v_d_vt = m.to_dense() + v_d_vt
  return m_plus_v_d_vt
def _matmul(self, x, transpose_x=False):
  # tf.matmul is defined a * b.
  chol = array_ops.matrix_band_part(self._chol, -1, 0)
  chol_times_x = math_ops.matmul(
      chol, x, transpose_a=True, transpose_b=transpose_x)
  return math_ops.matmul(chol, chol_times_x)
def _sqrt_matmul(self, x, transpose_x=False):
  chol = array_ops.matrix_band_part(self._chol, -1, 0)
  # tf.matmul is defined a * b.
  return math_ops.matmul(chol, x, transpose_b=transpose_x)
def _variance(self):
  x = math_ops.sqrt(self.df) * self.scale_operator_pd.to_dense()
  d = array_ops.expand_dims(array_ops.matrix_diag_part(x), -1)
  v = math_ops.square(x) + math_ops.matmul(d, d, adjoint_b=True)
  if self.cholesky_input_output_matrices:
    return linalg_ops.cholesky(v)
  return v
def _batch_sqrt_matmul(self, x, transpose_x=False):
  v = self._v
  m = self._operator
  d = self._diag_operator
  # The operators call the appropriate matmul/batch_matmul automatically.
  # We cannot override.
  # batch_matmul is defined as: x * y, so adjoint_a and adjoint_b are the
  # ways to transpose the left and right.
  mx = m.matmul(x, transpose_x=transpose_x)
  vt_x = math_ops.matmul(v, x, adjoint_a=True, adjoint_b=transpose_x)
  d_vt_x = d.matmul(vt_x)
  v_d_vt_x = math_ops.matmul(v, d_vt_x)
  return mx + v_d_vt_x
def _to_dense(self):
  sqrt = self.sqrt_to_dense()
  return math_ops.matmul(sqrt, sqrt, adjoint_b=True)
def _sqrt_to_dense(self):
  v = self._v
  d = self._diag_operator
  m = self._operator

  d_vt = d.matmul(v, transpose_x=True)
  # Batch op won't be efficient for singletons.  Currently we don't break
  # to_dense into batch/singleton methods.
  v_d_vt = math_ops.matmul(v, d_vt)
  m_plus_v_d_vt = m.to_dense() + v_d_vt
  return m_plus_v_d_vt
def _batch_matmul(self, x, transpose_x=False):
  # tf.matmul is defined x * y, so "y" is on the right, not "x".
  chol = array_ops.matrix_band_part(self._chol, -1, 0)
  chol_times_x = math_ops.matmul(
      chol, x, adjoint_a=True, adjoint_b=transpose_x)
  return math_ops.matmul(chol, chol_times_x)
def _sqrt_matmul(self, x, transpose_x=False):
  chol = array_ops.matrix_band_part(self._chol, -1, 0)
  # tf.matmul is defined a * b.
  return math_ops.matmul(chol, x, adjoint_b=transpose_x)
def _to_dense(self):
  chol = array_ops.matrix_band_part(self._chol, -1, 0)
  return math_ops.matmul(chol, chol, adjoint_b=True)
def _variance(self):
  p = self.p * array_ops.expand_dims(array_ops.ones_like(self.n), -1)
  outer_prod = math_ops.matmul(
      array_ops.expand_dims(self._mean_val, -1), array_ops.expand_dims(p, -2))
  return array_ops.matrix_set_diag(-outer_prod,
                                   self._mean_val - self._mean_val * p)
def _forward(self, x):
  x, sample_shape = self.shaper.make_batch_of_event_sample_matrices(x)
  x = math_ops.matmul(self.scale, x)
  x = self.shaper.undo_make_batch_of_event_sample_matrices(x, sample_shape)
  x += self.shift
  return x