The following 47 code examples, extracted from open-source Python projects, illustrate how to use tensorflow.eye().
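Before the project examples, here is a minimal, self-contained sketch (assuming TensorFlow 1.x, which all of the examples below target) of the tf.eye() arguments they rely on: an optional num_columns for rectangular matrices, batch_shape for stacked identities, and an explicit dtype.

import tensorflow as tf

# 3x3 identity matrix, float32 by default
eye3 = tf.eye(3)
# rectangular 2x4 "identity" via num_columns
rect = tf.eye(2, num_columns=4)
# a batch of eight 5x5 identity matrices in float64
batched = tf.eye(5, batch_shape=[8], dtype=tf.float64)

with tf.Session() as sess:
    print(sess.run(eye3))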
def update_link_matrix(self, link_matrix_old, precedence_weighting_old, write_weighting):
    """
    Updating the link matrix takes some effort (in order to vectorize the implementation)
    Instead of the original index-by-index operation, it's all done at once.

    :param link_matrix_old: from previous time step, shape [batch_size, memory_size, memory_size]
    :param precedence_weighting_old: from previous time step, shape [batch_size, memory_size]
    :param write_weighting: from current time step, shape [batch_size, memory_size]
    :return: updated link matrix
    """
    expanded = tf.expand_dims(write_weighting, axis=2)  # vectorizing the paper's original implementation
    w = tf.tile(expanded, [1, 1, self.memory_size])  # shape [batch_size, memory_size, memory_size]
    # shape of w_transpose is the same: [batch_size, memory_size, memory_size]
    w_transp = tf.tile(tf.transpose(expanded, [0, 2, 1]), [1, self.memory_size, 1])
    # in einsum, m and n are the same dimension because tensorflow doesn't support duplicated subscripts. Why?
    lm = (1 - w - w_transp) * link_matrix_old + tf.einsum("bn,bm->bmn", precedence_weighting_old, write_weighting)
    lm *= (1 - tf.eye(self.memory_size, batch_shape=[self.batch_size]))  # making sure self links are off
    return tf.identity(lm, name="Link_matrix")

def test_whiten(self):
    """
    Make sure that predicting using the whitened representation is the
    same as the non-whitened one.
    """
    with self.test_context() as sess:
        Xs, X, F, k, num_data, feed_dict = self.prepare()
        k.compile(session=sess)

        K = k.K(X) + tf.eye(num_data, dtype=settings.float_type) * 1e-6
        L = tf.cholesky(K)
        V = tf.matrix_triangular_solve(L, F, lower=True)
        Fstar_mean, Fstar_var = gpflow.conditionals.conditional(Xs, X, k, F)
        Fstar_w_mean, Fstar_w_var = gpflow.conditionals.conditional(Xs, X, k, V, white=True)

        mean1, var1 = sess.run([Fstar_w_mean, Fstar_w_var], feed_dict=feed_dict)
        mean2, var2 = sess.run([Fstar_mean, Fstar_var], feed_dict=feed_dict)

        # TODO: should tolerance be type dependent?
        assert_allclose(mean1, mean2)
        assert_allclose(var1, var2)

def _build_predict(self, Xnew, full_cov=False):
    """
    Xnew is a data matrix, point at which we want to predict

    This method computes

        p(F* | Y)

    where F* are points on the GP at Xnew, Y are noisy observations at X.
    """
    Kx = self.kern.K(self.X, Xnew)
    K = self.kern.K(self.X) + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * self.likelihood.variance
    L = tf.cholesky(K)
    A = tf.matrix_triangular_solve(L, Kx, lower=True)
    V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
    fmean = tf.matmul(A, V, transpose_a=True) + self.mean_function(Xnew)
    if full_cov:
        fvar = self.kern.K(Xnew) - tf.matmul(A, A, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
    else:
        fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
    return fmean, fvar

def __init__(self, X, Y, kern, likelihood,
             mean_function=None,
             num_latent=None,
             **kwargs):
    """
    X is a data matrix, size N x D
    Y is a data matrix, size N x R
    kern, likelihood, mean_function are appropriate GPflow objects
    """
    X = DataHolder(X)
    Y = DataHolder(Y)
    GPModel.__init__(self, X, Y, kern, likelihood, mean_function, **kwargs)
    self.num_data = X.shape[0]
    self.num_latent = num_latent or Y.shape[1]
    self.q_mu = Parameter(np.zeros((self.num_data, self.num_latent)))
    q_sqrt = np.array([np.eye(self.num_data) for _ in range(self.num_latent)]).swapaxes(0, 2)
    transform = transforms.LowerTriangular(self.num_data, self.num_latent)
    self.q_sqrt = Parameter(q_sqrt, transform=transform)

def gather_indices_2d(x, block_shape, block_stride):
    """Getting gather indices."""
    # making an identity matrix kernel
    kernel = tf.eye(block_shape[0] * block_shape[1])
    kernel = reshape_range(kernel, 0, 1, [block_shape[0], block_shape[1], 1])
    # making indices [1, h, w, 1] to apply convs
    x_shape = common_layers.shape_list(x)
    indices = tf.range(x_shape[2] * x_shape[3])
    indices = tf.reshape(indices, [1, x_shape[2], x_shape[3], 1])
    indices = tf.nn.conv2d(
        tf.cast(indices, tf.float32),
        kernel,
        strides=[1, block_stride[0], block_stride[1], 1],
        padding="VALID")
    # making indices [num_blocks, dim] to gather
    dims = common_layers.shape_list(indices)[:3]
    if all([isinstance(dim, int) for dim in dims]):
        num_blocks = functools.reduce(operator.mul, dims, 1)
    else:
        num_blocks = tf.reduce_prod(dims)
    indices = tf.reshape(indices, [num_blocks, -1])
    return tf.cast(indices, tf.int32)

def normal_sample(mean, var, full_cov=False):
    if full_cov is False:
        z = tf.random_normal(tf.shape(mean), dtype=float_type)
        return mean + z * var ** 0.5
    else:
        S, N, D = shape_as_list(mean)  # var is SNND
        mean = tf.transpose(mean, (0, 2, 1))  # SND -> SDN
        var = tf.transpose(var, (0, 3, 1, 2))  # SNND -> SDNN
        # I = jitter * tf.eye(N, dtype=float_type)[None, None, :, :]  # 11NN
        chol = tf.cholesky(var)  # + I)  # SDNN should be ok without as var already has jitter
        z = tf.random_normal([S, D, N, 1], dtype=float_type)
        f = mean + tf.matmul(chol, z)[:, :, :, 0]  # SDN(1)
        return tf.transpose(f, (0, 2, 1))  # SND

def generate_y(self, y_labeled):
    y_unlabeled_tiled = tf.reshape(tf.tile(tf.eye(self._num_classes), [1, self._batch_size]),
                                   [self._num_classes * self._batch_size, self._num_classes])
    y_all = tf.concat([y_labeled, y_unlabeled_tiled], 0)
    return y_all, y_unlabeled_tiled

def eye(n, m):
    return tf.eye(n, m)

def compute_moments(_inputs, moments=[2, 3]):
    """From an image input, compute moments"""
    _inputs_sq = tf.square(_inputs)
    _inputs_cube = tf.pow(_inputs, 3)
    height = int(_inputs.get_shape()[1])
    width = int(_inputs.get_shape()[2])
    channels = int(_inputs.get_shape()[3])

    def ConvFlatten(x, kernel_size):
        # w_sum = tf.ones([kernel_size, kernel_size, channels, 1]) / (kernel_size * kernel_size * channels)
        w_sum = tf.eye(num_rows=channels, num_columns=channels, batch_shape=[kernel_size * kernel_size])
        w_sum = tf.reshape(w_sum, [kernel_size, kernel_size, channels, channels])
        w_sum = w_sum / (kernel_size * kernel_size)
        sum_ = tf.nn.conv2d(x, w_sum, strides=[1, 1, 1, 1], padding='VALID')
        size = prod_dim(sum_)
        assert size == (height - kernel_size + 1) * (width - kernel_size + 1) * channels, size
        return tf.reshape(sum_, [-1, size])

    outputs = []
    for size in [3, 4, 5]:
        mean = ConvFlatten(_inputs, size)
        square = ConvFlatten(_inputs_sq, size)
        var = square - tf.square(mean)
        if 2 in moments:
            outputs.append(var)
        if 3 in moments:
            cube = ConvFlatten(_inputs_cube, size)
            skewness = cube - 3.0 * mean * var - tf.pow(mean, 3)  # Unnormalized
            outputs.append(skewness)
    return tf.concat(outputs, 1)

def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the likelihood.

        \log p(Y | theta).
    """
    K = self.kern.K(self.X) + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * self.likelihood.variance
    L = tf.cholesky(K)
    m = self.mean_function(self.X)
    return multivariate_normal(self.Y, m, L)

def compile(self, session=None):
    """
    Before calling the standard compile function, check to see if the size
    of the data has changed and add variational parameters appropriately.

    This is necessary because the shape of the parameters depends on the
    shape of the data.
    """
    if not self.num_data == self.X.shape[0]:
        self.num_data = self.X.shape[0]
        self.q_mu = Parameter(np.zeros((self.num_data, self.num_latent)))
        self.q_sqrt = Parameter(np.eye(self.num_data)[:, :, None] *
                                np.ones((1, 1, self.num_latent)))
    return super(VGP, self).compile(session=session)

def _build_likelihood(self):
    """
    q_alpha, q_lambda are variational parameters, size N x R

    This method computes the variational lower bound on the likelihood,
    which is:

        E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]

    with

        q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
    """
    K = self.kern.K(self.X)
    K_alpha = tf.matmul(K, self.q_alpha)
    f_mean = K_alpha + self.mean_function(self.X)

    # compute the variance for each of the outputs
    I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
                [self.num_latent, 1, 1])
    A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
        tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
    L = tf.cholesky(A)
    Li = tf.matrix_triangular_solve(L, I)
    tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
    f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

    # some statistics about A are used in the KL
    A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    trAi = tf.reduce_sum(tf.square(Li))

    KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                tf.reduce_sum(K_alpha * self.q_alpha))

    v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
    return tf.reduce_sum(v_exp) - KL

def _build_likelihood(self):
    """
    Construct a tf function to compute the likelihood of a general GP
    model.

        \log p(Y, V | theta).
    """
    K = self.kern.K(self.X)
    L = tf.cholesky(
        K + tf.eye(tf.shape(self.X)[0], dtype=settings.float_type) * settings.numerics.jitter_level)
    F = tf.matmul(L, self.V) + self.mean_function(self.X)
    return tf.reduce_sum(self.likelihood.logp(F, self.Y))

def predict_f_samples(self, Xnew, num_samples):
    """
    Produce samples from the posterior latent function(s) at the points
    Xnew.
    """
    mu, var = self._build_predict(Xnew, full_cov=True)
    jitter = tf.eye(tf.shape(mu)[0], dtype=settings.float_type) * settings.numerics.jitter_level
    samples = []
    for i in range(self.num_latent):
        L = tf.cholesky(var[:, :, i] + jitter)
        shape = tf.stack([tf.shape(L)[0], num_samples])
        V = tf.random_normal(shape, dtype=settings.float_type)
        samples.append(mu[:, i:i + 1] + tf.matmul(L, V))
    return tf.transpose(tf.stack(samples))

def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood. For a derivation of the terms in here, see the associated
    SGPR notebook.
    """
    num_inducing = len(self.feature)
    num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)
    output_dim = tf.cast(tf.shape(self.Y)[1], settings.float_type)

    err = self.Y - self.mean_function(self.X)
    Kdiag = self.kern.Kdiag(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    L = tf.cholesky(Kuu)
    sigma = tf.sqrt(self.likelihood.variance)

    # Compute intermediate matrices
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    AAT = tf.matmul(A, A, transpose_b=True)
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma

    # compute log marginal bound
    bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
    bound += tf.negative(output_dim) * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
    bound -= 0.5 * num_data * output_dim * tf.log(self.likelihood.variance)
    bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
    bound += 0.5 * tf.reduce_sum(tf.square(c))
    bound += -0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance
    bound += 0.5 * output_dim * tf.reduce_sum(tf.matrix_diag_part(AAT))

    return bound

def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    num_inducing = len(self.feature)
    err = self.Y - self.mean_function(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kus = self.feature.Kuf(self.kern, Xnew)
    sigma = tf.sqrt(self.likelihood.variance)
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var

def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points.
    Note that this is very similar to the SGPR prediction, for which there
    are notes in the SGPR notebook.

    :param Xnew: Point to predict at.
    """
    num_inducing = tf.shape(self.Z)[0]
    psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
    psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
    Kuu = self.kern.K(self.Z) + tf.eye(num_inducing, dtype=settings.float_type) * settings.numerics.jitter_level
    Kus = self.kern.K(self.Z, Xnew)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)
    L = tf.cholesky(Kuu)

    A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
    AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var

def Kuu(self, kern, jitter=0.0):
    Kzz = kern.K(self.Z)
    Kzz += jitter * tf.eye(len(self), dtype=settings.dtypes.float_type)
    return Kzz

def Kuu(self, kern, jitter=0.0):
    if isinstance(kern, kernels.RBF):
        with decors.params_as_tensors_for(kern):
            Zmu, Zlen = kern._slice(self.Z, self.scales)
            idlengthscales2 = tf.square(kern.lengthscales + Zlen)
            sc = tf.sqrt(
                tf.expand_dims(idlengthscales2, 0) +
                tf.expand_dims(idlengthscales2, 1) -
                tf.square(kern.lengthscales))
            d = self._cust_square_dist(Zmu, Zmu, sc)
            Kzz = kern.variance * tf.exp(-d / 2) * tf.reduce_prod(kern.lengthscales / sc, 2)
            Kzz += jitter * tf.eye(len(self), dtype=settings.float_type)
            return Kzz
    else:
        raise NotImplementedError(
            "Multiscale features not implemented for `%s`." % str(type(kern)))

def get_dictionary(self, session):
    """Fetch (approximately) the learned code dictionary.

    Args:
      session: TensorFlow session to use.

    Returns:
      The code dictionary, with shape (hidden_units, input_dim).
    """
    fake_input = 1e15 * tf.eye(self.hidden_units)
    return session.run(self._decode_layer(fake_input, reuse=True))

def apply(self, is_train, x, x_mask=None):
    x_word_dim = tf.shape(x)[1]

    # (batch, x_word, key_word)
    dist_matrix = self.attention.get_scores(x, x)
    dist_matrix += tf.expand_dims(tf.eye(x_word_dim) * VERY_NEGATIVE_NUMBER, 0)  # Mask out self

    joint_mask = compute_attention_mask(x_mask, x_mask, x_word_dim, x_word_dim)
    if joint_mask is not None:
        dist_matrix += VERY_NEGATIVE_NUMBER * (1 - tf.cast(joint_mask, dist_matrix.dtype))

    if not self.alignment_bias:
        select_probs = tf.nn.softmax(dist_matrix)
    else:
        # Allow zero-attention by adding a learned bias to the normalizer
        bias = tf.exp(tf.get_variable("no-alignment-bias", initializer=tf.constant(-1.0, dtype=tf.float32)))
        dist_matrix = tf.exp(dist_matrix)
        select_probs = dist_matrix / (tf.reduce_sum(dist_matrix, axis=2, keep_dims=True) + bias)

    response = tf.matmul(select_probs, x)  # (batch, x_words, q_dim)

    if self.merge is not None:
        with tf.variable_scope("merge"):
            response = self.merge.apply(is_train, response, x)
        return response
    else:
        return response

def convolve(image, pixel_filter, channels=3, name=None):
    """Perform a 2D pixel convolution on the given image.

    Arguments:
      image: A 3D `float32` `Tensor` of shape `[height, width, channels]`,
        where `channels` is the third argument to this function and the
        first two dimensions are arbitrary.
      pixel_filter: A 2D `Tensor`, representing pixel weightings for the
        kernel. This will be used to create a 4D kernel---the extra two
        dimensions are for channels (see `tf.nn.conv2d` documentation), and
        the kernel will be constructed so that the channels are independent:
        each channel only observes the data from neighboring pixels of the
        same channel.
      channels: An integer representing the number of channels in the
        image (e.g., 3 for RGB).

    Returns:
      A 3D `float32` `Tensor` of the same shape as the input.
    """
    with tf.name_scope(name, 'convolve'):
        tf.assert_type(image, tf.float32)
        channel_filter = tf.eye(channels)
        filter_ = (tf.expand_dims(tf.expand_dims(pixel_filter, -1), -1) *
                   tf.expand_dims(tf.expand_dims(channel_filter, 0), 0))
        result_batch = tf.nn.conv2d(tf.stack([image]),  # batch
                                    filter=filter_,
                                    strides=[1, 1, 1, 1],
                                    padding='SAME')
        return result_batch[0]  # unbatch

def sim_multitask_GP(times, length, noise_vars, K_f, trainfrac):
    """
    draw from a multitask GP.

    we continue to assume for now that the dim of the input space is 1, ie just time

    M: number of tasks (labs/vitals/time series)

    train_frac: proportion of full M x N data matrix Y to include
    """
    M = np.shape(K_f)[0]
    N = len(times)
    n = N * M
    K_t = OU_kernel_np(length, times)  # just a correlation function
    Sigma = np.diag(noise_vars)

    K = np.kron(K_f, K_t) + np.kron(Sigma, np.eye(N)) + 1e-6 * np.eye(n)
    L_K = np.linalg.cholesky(K)

    y = np.dot(L_K, np.random.normal(0, 1, n))  # Draw normal

    # get indices of which time series and which time point, for each element in y
    ind_kf = np.tile(np.arange(M), (N, 1)).flatten('F')  # vec by column
    ind_kx = np.tile(np.arange(N), (M, 1)).flatten()

    # randomly dropout some fraction of fully observed time series
    perm = np.random.permutation(n)
    n_train = int(trainfrac * n)
    train_inds = perm[:n_train]

    y_ = y[train_inds]
    ind_kf_ = ind_kf[train_inds]
    ind_kx_ = ind_kx[train_inds]

    return y_, ind_kf_, ind_kx_

def testCompareProjectSumAndProject(self):
    # Compare results of project_sum and project.
    tens = initializers.random_tensor_batch((2, 3, 4), 3, batch_size=4)
    tangent_tens = initializers.random_tensor((2, 3, 4), 4)
    project_sum = riemannian.project_sum(tens, tangent_tens, tf.eye(4))
    project = riemannian.project(tens, tangent_tens)
    with self.test_session() as sess:
        res = sess.run((ops.full(project_sum), ops.full(project)))
        project_sum_val, project_val = res
        self.assertAllClose(project_sum_val, project_val)

def __call__(self, placeholder=None, moving_params=None):
    """"""
    embeddings = super(PretrainedVocab, self).__call__(placeholder, moving_params=moving_params)
    # (n x b x d') -> (n x b x d)
    with tf.variable_scope(self.name.title()):
        matrix = linalg.linear(embeddings, self.token_embed_size, moving_params=moving_params)
        if moving_params is None:
            with tf.variable_scope('Linear', reuse=True):
                weights = tf.get_variable('Weights')
                tf.losses.add_loss(tf.nn.l2_loss(tf.matmul(tf.transpose(weights), weights) - tf.eye(self.token_embed_size)))
    return matrix
    #return embeddings # changed in saves2/test8

#=============================================================

def linear_combine(clen, pclen, idx):
    Wl = param.get('Wl')
    Wr = param.get('Wr')

    dim = tf.unstack(tf.shape(Wl))[0]
    batch_shape = tf.shape(clen)

    f = (clen / pclen)
    l = (pclen - idx - 1) / (pclen - 1)
    r = (idx) / (pclen - 1)

    # when pclen == 1, replace nan items with 0.5
    l = tf.where(tf.is_nan(l), tf.ones_like(l) * 0.5, l)
    r = tf.where(tf.is_nan(r), tf.ones_like(r) * 0.5, r)

    lb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * l)
    rb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * r)
    fb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * f)

    lb = tf.reshape(lb, [-1, hyper.word_dim])
    rb = tf.reshape(rb, [-1, hyper.word_dim])

    tmp = tf.matmul(lb, Wl) + tf.matmul(rb, Wr)
    tmp = tf.reshape(tmp, [-1, hyper.word_dim, hyper.word_dim])

    return tf.matmul(fb, tmp)

def tri_combined(idx, pclen, depth, max_depth):
    """TF function, input: idx, pclen, depth, max_depth as batch (1D Tensor)
    Output: weight tensor (3D Tensor), first dim is batch
    """
    Wconvt = param.get('Wconvt')
    Wconvl = param.get('Wconvl')
    Wconvr = param.get('Wconvr')

    dim = tf.unstack(tf.shape(Wconvt))[0]
    batch_shape = tf.shape(idx)

    tmp = (idx - 1) / (pclen - 1)
    # when pclen == 1, replace nan items with 0.5
    tmp = tf.where(tf.is_nan(tmp), tf.ones_like(tmp) * 0.5, tmp)

    t = (max_depth - depth) / max_depth
    r = (1 - t) * tmp
    l = (1 - t) * (1 - r)

    lb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * l)
    rb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * r)
    tb = tf.transpose(tf.transpose(tf.eye(dim, batch_shape=batch_shape)) * t)

    lb = tf.reshape(lb, [-1, dim])
    rb = tf.reshape(rb, [-1, dim])
    tb = tf.reshape(tb, [-1, dim])

    tmp = tf.matmul(lb, Wconvl) + tf.matmul(rb, Wconvr) + tf.matmul(tb, Wconvt)

    tmp = tf.reshape(tmp, [-1, hyper.word_dim, hyper.conv_dim])
    return tmp

def __init__(self, n_labeled, n_unlabeled, n_classes):
    self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
    self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
    self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])

    w_init = tf.random_uniform(shape=[], minval=0.5, maxval=5)
    self._w = w = tf.get_variable("w", dtype=tf.float32, initializer=w_init)
    b_init = tf.random_uniform(shape=[], minval=-1, maxval=1)
    self._b = b = tf.get_variable("b", dtype=tf.float32, initializer=b_init)

    tuu = tf.sigmoid(w * t_uu + b)
    tul = tf.sigmoid(w * t_ul + b)
    # tuu = tf.Print(tuu, [tuu], 'tuu', summarize=30)
    # tul = tf.Print(tul, [tul], 'tul', summarize=30)

    # column normalization
    tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
    tul_col_norms = tf.norm(tul, ord=1, axis=0)
    tuu /= tuu_col_norms
    tul /= tul_col_norms

    # row normalization
    tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
    tul_row_norms = tf.norm(tul, ord=1, axis=1)
    tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
    tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])

    I = tf.eye(n_unlabeled, dtype=tf.float32)
    inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
    y_u = tf.matmul(tf.matmul(inv, tul), y_l)
    y = tf.concat([y_u, y_l], 0)
    self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
    self._entropy = entropy = -tf.reduce_sum(y * tf.log(y))
    self._train_op = tf.train.AdamOptimizer(0.005).minimize(entropy)

def __init__(self, n_labeled, n_unlabeled, n_classes):
    self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
    self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
    self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])

    w_init = tf.random_uniform(shape=[], minval=0.5, maxval=5)
    self._w = w = tf.get_variable("w", dtype=tf.float32, initializer=w_init)
    b_init = tf.random_uniform(shape=[], minval=-1, maxval=1)
    self._b = b = tf.get_variable("b", dtype=tf.float32, initializer=b_init)

    tuu = tf.sigmoid(w * t_uu + b)
    tul = tf.sigmoid(w * t_ul + b)
    # tuu = tf.Print(tuu, [tuu], 'tuu', summarize=30)
    # tul = tf.Print(tul, [tul], 'tul', summarize=30)

    # column normalization
    tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
    tul_col_norms = tf.norm(tul, ord=1, axis=0)
    tuu /= tuu_col_norms
    tul /= tul_col_norms

    # row normalization
    tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
    tul_row_norms = tf.norm(tul, ord=1, axis=1)
    tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
    tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])

    I = tf.eye(n_unlabeled, dtype=tf.float32)
    inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
    y_u = tf.matmul(tf.matmul(inv, tul), y_l)
    y = tf.concat([y_u, y_l], 0)
    self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))
    self._entropy = entropy = -tf.reduce_sum(y * tf.log(y))
    self._train_op = tf.train.AdamOptimizer(0.1).minimize(entropy)

def __init__(self, n_labeled, n_unlabeled, n_classes):
    self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
    self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
    self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])
    self._w = w = tf.placeholder(tf.float32, shape=[])
    self._b = b = tf.placeholder(tf.float32, shape=[])

    tuu = tf.sigmoid(w * t_uu + b)
    tul = tf.sigmoid(w * t_ul + b)

    # column normalization
    tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
    tul_col_norms = tf.norm(tul, ord=1, axis=0)
    tuu /= tuu_col_norms
    tul /= tul_col_norms

    # row normalization
    tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
    tul_row_norms = tf.norm(tul, ord=1, axis=1)
    tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
    tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])

    I = tf.eye(n_unlabeled, dtype=tf.float32)
    inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
    y_u = tf.matmul(tf.matmul(inv, tul), y_l)
    y = tf.concat([y_u, y_l], 0)
    self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))

def __init__(self, n_labeled, n_unlabeled, input_dims, n_classes):
    self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
    self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
    self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])

    tuu = tf.sigmoid(t_uu)
    tul = tf.sigmoid(t_ul)

    # column normalization
    tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
    tul_col_norms = tf.norm(tul, ord=1, axis=0)
    tuu /= tuu_col_norms
    tul /= tul_col_norms

    # row normalization
    tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
    tul_row_norms = tf.norm(tul, ord=1, axis=1)
    tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
    tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])

    I = tf.eye(n_unlabeled, dtype=tf.float32)
    inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
    y_u = tf.matmul(tf.matmul(inv, tul), y_l)
    y = tf.concat([y_u, y_l], 0)
    self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))

def __init__(self, n_labeled, n_unlabeled, n_classes):
    self._t_uu = t_uu = tf.placeholder(tf.float32, shape=[n_unlabeled, n_unlabeled])
    self._t_ul = t_ul = tf.placeholder(tf.float32, shape=[n_unlabeled, n_labeled])
    self._y_l = y_l = tf.placeholder(tf.float32, shape=[n_labeled, n_classes])

    tuu = tf.sigmoid(t_uu)
    tul = tf.sigmoid(t_ul)

    # column normalization
    tuu_col_norms = tf.norm(tuu, ord=1, axis=0)
    tul_col_norms = tf.norm(tul, ord=1, axis=0)
    tuu /= tuu_col_norms
    tul /= tul_col_norms

    # row normalization
    tuu_row_norms = tf.norm(tuu, ord=1, axis=1)
    tul_row_norms = tf.norm(tul, ord=1, axis=1)
    tuu /= tf.reshape(tuu_row_norms, [n_unlabeled, 1])
    tul /= tf.reshape(tul_row_norms, [n_unlabeled, 1])

    I = tf.eye(n_unlabeled, dtype=tf.float32)
    inv = tf.matrix_solve_ls((I - tuu), I, l2_regularizer=0.01)
    y_u = tf.matmul(tf.matmul(inv, tul), y_l)
    y = tf.concat([y_u, y_l], 0)
    self._y = y = tf.clip_by_value(y, 1e-15, float("inf"))

def add_minibatch_features(image, df_dim):
    shape = image.get_shape().as_list()
    dim = np.prod(shape[1:])  # dim = prod(9,2) = 18
    h_mb0 = lrelu(conv2d(image, df_dim, name='d_mb0_conv'))
    h_mb1 = conv2d(h_mb0, df_dim, name='d_mbh1_conv')

    dims = h_mb1.get_shape().as_list()
    conv_dims = np.prod(dims[1:])

    image_ = tf.reshape(h_mb1, tf.stack([-1, conv_dims]))
    #image_ = tf.reshape(h_mb1, tf.stack([batch_size, -1]))

    n_kernels = 300
    dim_per_kernel = 50
    x = linear(image_, n_kernels * dim_per_kernel, 'd_mbLinear')
    act = tf.reshape(x, (-1, n_kernels, dim_per_kernel))
    act = tf.reshape(x, (-1, n_kernels, dim_per_kernel))
    act_tp = tf.transpose(act, [1, 2, 0])
    # bs x n_ker x dim_ker x bs -> bs x n_ker x bs:
    abs_dif = tf.reduce_sum(tf.abs(tf.expand_dims(act, 3) - tf.expand_dims(act_tp, 0)), 2)
    eye = tf.expand_dims(tf.eye(tf.shape(abs_dif)[0]), 1)  # bs x 1 x bs
    masked = tf.exp(-abs_dif) - eye
    f1 = tf.reduce_mean(masked, 2)

    mb_features = tf.reshape(f1, [-1, 1, 1, n_kernels])
    return conv_cond_concat(image, mb_features)

## following is from https://github.com/openai/improved-gan/blob/master/imagenet/discriminator.py#L88
#def add_minibatch_features(image,df_dim,batch_size):
#    shape = image.get_shape().as_list()
#    dim = np.prod(shape[1:])  # dim = prod(9,2) = 18
#    h_mb0 = lrelu(conv2d(image, df_dim, name='d_mb0_conv'))
#    h_mb1 = conv2d(h_mb0, df_dim, name='d_mbh1_conv')
#
#    dims = h_mb1.get_shape().as_list()
#    conv_dims = np.prod(dims[1:])
#
#    image_ = tf.reshape(h_mb1, tf.stack([-1, conv_dims]))
#    #image_ = tf.reshape(h_mb1, tf.stack([batch_size, -1]))
#
#    n_kernels = 300
#    dim_per_kernel = 50
#    x = linear(image_, n_kernels * dim_per_kernel, 'd_mbLinear')
#    activation = tf.reshape(x, (batch_size, n_kernels, dim_per_kernel))
#    big = np.zeros((batch_size, batch_size), dtype='float32')
#    big += np.eye(batch_size)
#    big = tf.expand_dims(big, 1)
#    abs_dif = tf.reduce_sum(tf.abs(tf.expand_dims(activation, 3) - tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0)), 2)
#    mask = 1. - big
#    masked = tf.exp(-abs_dif) * mask
#    f1 = tf.reduce_sum(masked, 2) / tf.reduce_sum(mask)
#    mb_features = tf.reshape(f1, [batch_size, 1, 1, n_kernels])
#    return conv_cond_concat(image, mb_features)

def interatomic_distances(positions, cell, pbc, cutoff):
    with tf.variable_scope('distance'):
        # calculate heights
        # account for zero cell in case of no pbc
        c = tf.reduce_sum(tf.cast(pbc, tf.int32)) > 0
        icell = tf.cond(c, lambda: tf.matrix_inverse(cell),
                        lambda: tf.eye(3))
        height = 1. / tf.sqrt(tf.reduce_sum(tf.square(icell), 0))

        extent = tf.where(tf.cast(pbc, tf.bool),
                          tf.cast(tf.floor(cutoff / height), tf.int32),
                          tf.cast(tf.zeros_like(height), tf.int32))
        n_reps = tf.reduce_prod(2 * extent + 1)

        # replicate atoms
        r = tf.range(-extent[0], extent[0] + 1)
        v0 = tf.expand_dims(r, 1)
        v0 = tf.tile(v0,
                     tf.stack(((2 * extent[1] + 1) * (2 * extent[2] + 1), 1)))
        v0 = tf.reshape(v0, tf.stack((n_reps, 1)))

        r = tf.range(-extent[1], extent[1] + 1)
        v1 = tf.expand_dims(r, 1)
        v1 = tf.tile(v1, tf.stack((2 * extent[2] + 1, 2 * extent[0] + 1)))
        v1 = tf.reshape(v1, tf.stack((n_reps, 1)))

        v2 = tf.expand_dims(tf.range(-extent[2], extent[2] + 1), 1)
        v2 = tf.tile(v2,
                     tf.stack((1, (2 * extent[0] + 1) * (2 * extent[1] + 1))))
        v2 = tf.reshape(v2, tf.stack((n_reps, 1)))

        v = tf.cast(tf.concat((v0, v1, v2), axis=1), tf.float32)
        offset = tf.matmul(v, cell)
        offset = tf.expand_dims(offset, 0)

        # add axes
        positions = tf.expand_dims(positions, 1)
        rpos = positions + offset
        rpos = tf.expand_dims(rpos, 0)
        positions = tf.expand_dims(positions, 1)

        euclid_dist = tf.sqrt(
            tf.reduce_sum(tf.square(positions - rpos),
                          reduction_indices=3))
        return euclid_dist

def get_weight(self, name, shape, init='glorot', device='gpu',
               weight_val=None, trainable=True):
    """Creates a new weight.

    Args:
        name: str, the name of the variable.
        shape: tuple of ints, the shape of the variable.
        init: str, the type of initialize to use.
        device: str, 'cpu' or 'gpu'.
        weight_val: Numpy array to use as the initial weights.
        trainable: bool, whether or not this weight is trainable.

    Returns:
        a trainable TF variable with shape `shape`.
    """
    if weight_val is None:
        init = init.lower()
        if init == 'normal':
            initializer = (lambda shape, dtype, partition_info:
                           tf.random_normal(shape, stddev=0.05))
        elif init == 'uniform':
            initializer = (lambda shape, dtype, partition_info:
                           tf.random_uniform(shape, stddev=0.05))
        elif init == 'glorot':
            initializer = (lambda shape, dtype, partition_info:
                           tf.random_normal(
                               shape, stddev=np.sqrt(6. / sum(shape))))
        elif init == 'eye':
            assert all(i == shape[0] for i in shape)
            initializer = (lambda shape, dtype, partition_info:
                           tf.eye(shape[0]))
        elif init == 'zero':
            initializer = (lambda shape, dtype, partition_info:
                           tf.zeros(shape))
        else:
            raise ValueError('Invalid init: "%s"' % init)
    else:
        weight_val = weight_val.astype('float32')

    device = device.lower()
    if device == 'gpu':
        on_gpu = True
    elif device == 'cpu':
        on_gpu = False
    else:
        raise ValueError('Invalid device: "%s"' % device)

    if self._only_cpu:
        on_gpu = False

    with tf.device('/gpu:0' if on_gpu else '/cpu:0'):
        weight = tf.get_variable(name=name,
                                 shape=shape,
                                 initializer=initializer,
                                 trainable=trainable)
    self._weights.append(weight)

    return weight

def discriminator_lks_test(self, opts, input_):
    """Deterministic discriminator using Kernel Stein Discrepancy test

    refer to the quadratic test of https://arxiv.org/pdf/1705.07673.pdf
    The statistic basically reads:
        \[
            \frac{1}{n^2 - n}\sum_{i \neq j} \left(
                \frac{<x_i, x_j>}{\sigma_p^4}
                + d/\sigma_k^2
                - \|x_i - x_j\|^2\left(\frac{1}{\sigma_p^2\sigma_k^2} + \frac{1}{\sigma_k^4}\right)
            \right)
            \exp( - \|x_i - x_j\|^2/2/\sigma_k^2)
        \]
    """
    n = self.get_batch_size(opts, input_)
    n = tf.cast(n, tf.int32)
    half_size = (n * n - n) / 2
    nf = tf.cast(n, tf.float32)
    norms = tf.reduce_sum(tf.square(input_), axis=1, keep_dims=True)
    dotprods = tf.matmul(input_, input_, transpose_b=True)
    distances = norms + tf.transpose(norms) - 2. * dotprods
    sigma2_p = opts['pot_pz_std'] ** 2  # var = std ** 2
    # Median heuristic for the sigma^2 of Gaussian kernel
    # sigma2_k = tf.nn.top_k(tf.reshape(distances, [-1]), half_size).values[half_size - 1]
    # Maximal heuristic for the sigma^2 of Gaussian kernel
    # sigma2_k = tf.nn.top_k(tf.reshape(distances, [-1]), 1).values[0]
    sigma2_k = opts['latent_space_dim'] * sigma2_p
    if opts['verbose'] == 2:
        sigma2_k = tf.Print(sigma2_k, [tf.nn.top_k(tf.reshape(distances, [-1]), 1).values[0]],
                            'Maximal squared pairwise distance:')
        sigma2_k = tf.Print(sigma2_k, [tf.reduce_mean(distances)],
                            'Average squared pairwise distance:')
        sigma2_k = tf.Print(sigma2_k, [sigma2_k], 'Kernel width:')
    res = dotprods / sigma2_p ** 2 \
          - distances * (1. / sigma2_p / sigma2_k + 1. / sigma2_k ** 2) \
          + opts['latent_space_dim'] / sigma2_k
    res = tf.multiply(res, tf.exp(- distances / 2. / sigma2_k))
    res = tf.multiply(res, 1. - tf.eye(n))
    stat = tf.reduce_sum(res) / (nf * nf - nf)
    # stat = tf.reduce_sum(res) / (nf * nf)
    return stat

def add_least_gaussian2d_ops(self, opts):
    """ Add ops searching for the 2d plane in z_dim hidden space
        corresponding to the 'least Gaussian' look of the sample
    """

    with tf.variable_scope('leastGaussian2d'):
        # Projection matrix which we are going to tune
        sample_ph = tf.placeholder(
            tf.float32, [None, opts['latent_space_dim']],
            name='sample_ph')
        v = tf.get_variable(
            "proj_v", [opts['latent_space_dim'], 1],
            tf.float32, tf.random_normal_initializer(stddev=1.))
        u = tf.get_variable(
            "proj_u", [opts['latent_space_dim'], 1],
            tf.float32, tf.random_normal_initializer(stddev=1.))
        npoints = tf.cast(tf.shape(sample_ph)[0], tf.int32)

        # First we need to make sure projection matrix is orthogonal
        v_norm = tf.nn.l2_normalize(v, 0)
        dotprod = tf.reduce_sum(tf.multiply(u, v_norm))
        u_ort = u - dotprod * v_norm
        u_norm = tf.nn.l2_normalize(u_ort, 0)
        Mproj = tf.concat([v_norm, u_norm], 1)
        sample_proj = tf.matmul(sample_ph, Mproj)
        a = tf.eye(npoints) - tf.ones([npoints, npoints]) / tf.cast(npoints, tf.float32)
        b = tf.matmul(sample_proj, tf.matmul(a, a), transpose_a=True)
        b = tf.matmul(b, sample_proj)
        # Sample covariance matrix
        covhat = b / (tf.cast(npoints, tf.float32) - 1)
        # covhat = tf.Print(covhat, [covhat], 'Cov:')

    with tf.variable_scope('leastGaussian2d'):
        gcov = opts['pot_pz_std'] * opts['pot_pz_std'] * tf.eye(2)
        # l2 distance between sample cov and the Gaussian cov
        projloss = tf.reduce_sum(tf.square(covhat - gcov))
        # Also account for the first moment, i.e. expected value
        projloss += tf.reduce_sum(tf.square(tf.reduce_mean(sample_proj, 0)))
        # We are maximizing
        projloss = -projloss
        optim = tf.train.AdamOptimizer(0.001, 0.9)
        optim = optim.minimize(projloss, var_list=[v, u])

    self._proj_u = u_norm
    self._proj_v = v_norm
    self._proj_sample_ph = sample_ph
    self._proj_covhat = covhat
    self._proj_loss = projloss
    self._proj_optim = optim

def _build_likelihood(self):
    """
    Construct a tensorflow function to compute the bound on the marginal
    likelihood.
    """
    num_inducing = tf.shape(self.Z)[0]
    psi0 = tf.reduce_sum(self.kern.eKdiag(self.X_mean, self.X_var), 0)
    psi1 = self.kern.eKxz(self.Z, self.X_mean, self.X_var)
    psi2 = tf.reduce_sum(self.kern.eKzxKxz(self.Z, self.X_mean, self.X_var), 0)
    Kuu = self.kern.K(self.Z) + tf.eye(num_inducing, dtype=settings.float_type) * settings.numerics.jitter_level
    L = tf.cholesky(Kuu)
    sigma2 = self.likelihood.variance
    sigma = tf.sqrt(sigma2)

    # Compute intermediate matrices
    A = tf.matrix_triangular_solve(L, tf.transpose(psi1), lower=True) / sigma
    tmp = tf.matrix_triangular_solve(L, psi2, lower=True)
    AAT = tf.matrix_triangular_solve(L, tf.transpose(tmp), lower=True) / sigma2
    B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    log_det_B = 2. * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
    c = tf.matrix_triangular_solve(LB, tf.matmul(A, self.Y), lower=True) / sigma

    # KL[q(x) || p(x)]
    dX_var = self.X_var if len(self.X_var.get_shape()) == 2 else tf.matrix_diag_part(self.X_var)
    NQ = tf.cast(tf.size(self.X_mean), settings.float_type)
    D = tf.cast(tf.shape(self.Y)[1], settings.float_type)
    KL = -0.5 * tf.reduce_sum(tf.log(dX_var)) \
         + 0.5 * tf.reduce_sum(tf.log(self.X_prior_var)) \
         - 0.5 * NQ \
         + 0.5 * tf.reduce_sum((tf.square(self.X_mean - self.X_prior_mean) + dX_var) / self.X_prior_var)

    # compute log marginal bound
    ND = tf.cast(tf.size(self.Y), settings.float_type)
    bound = -0.5 * ND * tf.log(2 * np.pi * sigma2)
    bound += -0.5 * D * log_det_B
    bound += -0.5 * tf.reduce_sum(tf.square(self.Y)) / sigma2
    bound += 0.5 * tf.reduce_sum(tf.square(c))
    bound += -0.5 * D * (tf.reduce_sum(psi0) / sigma2 -
                         tf.reduce_sum(tf.matrix_diag_part(AAT)))
    bound -= KL
    return bound

def conditional(Xnew, X, kern, f, *, full_cov=False, q_sqrt=None, white=False):
    """
    Given f, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about f as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that

        p(v) = N(0, I)
        f = L v

    thus

        p(f) = N(0, LL^T) = N(0, K).

    In this case `f` represents the values taken by v.

    The method can either return the diagonals of the covariance matrix for
    each output (default) or the full covariance matrix (full_cov=True).

    We assume K independent GPs, represented by the columns of f (and the
    last dimension of q_sqrt).

    :param Xnew: data matrix, size N x D.
    :param X: data points, size M x D.
    :param kern: GPflow kernel.
    :param f: data matrix, M x K, representing the function values at X,
        for K functions.
    :param q_sqrt: matrix of standard-deviations or Cholesky matrices,
        size M x K or M x M x K.
    :param white: boolean of whether to use the whitened representation as
        described above.
    :return: two element tuple with conditional mean and variance.
    """
    num_data = tf.shape(X)[0]  # M
    Kmm = kern.K(X) + tf.eye(num_data, dtype=settings.float_type) * settings.numerics.jitter_level
    Kmn = kern.K(X, Xnew)
    if full_cov:
        Knn = kern.K(Xnew)
    else:
        Knn = kern.Kdiag(Xnew)
    return base_conditional(Kmn, Kmm, Knn, f, full_cov=full_cov, q_sqrt=q_sqrt, white=white)

def training_decoding_layer(target_data, target_lengths, enc_output,
                            enc_output_lengths, fst, keep_prob):
    ''' Training decoding layer for the model.

        Returns:
            Training logits
    '''
    target_data = tf.concat(
        [tf.fill([FLAGS.batch_size, 1], VOCAB_TO_INT['<s>']),
         target_data[:, :-1]], 1)

    dec_cell = get_dec_cell(
        enc_output,
        enc_output_lengths,
        FLAGS.use_train_lm,
        fst,
        1,
        keep_prob)

    initial_state = dec_cell.zero_state(
        dtype=tf.float32,
        batch_size=FLAGS.batch_size)

    target_data = tf.nn.embedding_lookup(
        tf.eye(VOCAB_SIZE),
        target_data)

    training_helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=target_data,
        sequence_length=target_lengths,
        time_major=False)

    training_decoder = tf.contrib.seq2seq.BasicDecoder(
        dec_cell,
        training_helper,
        initial_state)

    training_logits, _, _ = tf.contrib.seq2seq.dynamic_decode(
        training_decoder,
        output_time_major=False,
        impute_finished=True)

    return training_logits

def inference_decoding_layer(enc_output, enc_output_lengths, fst, keep_prob):
    ''' Inference decoding layer for the model.

        Returns:
            Predictions
    '''
    dec_cell = get_dec_cell(
        enc_output,
        enc_output_lengths,
        FLAGS.use_inference_lm,
        fst,
        FLAGS.beam_width,
        keep_prob)

    initial_state = dec_cell.zero_state(
        dtype=tf.float32,
        batch_size=FLAGS.batch_size * FLAGS.beam_width)

    start_tokens = tf.fill(
        [FLAGS.batch_size],
        VOCAB_TO_INT['<s>'],
        name='start_tokens')

    inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        dec_cell,
        tf.eye(VOCAB_SIZE),
        start_tokens,
        VOCAB_TO_INT['</s>'],
        initial_state,
        FLAGS.beam_width)

    predictions, _, _ = tf.contrib.seq2seq.dynamic_decode(
        inference_decoder,
        output_time_major=False,
        maximum_iterations=FLAGS.max_output_len)

    return predictions

def __init__(self, X, y, M=10, max_iter=2000, N_batch=1,
             monitor_likelihood=10, lrate=1e-3):
    (N, D) = X.shape

    # kmeans on a subset of data
    N_subset = min(N, 10000)
    idx = np.random.choice(N, N_subset, replace=False)
    kmeans = KMeans(n_clusters=M, random_state=0).fit(X[idx, :])
    Z = kmeans.cluster_centers_

    hyp = np.log(np.ones(D + 1))
    logsigma_n = np.array([-4.0])
    hyp = np.concatenate([hyp, logsigma_n])

    m = np.zeros((M, 1))
    S = kernel(Z, Z, hyp[:-1])

    self.X = X
    self.y = y

    self.M = M
    self.Z = tf.Variable(Z, dtype=tf.float64, trainable=False)

    self.K_u_inv = tf.Variable(np.eye(M), dtype=tf.float64, trainable=False)
    self.m = tf.Variable(m, dtype=tf.float64, trainable=False)
    self.S = tf.Variable(S, dtype=tf.float64, trainable=False)

    self.nlml = tf.Variable(0.0, dtype=tf.float64, trainable=False)

    self.hyp = hyp

    self.max_iter = max_iter
    self.N_batch = N_batch
    self.monitor_likelihood = monitor_likelihood
    self.jitter = 1e-8
    self.jitter_cov = 1e-8

    self.lrate = lrate
    self.optimizer = tf.train.AdamOptimizer(self.lrate)

    # Tensor Flow Session
    # self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    self.sess = tf.Session()

def likelihood(self, hyp, X_batch, y_batch, monitor=False):
    M = self.M

    Z = self.Z
    m = self.m
    S = self.S

    jitter = self.jitter
    jitter_cov = self.jitter_cov

    N = tf.shape(X_batch)[0]

    logsigma_n = hyp[-1]
    sigma_n = tf.exp(logsigma_n)

    # Compute K_u_inv
    K_u = kernel_tf(Z, Z, hyp[:-1])
    L = tf.cholesky(K_u + np.eye(M) * jitter_cov)
    K_u_inv = tf.matrix_triangular_solve(tf.transpose(L),
                                         tf.matrix_triangular_solve(L, np.eye(M), lower=True),
                                         lower=False)

    K_u_inv_op = self.K_u_inv.assign(K_u_inv)

    # Compute mu
    psi = kernel_tf(Z, X_batch, hyp[:-1])
    K_u_inv_m = tf.matmul(K_u_inv, m)
    MU = tf.matmul(tf.transpose(psi), K_u_inv_m)

    # Compute cov
    Alpha = tf.matmul(K_u_inv, psi)
    COV = kernel_tf(X_batch, X_batch, hyp[:-1]) - tf.matmul(tf.transpose(psi), tf.matmul(K_u_inv, psi)) + \
          tf.matmul(tf.transpose(Alpha), tf.matmul(S, Alpha))

    # Compute COV_inv
    LL = tf.cholesky(COV + tf.eye(N, dtype=tf.float64) * sigma_n + tf.eye(N, dtype=tf.float64) * jitter)
    COV_inv = tf.matrix_triangular_solve(tf.transpose(LL),
                                         tf.matrix_triangular_solve(LL, tf.eye(N, dtype=tf.float64), lower=True),
                                         lower=False)

    # Compute cov(Z, X)
    cov_ZX = tf.matmul(S, Alpha)

    # Update m and S
    alpha = tf.matmul(COV_inv, tf.transpose(cov_ZX))
    m_new = m + tf.matmul(cov_ZX, tf.matmul(COV_inv, y_batch - MU))
    S_new = S - tf.matmul(cov_ZX, alpha)

    if monitor == False:
        m_op = self.m.assign(m_new)
        S_op = self.S.assign(S_new)

    # Compute NLML
    K_u_inv_m = tf.matmul(K_u_inv, m_new)

    NLML = 0.5 * tf.matmul(tf.transpose(m_new), K_u_inv_m) + \
           tf.reduce_sum(tf.log(tf.diag_part(L))) + \
           0.5 * np.log(2. * np.pi) * tf.cast(M, tf.float64)

    train = self.optimizer.minimize(NLML)

    nlml_op = self.nlml.assign(NLML[0, 0])

    return tf.group(*[train, m_op, S_op, nlml_op, K_u_inv_op])

def block_Lanczos(Sigma_func, B_, n_mc_smps):
    """
    block Lanczos method to approx Sigma^1/2 * B, with B matrix of N(0,1)'s.
    Used to generate multiple approximate large normal draws.
    """
    n = tf.shape(B_)[0]  # dim of the multivariate normal
    s = n_mc_smps  # number of samples to draw
    k = tf.div(n, 500) + 3  # number of Lanczos iterations

    betas = tf.zeros([1, s])
    alphas = tf.zeros([0, s])
    D = tf.zeros([s, n, 1])
    B_norms = tf.norm(B_, axis=0)
    D = tf.concat([D, tf.expand_dims(tf.transpose(B_ / B_norms), 2)], 2)

    def cond(j, alphas, betas, D):
        return j < k + 1

    # TODO: use block-CG in place of Sigma
    def body(j, alphas, betas, D):
        d_j = tf.squeeze(tf.slice(D, [0, 0, j], [-1, -1, 1]))
        d = Sigma_func(tf.transpose(d_j)) - (tf.slice(betas, [j - 1, 0], [1, -1]) *
                                             tf.transpose(tf.squeeze(tf.slice(D, [0, 0, j - 1], [-1, -1, 1]))))
        alphas = tf.concat([alphas, [tf.diag_part(tf.matmul(d_j, d))]], 0)
        d = d - tf.slice(alphas, [j - 1, 0], [1, -1]) * tf.transpose(d_j)
        betas = tf.concat([betas, [tf.norm(d, axis=0)]], 0)
        D = tf.concat([D, tf.expand_dims(tf.transpose(d / tf.slice(betas, [j, 0], [1, -1])), 2)], 2)
        return j + 1, alphas, betas, D

    j = tf.constant(1)
    j, alphas, betas, D = tf.while_loop(cond, body, loop_vars=[j, alphas, betas, D],
                                        shape_invariants=[j.get_shape(), tf.TensorShape([None, None]),
                                                          tf.TensorShape([None, None]), tf.TensorShape([None, None, None])])

    D_ = tf.slice(D, [0, 0, 1], [-1, -1, k])

    ##TODO: replace loop
    H = tf.zeros([0, k, k])
    for ss in range(s):
        this_beta = tf.diag(tf.squeeze(tf.slice(betas, [1, ss], [k - 1, 1])))
        # build out tridiagonal H: alphas_1:k on main, betas_2:k on off
        this_H = (tf.diag(tf.squeeze(tf.slice(alphas, [0, ss], [-1, 1]))) +
                  tf.pad(this_beta, [[1, 0], [0, 1]]) +
                  tf.pad(this_beta, [[0, 1], [1, 0]]))
        H = tf.concat([H, tf.expand_dims(this_H, 0)], 0)

    E, V = tf.self_adjoint_eig(H)

    E_sqrt = tf.zeros([0, k, k])
    # TODO: replace loop
    for ss in range(s):
        # ensure positive definite
        E_sqrt = tf.concat([E_sqrt,
                            tf.expand_dims(tf.diag(tf.squeeze(tf.sqrt(tf.maximum(tf.slice(E, [ss, 0], [1, -1]), 1e-6)))), 0)], 0)
    sq_H = tf.matmul(V, tf.matmul(E_sqrt, tf.transpose(V, perm=[0, 2, 1])))

    e1 = tf.expand_dims(tf.transpose(tf.tile(tf.slice(tf.eye(k), [0, 0], [-1, 1]), [1, s])), 2)
    out = B_norms * tf.transpose(tf.squeeze(tf.matmul(D_, tf.matmul(sq_H, e1))))

    return out

def __init__(self, dim_z, dim_y, dim_u=0, dim_k=1, **kwargs):
    self.dim_z = dim_z
    self.dim_y = dim_y
    self.dim_u = dim_u
    self.dim_k = dim_k

    # Initializer for identity matrix
    self.eye_init = lambda shape, dtype=np.float32: np.eye(*shape, dtype=dtype)

    # Pop all variables
    init = kwargs.pop('mu', np.zeros((dim_z, ), dtype=np.float32))
    self.mu = tf.get_variable('mu', initializer=init, trainable=False)  # state

    init = kwargs.pop('Sigma', self.eye_init((dim_z, dim_z))).astype(np.float32)
    self.Sigma = tf.get_variable('Sigma', initializer=init, trainable=False)  # uncertainty covariance

    init = kwargs.pop('y_0', np.zeros((dim_y,))).astype(np.float32)
    self.y_0 = tf.get_variable('y_0', initializer=init)  # initial output

    init = kwargs.pop('A', self.eye_init((dim_z, dim_z)))
    self.A = tf.get_variable('A', initializer=init)

    init = kwargs.pop('B', self.eye_init((dim_z, dim_u))).astype(np.float32)
    self.B = tf.get_variable('B', initializer=init)  # control transition matrix

    init = kwargs.pop('Q', self.eye_init((dim_z, dim_z))).astype(np.float32)
    self.Q = tf.get_variable('Q', initializer=init, trainable=False)  # process uncertainty

    init = kwargs.pop('C', self.eye_init((dim_y, dim_z))).astype(np.float32)
    self.C = tf.get_variable('C', initializer=init)  # Measurement function

    init = kwargs.pop('R', self.eye_init((dim_y, dim_y))).astype(np.float32)
    self.R = tf.get_variable('R', initializer=init, trainable=False)  # state uncertainty

    self._alpha_sq = tf.constant(1., dtype=tf.float32)  # fading memory control
    self.M = 0  # process-measurement cross correlation

    # identity matrix
    self._I = tf.constant(self.eye_init((dim_z, dim_z)), name='I')

    # Get variables that are possibly defined with tensors
    self.y = kwargs.pop('y', None)
    if self.y is None:
        self.y = tf.placeholder(tf.float32, shape=(None, None, dim_y), name='y')

    self.u = kwargs.pop('u', None)
    if self.u is None:
        self.u = tf.placeholder(tf.float32, shape=(None, None, dim_u), name='u')

    self.mask = kwargs.pop('mask', None)
    if self.mask is None:
        self.mask = tf.placeholder(tf.float32, shape=(None, None), name='mask')

    self.alpha = kwargs.pop('alpha', None)
    self.state = kwargs.pop('state', None)
    self.log_likelihood = None