我们从Python开源项目中,提取了以下33个代码示例,用于说明如何使用tensorflow.tensordot()。
def tensor_swirl(image, center=None, strength=1, radius=100, rotation=0, cval=0.0, **kwargs):
    """Swirl-warp ``image`` by contracting it with a precomputed weight array.

    A dense NumPy array of shape ``(*shape, *shape)`` is filled on the host
    from ``swirl_mapping`` and ``tensor_linear_interpolation`` and then
    contracted against axes 1 and 2 of ``image``.

    Args:
        image: tensor whose axes 1 and 2 have static sizes (read through
            ``K.int_shape``).
        center: swirl centre; defaults to half of the two spatial sizes.
        strength, radius, rotation: forwarded to ``swirl_mapping``.
        cval: fill value, broadcast to one entry per element of axis 0.
        **kwargs: unsupported options, ignored.

    Returns:
        The result of ``tf.tensordot(image, mapping, [[1, 2], [0, 1]])``.
    """
    # **kwargs is for unsupported options (ignored)
    cval = tf.fill(K.shape(image)[0:1], cval)
    shape = K.int_shape(image)[1:3]
    n_rows, n_cols = shape[0], shape[1]
    if center is None:
        center = np.array(shape) / 2

    # Flattened coordinates of every pixel, each as a column vector.
    ys = np.expand_dims(np.repeat(np.arange(n_rows), n_cols), -1)
    xs = np.expand_dims(np.tile(np.arange(n_cols), n_rows), -1)
    map_xs, map_ys = swirl_mapping(xs, ys, center, rotation, strength, radius)

    mapping = np.zeros((*shape, *shape))
    for map_x, map_y, x, y in zip(map_xs, map_ys, xs, ys):
        # presumably yields (row, col, weight) triples — verify against
        # tensor_linear_interpolation.
        contributions = tensor_linear_interpolation(image, map_x, map_y, cval)
        for src_y, src_x, weight in contributions:
            mapping[int(src_y), int(src_x), int(y), int(x)] = weight

    warped = tf.tensordot(image, K.variable(mapping), [[1, 2], [0, 1]])
    return warped
def apply_attention(self):
    """Attend over ``self.rnn_outputs`` and store ``self.attention_output``.

    The outputs are passed through a tanh dense layer, scored against a
    learned vector, soft-maxed, and used to weight ``self.rnn_outputs``.
    """
    attention_dim = self.params.ATTENTION_DIM
    with tf.variable_scope('attention'):
        attention_vector = tf.get_variable(
            name='attention_vector',
            shape=[attention_dim],
            dtype=tf.float32,
        )
        projected = tf.layers.dense(
            inputs=self.rnn_outputs,
            units=attention_dim,
            activation=tf.nn.tanh,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            name='fc_attn',
        )
        # Contract the projection's axis 2 with the attention vector.
        scores = tf.tensordot(projected, attention_vector, axes=[[2], [0]])
        attention_weights = tf.expand_dims(tf.nn.softmax(scores), -1)
        # transpose_a makes the matmul sum over axis 1 of rnn_outputs.
        weighted_input = tf.matmul(
            self.rnn_outputs, attention_weights, transpose_a=True
        )
        self.attention_output = tf.squeeze(weighted_input, axis=2)
def _distance_logits(self, x1, x2):
    """Return dot-product logits between linearly projected ``x1`` and ``x2``.

    Both inputs are contracted over axis 2 with a ``(dim, project_size)``
    matrix, optionally shifted by a bias, then multiplied with ``x2``
    transposed. If ``self.scale`` is set, the result is divided by
    ``sqrt(project_size)``.

    Raises:
        ValueError: if ``self.share_project`` is set but the two inputs do
            not have the same last-dimension size.
    """
    init = get_keras_initialization(self.init)

    # Capture the input widths BEFORE projecting. The previous code compared
    # against x1 after projection, i.e. against project_size.
    x1_dim = x1.shape.as_list()[-1]
    x2_dim = x2.shape.as_list()[-1]

    project1 = tf.get_variable("project1", (x1_dim, self.project_size), initializer=init)
    x1 = tf.tensordot(x1, project1, [[2], [0]])

    if self.share_project:
        if x2_dim != x1_dim:
            raise ValueError(
                "share_project requires inputs with equal last dimension, "
                "got %s and %s" % (x1_dim, x2_dim))
        project2 = project1
    else:
        project2 = tf.get_variable("project2", (x2_dim, self.project_size), initializer=init)
    x2 = tf.tensordot(x2, project2, [[2], [0]])

    if self.project_bias:
        x1 += tf.get_variable("bias1", (1, 1, self.project_size), initializer=tf.zeros_initializer())
        x2 += tf.get_variable("bias2", (1, 1, self.project_size), initializer=tf.zeros_initializer())

    dots = tf.matmul(x1, x2, transpose_b=True)
    if self.scale:
        dots /= tf.sqrt(tf.cast(self.project_size, tf.float32))
    return dots
def _distance_logits(self, x, keys):
    """Score every (x, key) pair with a projected additive form.

    Both inputs are projected to ``self.projected_size``, summed pairwise via
    broadcasting, passed through ``self.activation`` and reduced with a
    learned vector.
    """
    init = get_keras_initialization(self.init)
    proj = self.projected_size

    key_w = tf.get_variable(
        "key_w", shape=(keys.shape.as_list()[-1], proj),
        initializer=init, dtype=tf.float32)
    # (batch, key_len, projected_size)
    key_logits = tf.tensordot(keys, key_w, axes=[[2], [0]])

    if self.shared_project:
        x_w = key_w
    else:
        x_w = tf.get_variable(
            "x_w", shape=(x.shape.as_list()[-1], proj),
            initializer=init, dtype=tf.float32)
    # (batch, x_len, projected_size)
    x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])

    # x is expanded on axis 2 and keys on axis 1, so the broadcast sum is
    # (batch, x_len, key_len, projected_size).
    pairwise = tf.expand_dims(x_logits, axis=2) + tf.expand_dims(key_logits, axis=1)
    pairwise = get_keras_activation(self.activation)(pairwise)

    combine_w = tf.get_variable(
        "combine_w", shape=proj, initializer=init, dtype=tf.float32)
    # Contract the projected axis away: (batch, x_len, key_len)
    return tf.tensordot(pairwise, combine_w, axes=[[3], [0]])
def _distance_logits(self, x, keys):
    """Return the sum of a key term, an x term and a weighted dot-product term.

    The result broadcasts a per-key score, a per-x score and the product of
    ``x * dot_w`` with ``keys`` transposed into one tensor.
    """
    init = get_keras_initialization(self.init)
    x_dim = x.shape.as_list()[-1]

    key_w = tf.get_variable(
        "key_w", shape=keys.shape.as_list()[-1], initializer=init, dtype=tf.float32)
    key_logits = tf.tensordot(keys, key_w, axes=[[2], [0]])  # (batch, key_len)

    x_w = tf.get_variable("input_w", shape=x_dim, initializer=init, dtype=tf.float32)
    x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])  # (batch, x_len)

    dot_w = tf.get_variable("dot_w", shape=x_dim, initializer=init, dtype=tf.float32)

    # Scale x by the dot weights first, then batch-multiply with the keys.
    dot_w_3d = tf.expand_dims(tf.expand_dims(dot_w, 0), 0)
    x_dots = x * dot_w_3d
    dot_logits = tf.matmul(x_dots, keys, transpose_b=True)

    return dot_logits + tf.expand_dims(key_logits, 1) + tf.expand_dims(x_logits, 2)
def apply(self, is_train, x, c, mask=None, context_mask=None):
    """Project ``x`` and ``c`` to ``self.n_out``, add them, and activate.

    ``c`` is projected with a plain matmul and broadcast along axis 1 of the
    projected ``x``. ``is_train``, ``mask`` and ``context_mask`` are not used
    by this method.
    """
    init = get_keras_initialization(self.init)
    c_w = tf.get_variable(
        "context_weights", shape=(c.shape.as_list()[-1], self.n_out),
        dtype=tf.float32, initializer=init)
    c_projected = tf.matmul(c, c_w)

    x_w = tf.get_variable(
        "input_weights", shape=(x.shape.as_list()[-1], self.n_out),
        dtype=tf.float32, initializer=get_keras_initialization(self.init))
    x_proj = tf.tensordot(x, x_w, [[2], [0]])

    total = x_proj + tf.expand_dims(c_projected, 1)

    if self.use_bias:
        bias = tf.get_variable(
            "bias", shape=self.n_out, dtype=tf.float32,
            initializer=tf.zeros_initializer())
        # Lift the bias vector to rank 3 so it broadcasts over axes 0 and 1.
        total += tf.expand_dims(tf.expand_dims(bias, 0), 0)

    activation_fn = get_keras_activation(self.activation)
    return activation_fn(total)
def apply(self, is_train, x, mask=None):
    """Pool ``x`` over axis 1 with learned, masked softmax weights.

    Keys (optionally produced by ``self.key_mapper``) are scored against a
    learned vector, masked with ``exp_mask``, soft-maxed and used to take a
    weighted sum of ``x``. ``self.post_process`` is applied when set.
    """
    if self.key_mapper is None:
        keys = x
    else:
        with tf.variable_scope("map_keys"):
            keys = self.key_mapper.apply(is_train, x, mask)

    weights = tf.get_variable(
        "weights", keys.shape.as_list()[-1], dtype=tf.float32,
        initializer=get_keras_initialization(self.init))

    scores = tf.tensordot(keys, weights, axes=[[2], [0]])  # (batch, x_words)
    scores = exp_mask(scores, mask)
    attention = tf.nn.softmax(scores)

    out = tf.einsum("ajk,aj->ak", x, attention)  # (batch, x_dim)

    if self.post_process is not None:
        with tf.variable_scope("post_process"):
            out = self.post_process.apply(is_train, out)
    return out
def apply(self, is_train, x, mask=None):
    """Encode ``x`` into ``self.n_encodings`` attention-pooled vectors.

    Keys (optionally produced by ``self.key_mapper``) are projected to one
    logit per encoding, masked, soft-maxed over axis 1 and used to take
    ``n_encodings`` weighted sums of ``x``.

    Args:
        is_train: forwarded to ``key_mapper`` / ``post_process``.
        x: rank-3 input; axis 2 is contracted with the weights.
        mask: optional sequence lengths passed to ``tf.sequence_mask``.

    Returns:
        Tensor from ``einsum("ajk,ajn->ank")``: (batch, n_encoding, feature),
        after ``self.post_process`` when set.
    """
    if self.key_mapper is not None:
        with tf.variable_scope("map_keys"):
            keys = self.key_mapper.apply(is_train, x, mask)
    else:
        keys = x

    weights = tf.get_variable(
        "weights", (keys.shape.as_list()[-1], self.n_encodings),
        dtype=tf.float32, initializer=get_keras_initialization(self.init))
    dist = tf.tensordot(keys, weights, axes=[[2], [0]])  # (batch, x_words, n_encoding)

    if self.bias:
        dist += tf.get_variable(
            "bias", (1, 1, self.n_encodings), dtype=tf.float32,
            initializer=tf.zeros_initializer())

    if mask is not None:
        bool_mask = tf.expand_dims(
            tf.cast(tf.sequence_mask(mask, tf.shape(x)[1]), tf.float32), 2)
        # Keep the computed logits on valid positions and push padded ones to
        # a very negative value. The previous `bool_mask * bool_mask` threw
        # the logits away, giving uniform attention whenever a mask was given.
        dist = bool_mask * dist + (1 - bool_mask) * VERY_NEGATIVE_NUMBER

    dist = tf.nn.softmax(dist, dim=1)

    out = tf.einsum("ajk,ajn->ank", x, dist)  # (batch, n_encoding, feature)

    if self.post_process is not None:
        with tf.variable_scope("post_process"):
            out = self.post_process.apply(is_train, out)
    return out
def augmented_loss(self, y_true, y_pred):
    """Cross-entropy plus a temperature-scaled KL term against the embeddings.

    The one-hot targets are mapped to their embedding vectors, compared with
    every embedding by dot product, and turned into a soft target
    distribution. The KL divergence between that distribution and the
    tempered prediction is added to the base loss, weighted by
    ``self.gamma * self.temperature``.
    """
    _y_pred = Activation("softmax")(y_pred)
    # NOTE(review): the argument order of K.categorical_crossentropy differs
    # between Keras releases — confirm against the pinned version.
    loss = K.categorical_crossentropy(_y_pred, y_true)

    # y is (batch x seq x vocab)
    target_ids = K.argmax(y_true, axis=2)  # turn one hot to index. (batch x seq)
    target_vectors = self.embedding(target_ids)  # (batch x seq x vector_length)

    # vector x embedding dot products (batch x seq x vocab)
    embedding_t = K.transpose(self.embedding.embeddings)
    similarity = tf.tensordot(target_vectors, embedding_t, 1)
    # explicitly set shape
    similarity = K.reshape(similarity, (-1, self.sequence_size, self.vocab_size))

    soft_targets = K.softmax(similarity / self.temperature)
    tempered_pred = Activation("softmax")(y_pred / self.temperature)

    aug_loss = kullback_leibler_divergence(soft_targets, tempered_pred)
    loss += (self.gamma * self.temperature) * aug_loss
    return loss
def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
    """Build the decoder on top of the encoder state and return its output.

    Args:
        enc_final_state: final encoder state (nested structure).
        enc_hidden_states: encoder outputs; the last axis is the hidden size.
        output_embed_matrix: forwarded to ``Seq2SeqDecoder.decode``.
        training: forwarded to ``Seq2SeqDecoder.decode``.

    Returns:
        Whatever ``Seq2SeqDecoder.decode`` returns.
    """
    cell_dec = tf.contrib.rnn.MultiRNNCell(
        [self.make_rnn_cell(i, True) for i in range(self.config.rnn_layers)])

    encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
    decoder_hidden_size = int(cell_dec.output_size)

    # if encoder and decoder have different sizes, add a projection layer
    if encoder_hidden_size != decoder_hidden_size:
        # Deliberate guard kept from the original: the projection path is
        # disabled unless asserts are stripped (python -O).
        assert False, (encoder_hidden_size, decoder_hidden_size)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable(
                'kernel', (encoder_hidden_size, decoder_hidden_size), dtype=tf.float32)

            # apply a relu to the projection for good measure
            enc_final_state = nest.map_structure(
                lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
            # Contract the encoder hidden axis with the kernel's encoder axis
            # (axis 0). The previous [[2], [1]] paired it with the decoder
            # axis, which cannot match in this branch.
            enc_hidden_states = tf.nn.relu(
                tf.tensordot(enc_hidden_states, kernel, [[2], [0]]))
    else:
        # flatten and repack the state
        enc_final_state = nest.pack_sequence_as(
            cell_dec.state_size, nest.flatten(enc_final_state))

    if self.config.connect_output_decoder:
        cell_dec = ParentFeedingCellWrapper(cell_dec, enc_final_state)
    else:
        cell_dec = InputIgnoringCellWrapper(cell_dec, enc_final_state)

    if self.config.apply_attention:
        attention = LuongAttention(
            self.config.decoder_hidden_size, enc_hidden_states,
            self.input_length_placeholder, probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(
            cell_dec, attention,
            cell_input_fn=lambda inputs, _: inputs,
            attention_layer_size=self.config.decoder_hidden_size,
            initial_cell_state=enc_final_state)
        enc_final_state = cell_dec.zero_state(self.batch_size, dtype=tf.float32)

    decoder = Seq2SeqDecoder(
        self.config, self.input_placeholder, self.input_length_placeholder,
        self.output_placeholder, self.output_length_placeholder,
        self.batch_number_placeholder)
    return decoder.decode(
        cell_dec, enc_final_state, self.config.grammar.output_size,
        output_embed_matrix, training)
def _setup(self, x, axes=None):
    """Setup the linear layer.

    :param x: Input tensor.
    :param axes: Axes. When given, ``tf.tensordot`` is used instead of
        ``tf.matmul``.
    :return: Output tensor, with bias and batch norm applied when enabled.
    """
    if axes is None:
        y = tf.matmul(x, self._w)
    else:
        y = tf.tensordot(x, self._w, axes=axes)

    if self._with_bias:
        y += self._b
    if self._with_batch_norm:
        y = self._batch_norm.setup(y)
    return y
def _setup(self, seq, vec, activation=tf.nn.tanh):
    """Setup a soft attention mechanism for the given context sequence and state.

    The result is an attention context for the state.

    :param seq: The sequence tensor. Its shape is defined as
        (seq_length, batch_size, seq_elem_size).
    :param vec: The vector tensor. Its shape is defined as
        (batch_size, vec_size).
    :param activation: The activation function. Default is tf.nn.tanh.
        ``None`` disables it.
    :return: An attention context with shape (batch_size, seq_elem_size).
    """
    # (seq_length, batch_size, seq_elem_size) @ (seq_elem_size, common_size)
    # -> (seq_length, batch_size, common_size)
    seq_proj = tf.tensordot(seq, self._w, ((2,), (0,)))

    # (batch_size, vec_size) @ (vec_size, common_size)
    # -> (batch_size, common_size)
    # -> (1, batch_size, common_size)
    vec_proj = tf.matmul(vec, self._u)
    vec_proj = tf.reshape(vec_proj, (1, -1, self._common_size))

    # -> (seq_length, batch_size, common_size)
    if activation is not None:
        hidden = activation(seq_proj + vec_proj)
    else:
        hidden = seq_proj + vec_proj

    # (seq_length, batch_size, common_size) @ (common_size, 1)
    # -> (seq_length, batch_size, 1)
    scores = tf.tensordot(hidden, self._omega, ((2,), (0,)))
    weights = tf.nn.softmax(scores, dim=0)

    # (seq_length, batch_size, 1) * (seq_length, batch_size, seq_elem_size)
    # -> (seq_length, batch_size, seq_elem_size)
    # -> (batch_size, seq_elem_size)
    att_context = tf.reduce_sum(weights * seq, 0)
    return att_context
def q_indep(q, q_mask):
    """Summarise ``q`` with two BiLSTM layers and masked soft attention.

    Returns ``matmul(a, q_s, transpose_a=True)`` where ``a`` is the masked
    softmax (over axis 1) of the scored, feed-forward transformed sequence.
    """
    q_s = q
    for layer in range(2):
        q_s = BiLSTM(q_s, q_mask, 'BiLSTM_q_indep_{}'.format(layer))

    w_q = tf.Variable(tf.random_normal([1, n_hidden]))
    hidden = FFNN(q_s, q_mask, 'FFNN_q_s')
    # Contract the last axis of both operands.
    scores = tf.tensordot(hidden, w_q, axes=[[-1], [-1]])
    attention = softmax_with_mask(scores, q_mask, dim=1)
    return tf.matmul(attention, q_s, transpose_a=True)
def span_score_logits(spans, spans_mask):
    """Score each span with a feed-forward net and a learned vector.

    The scores are multiplied by the first slice of the mask's last axis
    (``spans_mask[:, :, 0]``).
    """
    w_a = tf.Variable(tf.random_normal([n_hidden]))
    h_a = FFNN(spans, spans_mask, 'spans')
    scores = tf.tensordot(h_a, w_a, axes=[[-1], [-1]])
    mask_2d = spans_mask[:, :, 0]
    return scores * mask_2d
def main():
    """
    Train a policy on the CartPole-v0 environment.

    Runs forever: each iteration collects rollouts, prints the mean reward
    and takes one Adam step on the policy-gradient loss.
    """
    observations = tf.placeholder(tf.float32, shape=[None, 4])
    out_probs = tf.nn.softmax(policy(observations))

    # Selected actions (one-hot vectors) and cumulative
    # episode rewards for those actions.
    actions = tf.placeholder(tf.float32, shape=[None, 2])
    goodnesses = tf.placeholder(tf.float32, shape=[None, 1])

    # axes=2 contracts both axes, giving a scalar sum of
    # log-prob * action * goodness.
    weighted_actions = actions * goodnesses
    loss = -tf.tensordot(tf.log(out_probs), weighted_actions, axes=2)
    loss /= tf.cast(tf.shape(actions)[0], tf.float32)

    opt = tf.train.AdamOptimizer(learning_rate=1e-2)
    minimize = opt.minimize(loss)

    env = gym.make('CartPole-v0')

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        while True:
            obs, acts, rews, mean_rew = rollouts(
                env, sess, observations, out_probs, 2000)
            feed = {observations: obs, actions: acts, goodnesses: rews}
            print('mean_reward=%f' % (mean_rew,))
            sess.run(minimize, feed_dict=feed)
def loss(self, actual_out):
    """
    Compute the cross-entropy loss between the actual
    output and the desired targets.

    Sums the per-timestep losses and divides by
    ``self.batch * self.length``.
    """
    total = None
    for step, logits in enumerate(actual_out):
        target = self.outputs[step]
        step_log_probs = tf.log(tf.nn.softmax(logits))
        # axes=2 contracts both axes into a scalar.
        step_loss = -tf.tensordot(step_log_probs, target, axes=2)
        total = step_loss if total is None else total + step_loss
    return total / (self.batch * self.length)
def surrogate_objective(policy_out):
    """
    Create the surrogate objective for policy gradients.

    Returns actions, rewards, objective, where the first two are new
    float32 placeholders of shape [None, 2] and [None, 1].
    """
    actions = tf.placeholder(tf.float32, [None, 2])
    rewards = tf.placeholder(tf.float32, [None, 1])
    log_probs = tf.log(policy_out)
    weighted = actions * rewards
    # axes=2 contracts both axes into a scalar.
    objective = tf.tensordot(log_probs, weighted, axes=2)
    return actions, rewards, objective
def dOmega_dWrec(self):
    """Return the gradient of the Omega regulariser with respect to ``W_rec``.

    Omega is the squared deviation from 1 of the ratio between the squared
    norm of a one-step propagated error gradient and the squared norm of the
    error gradient, taken where the latter exceeds 1e-20.

    Returns:
        ``(out, test)``: ``out`` is the ``tf.gradients`` list for ``W_rec``
        with its first entry wrapped in a print op and a finiteness check;
        ``test`` is ``tf.gradients(states[0], states[-1])``.
    """
    # states in shape timesteps, batch, n_rec
    states = self.states
    dxt_list = tf.gradients(self.error, states)
    test = tf.gradients(states[0], states[-1])

    dxt = tf.stack(dxt_list)
    xt = tf.stack(states)

    # Statement order below mirrors the original expression's evaluation order.
    leak_term = (1 - self.alpha) * dxt
    scaled_dxt = self.alpha * dxt
    effective_w_t = tf.transpose(
        tf.matmul(tf.abs(self.W_rec) * self.rec_Connectivity, self.Dale_rec))
    propagated = tf.tensordot(scaled_dxt, effective_w_t, axes=1)
    # 1 where the state is positive, 0 elsewhere.
    active = tf.where(tf.greater(xt, 0), tf.ones_like(xt), tf.zeros_like(xt))
    num = leak_term + propagated * active
    denom = dxt

    # sum over hidden units
    num = tf.reduce_sum(tf.square(num), axis=2)
    denom = tf.reduce_sum(tf.square(denom), axis=2)

    # Fall back to a ratio of 1 where the denominator is effectively zero.
    bounded = tf.where(
        tf.greater(denom, 1e-20), tf.div(num, 1.0 * denom), tf.ones_like(num))
    nelems = tf.reduce_mean(
        tf.where(tf.greater(denom, 1e-20),
                 1.0 * tf.ones_like(num),
                 1.0 * tf.zeros_like(num)),
        axis=1)

    # sum mean over each batch by time steps
    Omega = tf.square(bounded - 1.0)
    Omega = tf.reduce_sum(tf.reduce_mean(Omega, axis=1)) / (1.0 * tf.reduce_sum(nelems))

    out = tf.gradients(Omega, self.W_rec)
    out[0] = tf.Print(out[0], [out[0], self.W_rec, Omega], "omega grads")
    out[0] = tf.verify_tensor_all_finite(out[0], "dead omega grad")
    return out, test
def _distance_logits(self, x, keys):
    """Return additive logits: one learned score per x plus one per key."""
    init = get_keras_initialization(self.init)

    key_dim = keys.shape.as_list()[-1]
    key_w = tf.get_variable("key_w", shape=key_dim, initializer=init, dtype=tf.float32)
    key_logits = tf.tensordot(keys, key_w, axes=[[2], [0]])  # (batch, key_len)

    x_dim = x.shape.as_list()[-1]
    x_w = tf.get_variable("x_w", shape=x_dim, initializer=init, dtype=tf.float32)
    x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])  # (batch, x_len)

    # Broadcasting will expand the arrays to (batch, x_len, key_len)
    x_column = tf.expand_dims(x_logits, axis=2)
    key_row = tf.expand_dims(key_logits, axis=1)
    return x_column + key_row
def apply(self, is_train, x, mask=None):
    """Add a learned linear projection of ``x`` to ``self.other``'s output.

    The projection contracts the last axis of ``x`` so its width matches the
    wrapped layer's output.
    """
    out = self.other.apply(is_train, x, mask)
    in_dim = x.shape.as_list()[-1]
    out_dim = out.shape.as_list()[-1]
    w = tf.get_variable("project_w", (in_dim, out_dim))
    last_axis = len(x.shape) - 1
    shortcut = tf.tensordot(x, w, axes=[[last_axis], [0]])
    return out + shortcut
def apply(self, is_train, tensor1: tf.Tensor, tensor2: tf.Tensor) -> tf.Tensor:
    """Concatenate ``tensor1`` with its projection multiplied by ``tensor2``.

    ``tensor1`` is projected to ``tensor2``'s last-dimension size, optionally
    divided by ``sqrt`` of ``tensor1``'s last-dimension size, and multiplied
    element-wise with ``tensor2``. ``tensor2`` itself is appended when
    ``self.include_unscaled`` is set.
    """
    init = get_keras_initialization(self.init)
    dim1 = tensor1.shape.as_list()[-1]
    last_axis = len(tensor1.shape) - 1

    w1 = tf.get_variable("w1", (dim1, tensor2.shape.as_list()[-1]), initializer=init)
    project1 = tf.tensordot(tensor1, w1, [[last_axis], [0]])
    if self.scale:
        project1 /= np.sqrt(dim1)
    project1 *= tensor2

    elements = [tensor1, project1]
    if self.include_unscaled:
        elements.append(tensor2)
    return tf.concat(elements, axis=last_axis)
def apply(self, is_train, x, mask=None):
    """Reduce axis 1 of ``x`` with a learned weight vector plus a scalar bias.

    The weights are initialised to ``s / 3.0`` where ``s`` is the static size
    of axis 1. ``is_train`` and ``mask`` are not used.
    """
    axis_size = x.shape.as_list()[1]
    w = tf.get_variable(
        "w", (axis_size,), dtype=tf.float32,
        initializer=tf.constant_initializer(axis_size / 3.0))
    b = tf.get_variable(
        "b", (), dtype=tf.float32, initializer=tf.zeros_initializer())
    reduced = tf.tensordot(x, w, [[1], [0]])
    return reduced + b
def apply(self, is_train, x, mask=None):
    """Contract axes 1 and 3 of a rank-4 ``x`` with a learned weight tensor.

    The weight has shape ``(d1, d2, self.n_out)`` where ``d1`` and ``d2`` are
    the static sizes of axes 1 and 3. ``is_train`` and ``mask`` are not used.
    """
    dims = x.shape.as_list()
    _, d1, _, d2 = dims  # unpacking enforces rank 4
    w = tf.get_variable("w", (d1, d2, self.n_out), dtype=tf.float32)
    contracted = tf.tensordot(x, w, [[1, 3], [0, 1]])
    return contracted
def apply(self, is_train, x, c, mask=None, context_mask=None):
    """Merge a sequence ``x`` with a context ``c`` into ``self.output_size``.

    After dropout, both inputs are linearly projected and summed (``c`` is
    broadcast along axis 1). When ``self.use_dots`` is set, a projection of
    the element-wise product of ``x`` and ``c`` is added too. A bias is added
    and ``self.activation`` applied.
    """
    x = dropout(x, self.keep_probs, is_train)
    c = dropout(c, self.context_keep_probs, is_train)
    init = get_keras_initialization(self.init)
    out_size = self.output_size

    x_w = tf.get_variable(
        "merge_x_weights", (x.shape.as_list()[-1], out_size), initializer=init)
    c_w = tf.get_variable(
        "merge_context_weights", (c.shape.as_list()[-1], out_size), initializer=init)

    x_term = tf.tensordot(x, x_w, axes=[[2], [0]])
    c_term = tf.expand_dims(tf.matmul(c, c_w), 1)
    output = x_term + c_term

    if self.use_dots:
        # Element-wise product of x with c broadcast over axis 1.
        dots = tf.einsum("aij,aj->aij", x, c)
        dot_w = tf.get_variable(
            "dot_weights", (c.shape.as_list()[-1], out_size), initializer=init)
        output += tf.tensordot(dots, dot_w, axes=[[2], [0]])

    bias = tf.get_variable("merge_bias", (1, 1, out_size))
    output += bias
    return get_keras_activation(self.activation)(output)
def context_shift(x, context, shift=True, scale=True, scope=None, reuse=None):
    """Scale and/or shift ``x`` by linear functions of ``context``.

    ``gamma`` and ``beta`` have shape ``[B, 1, ..., 1, C]`` where ``B`` and
    ``C`` are the last-dimension sizes of ``context`` and ``x``. Contracting
    ``context`` with them over one axis gives the multiplier and the offset.

    Args:
        x: tensor to modulate.
        context: conditioning tensor.
        shift: add the ``beta`` term when true.
        scale: multiply by the ``gamma`` term when true.
        scope, reuse: forwarded to ``tf.variable_scope`` (default name
            ``'context_shift'``).

    Returns:
        The modulated tensor, with its static shape set to that of ``x``.
    """
    ctx_last = context._shape_as_list()[-1]
    x_last = x._shape_as_list()[-1]
    rank = len(x.shape)
    param_shape = [ctx_last] + [1] * (rank - 2) + [x_last]

    with tf.variable_scope(scope, 'context_shift', reuse=reuse):
        output = x
        if scale:
            gamma = tf.get_variable(
                'gamma', param_shape, initializer=tf.ones_initializer)
            output *= tf.tensordot(context, gamma, 1)
        if shift:
            beta = tf.get_variable(
                'beta', param_shape, initializer=tf.zeros_initializer)
            output += tf.tensordot(context, beta, 1)
        output.set_shape(x.get_shape())
    return output
def learn_comb_orth(poses, dm_shape, reuse=None, _float_type=tf.float32):
    """Mix axis 2 of ``poses`` with a non-trainable combination matrix.

    Returns:
        ``(poses, comb_matrix_image, update_comb_mat)``: the transformed
        poses, a uint8 image of the matrix rescaled to 0..255, and a function
        ``update_comb_mat(grad, lr)`` that returns the assign op for one
        manual update of the matrix.
    """
    rows, cols = dm_shape[0], dm_shape[1]
    with tf.variable_scope("learn_comb", reuse=reuse):
        comb_matrix = tf.get_variable(
            "matrix", [rows, cols],
            initializer=identity_initializer(0),
            dtype=_float_type,
            trainable=False,
        )
        tf.add_to_collection(COMB_MATRIX_COLLECTION, comb_matrix)

        # Contract axis 2 of poses with axis 1 of the matrix; the new axis
        # lands last, so swap it back into position 2.
        poses = tf.tensordot(poses, comb_matrix, [[2], [1]])
        poses = tf.transpose(poses, [0, 1, 3, 2])

        # Special update code
        def update_comb_mat(grad, lr):
            """Return the op assigning inv(I + lr/2 A) (I - lr/2 A) C."""
            A = tf.matmul(tf.transpose(grad), comb_matrix) - \
                tf.matmul(tf.transpose(comb_matrix), grad)
            I = tf.constant(np.eye(rows), dtype=_float_type)
            t1 = I + lr / 2 * A
            t2 = I - lr / 2 * A
            Y = tf.matmul(tf.matmul(tf.matrix_inverse(t1), t2), comb_matrix)
            return tf.assign(comb_matrix, Y)

        # Visualization
        lowest = tf.reduce_min(comb_matrix)
        highest = tf.reduce_max(comb_matrix)
        comb_matrix_image = (comb_matrix - lowest) / (highest - lowest) * 255.0
        comb_matrix_image = tf.cast(comb_matrix_image, tf.uint8)
        comb_matrix_image = tf.reshape(comb_matrix_image, [1, rows, cols, 1])

    return poses, comb_matrix_image, update_comb_mat
def tensordot(x, y, axes):
    """Thin wrapper that forwards to ``tf.tensordot`` with the given axes."""
    contracted = tf.tensordot(x, y, axes=axes)
    return contracted
def dcost_fun(y, t, phi, batch_size):
    """computes the gradient of the cost wrt. the weights

    Args:
        y, t: the predicted probability and target variable tensors
            of shape (N_examples, K_classes)
        phi: feature tensor of shape (N_examples, dim_phi)
        batch_size: divisor applied to the summed gradient

    Returns:
        The gradient tensor of shape (dim_phi, K_classes).
    """
    residual = y - t
    # Contract the example axis of both operands.
    summed = tf.tensordot(phi, residual, axes=([0], [0]))
    return summed / batch_size
def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
    """Build the beam-search decoder and return its decoded outputs.

    Args:
        enc_final_state: final encoder state (nested structure).
        enc_hidden_states: encoder outputs; the last axis is the hidden size.
        output_embed_matrix: forwarded to ``BeamSearchOptimizationDecoder``.
        training: selects the training beam size and enables gold sequences.

    Returns:
        The first element of ``tf.contrib.seq2seq.dynamic_decode``'s result
        (time-major).

    Raises:
        NotImplementedError: if ``config.use_grammar_constraints`` is set.
    """
    cell_dec = tf.contrib.rnn.MultiRNNCell(
        [self.make_rnn_cell(i, for_decoder=True) for i in range(self.config.rnn_layers)])

    encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
    decoder_hidden_size = int(cell_dec.output_size)

    # if encoder and decoder have different sizes, add a projection layer
    if encoder_hidden_size != decoder_hidden_size:
        # Deliberate guard kept from the original: the projection path is
        # disabled unless asserts are stripped (python -O).
        assert False, (encoder_hidden_size, decoder_hidden_size)
        with tf.variable_scope('hidden_projection'):
            kernel = tf.get_variable(
                'kernel', (encoder_hidden_size, decoder_hidden_size), dtype=tf.float32)

            # apply a relu to the projection for good measure
            enc_final_state = nest.map_structure(
                lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
            # Contract the encoder hidden axis with the kernel's encoder axis
            # (axis 0). The previous [[2], [1]] paired it with the decoder
            # axis, which cannot match in this branch.
            enc_hidden_states = tf.nn.relu(
                tf.tensordot(enc_hidden_states, kernel, [[2], [0]]))
    else:
        # flatten and repack the state
        enc_final_state = nest.pack_sequence_as(
            cell_dec.state_size, nest.flatten(enc_final_state))

    beam_width = self.config.training_beam_size if training else self.config.beam_size

    #cell_dec = ParentFeedingCellWrapper(cell_dec, tf.contrib.seq2seq.tile_batch(enc_final_state, beam_width))
    if self.config.apply_attention:
        attention = LuongAttention(
            decoder_hidden_size,
            tf.contrib.seq2seq.tile_batch(enc_hidden_states, beam_width),
            tf.contrib.seq2seq.tile_batch(self.input_length_placeholder, beam_width),
            probability_fn=tf.nn.softmax)
        cell_dec = AttentionWrapper(
            cell_dec, attention,
            cell_input_fn=lambda inputs, _: inputs,
            attention_layer_size=decoder_hidden_size,
            initial_cell_state=tf.contrib.seq2seq.tile_batch(enc_final_state, beam_width))
        enc_final_state = cell_dec.zero_state(self.batch_size * beam_width, dtype=tf.float32)
    else:
        enc_final_state = tf.contrib.seq2seq.tile_batch(enc_final_state, beam_width)

    print('enc_final_state', enc_final_state)
    linear_layer = tf_core_layers.Dense(self.config.output_size)
    # One start token per batch element.
    go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start
    decoder = BeamSearchOptimizationDecoder(
        training, cell_dec, output_embed_matrix, go_vector,
        self.config.grammar.end, enc_final_state,
        beam_width=beam_width,
        output_layer=linear_layer,
        gold_sequence=self.output_placeholder if training else None,
        gold_sequence_length=(self.output_length_placeholder+1) if training else None)

    if self.config.use_grammar_constraints:
        raise NotImplementedError("Grammar constraints are not implemented for the beam search yet")

    final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder, output_time_major=True, maximum_iterations=self.config.max_length)
    return final_outputs
def lyr_linear(
        name, s_x, odim,
        axis=-1, bias=True, w_init=None, b_init=None):
    '''
    Like tf.xw_plus_b, but works on arbitrary shape

    Args:
        name: string, variable scope holding 'W' and (optionally) 'B'
        s_x: tensor variable with a fully known static rank
        odim: integer, output size replacing the size of `axis`
        axis: integer, axis of `s_x` to transform
        bias: boolean, whether to use bias
        w_init: initializer for W
        b_init: initializer for B (defaults to zeros)

    Returns:
        The transformed tensor.
    '''
    assert isinstance(odim, int)
    x_shape = s_x.get_shape().as_list()
    ndim = len(x_shape)
    # Validate the axis before indexing with it, so an out-of-range axis
    # trips the assert instead of an IndexError.
    assert -ndim <= axis < ndim
    idim = x_shape[axis]
    assert isinstance(idim, int)
    with tf.variable_scope(name):
        v_w = tf.get_variable(
            'W', [idim, odim],
            initializer=w_init,
            dtype=hparams.FLOATX)
        if ndim == 1:
            s_y = tf.matmul(tf.expand_dims(s_x, 0), v_w)
            s_y = tf.squeeze(s_y, 0)
        elif ndim == 2:
            if axis % 2 == 1:
                s_y = tf.matmul(s_x, v_w)
            else:
                s_y = tf.matmul(tf.transpose(s_x), v_w)
                # Transpose the PRODUCT back so odim sits on axis 0. The
                # previous code transposed s_x here, discarding the matmul.
                s_y = tf.transpose(s_y)
        elif (axis+1) % ndim == 0:
            # Last-axis case: flatten the leading axes, matmul, restore them.
            s_batch_shp = tf.shape(s_x)[:-1]
            s_x = tf.reshape(
                s_x,
                [tf.reduce_prod(s_batch_shp, axis=None), x_shape[-1]])
            s_y = tf.matmul(s_x, v_w)
            s_y = tf.reshape(s_y, tf.concat([s_batch_shp, [odim]], axis=0))
        else:
            # NOTE(review): tensordot puts the odim axis LAST, whereas the
            # bias below is shaped as if odim sat at position `axis` —
            # confirm whether a transpose back is intended here.
            s_y = tf.tensordot(s_x, v_w, [[axis], [0]])
        if bias:
            if b_init is None:
                b_init = tf.constant_initializer(0., dtype=hparams.FLOATX)
            v_b = tf.get_variable(
                'B', [odim],
                initializer=b_init,
                dtype=hparams.FLOATX)
            # Trailing singleton axes let the bias broadcast along `axis`.
            s_b = tf.reshape(v_b, [odim] + [1] * (ndim - (axis % ndim) - 1))
            s_y = s_y + s_b
    return s_y
def conv_step(nodes, children, feature_size, w_t, w_r, w_l, b_conv):
    """Convolve a batch of nodes and children.

    Each node is stacked with its children's vectors, combined with three
    sets of position coefficients (from ``eta_t``, ``eta_r``, ``eta_l``),
    contracted against the stacked weight matrices, shifted by ``b_conv``
    and passed through tanh.

    Lots of high dimensional tensors in this function. Computing everything
    as tensor products is more efficient than while loops; follow the trail
    of dimensions in the comments.
    """
    with tf.name_scope('conv_step'):
        # nodes is shape (batch_size x max_tree_size x feature_size)
        # children is shape (batch_size x max_tree_size x max_children)

        with tf.name_scope('trees'):
            # children_vectors will have shape
            # (batch_size x max_tree_size x max_children x feature_size)
            children_vectors = children_tensor(nodes, children, feature_size)

            # add a 4th dimension to the nodes tensor
            nodes = tf.expand_dims(nodes, axis=2)
            # tree_tensor is shape
            # (batch_size x max_tree_size x max_children + 1 x feature_size)
            tree_tensor = tf.concat([nodes, children_vectors], axis=2, name='trees')

        with tf.name_scope('coefficients'):
            # coefficient tensors are shape (batch_size x max_tree_size x max_children + 1)
            c_t = eta_t(children)
            c_r = eta_r(children, c_t)
            c_l = eta_l(children, c_t, c_r)

            # concatenate the position coefficients into a tensor
            # (batch_size x max_tree_size x max_children + 1 x 3)
            coef = tf.stack([c_t, c_r, c_l], axis=3, name='coef')

        with tf.name_scope('weights'):
            # stack weight matrices on top to make a weight tensor
            # (3, feature_size, output_size)
            weights = tf.stack([w_t, w_r, w_l], axis=0)

        with tf.name_scope('combine'):
            children_shape = tf.shape(children)
            batch_size = children_shape[0]
            max_tree_size = children_shape[1]
            max_children = children_shape[2]

            # reshape for matrix multiplication
            n_nodes = batch_size * max_tree_size
            n_slots = max_children + 1
            result = tf.reshape(tree_tensor, (n_nodes, n_slots, feature_size))
            coef = tf.reshape(coef, (n_nodes, n_slots, 3))
            # NOTE(review): with transpose_a the product is
            # (n_nodes, feature_size, 3), yet the reshape below reads it as
            # (..., 3, feature_size) without a transpose — confirm intended.
            result = tf.matmul(result, coef, transpose_a=True)
            result = tf.reshape(result, (batch_size, max_tree_size, 3, feature_size))

            # output is (batch_size, max_tree_size, output_size)
            result = tf.tensordot(result, weights, [[2, 3], [0, 1]])

            # output is (batch_size, max_tree_size, output_size)
            return tf.nn.tanh(result + b_conv, name='conv')
def __init__(self, numberOfUnits, dictionarySize, maximumLength, inputFeatures = None, alwaysProvideInput = False):
    """Build an LSTM sequence model with tied input/output embeddings.

    Two graphs are built: one unrolled over ``maximumLength`` steps with
    ``tf.nn.dynamic_rnn``, and a single-step graph driven by state
    placeholders.

    Args:
        numberOfUnits: LSTM size, also the embedding width.
        dictionarySize: number of symbols (columns of the loading matrix).
        maximumLength: number of unrolled time steps.
        inputFeatures: optional tensor used to produce the initial LSTM state
            through tanh dense layers.
        alwaysProvideInput: when true (and ``inputFeatures`` is given), a
            dense projection of the features is added to every input step.
    """
    self.model = rnn.LSTMCell(numberOfUnits)
    self.loadingMatrix = tf.Variable(tf.random_uniform([numberOfUnits,dictionarySize],-1.0,1.0),name = 'LOADINGMATRIX')
    self.lengthPlaceholder = tf.placeholder(tf.int32, shape = [None],name = 'LENGTH')
    self.maximumLength = maximumLength
    self.dictionarySize = dictionarySize

    # Identity check: `!= None` would invoke element-wise comparison on
    # array-like inputs instead of testing for a missing argument.
    if inputFeatures is not None:
        self.transformedInputFeatures = [ tf.layers.dense(inputs = inputFeatures,
                                                          units = s,
                                                          activation = tf.nn.tanh)
                                          for s in self.model.state_size ]
        self.transformedInputFeatures = rnn.LSTMStateTuple(*self.transformedInputFeatures)
        if alwaysProvideInput:
            self.alwaysProvidedInput = tf.layers.dense(inputs = inputFeatures,
                                                       units = numberOfUnits,
                                                       activation = tf.nn.tanh)
        else:
            self.alwaysProvidedInput = None
    else:
        self.transformedInputFeatures = None
        self.alwaysProvidedInput = None

    # Unrolls some number of steps maximumLength
    self.inputPlaceholder = tf.placeholder(tf.int32, shape = [None,maximumLength],name = 'INPUT')
    # The loading matrix is transposed so each symbol indexes a row of
    # width numberOfUnits.
    embeddedInputs = tf.nn.embedding_lookup(tf.transpose(self.loadingMatrix),self.inputPlaceholder)
    if alwaysProvideInput:
        # alwaysProvidedInput: [None,numberOfUnits]
        # we want to duplicate it along the time axis to get [None,numberOfTimesSteps,numberOfUnits]
        alwaysProvidedInput2 = tf.reshape(self.alwaysProvidedInput,[-1,1,numberOfUnits])
        alwaysProvidedInput3 = tf.tile(alwaysProvidedInput2, [1,maximumLength,1])
        embeddedInputs = embeddedInputs + alwaysProvidedInput3
    self.outputs, self.states = tf.nn.dynamic_rnn(self.model,
                                                  inputs = embeddedInputs,
                                                  dtype = tf.float32,
                                                  sequence_length = self.lengthPlaceholder,
                                                  initial_state = self.transformedInputFeatures)
    # projectedOutputs: None x timeSteps x dictionarySize
    projectedOutputs = tf.tensordot(self.outputs, self.loadingMatrix, axes = [[2],[0]])
    self.outputDistribution = tf.nn.log_softmax(projectedOutputs)
    self.hardOutputs = tf.cast(tf.argmax(projectedOutputs,dimension = 2),tf.int32)

    # A small graph for running the recurrence network forward one step
    self.statePlaceholders = [ tf.placeholder(tf.float32, [None,numberOfUnits], name = 'state0'),
                               tf.placeholder(tf.float32, [None,numberOfUnits], name = 'state1')]
    self.oneInputPlaceholder = tf.placeholder(tf.int32, shape = [None], name = 'inputForOneStep')
    projectedInputs = tf.nn.embedding_lookup(tf.transpose(self.loadingMatrix),self.oneInputPlaceholder)
    if alwaysProvideInput:
        projectedInputs = projectedInputs + self.alwaysProvidedInput
    self.oneOutput, self.oneNewState = self.model(projectedInputs,
                                                  rnn.LSTMStateTuple(*self.statePlaceholders))
    # Store the new state as a plain two-element list.
    self.oneNewState = [self.oneNewState[0],self.oneNewState[1]]
    self.oneOutputDistribution = tf.nn.log_softmax(tf.matmul(self.oneOutput, self.loadingMatrix))

# sequence prediction model with prediction fed into input
def learn_comb_orth_rmsprop(poses, dm_shape, reuse=None, _float_type=tf.float32):
    """Mix axis 2 of ``poses`` with a matrix updated via RMS-scaled gradients.

    Returns:
        ``(poses, comb_matrix_image, update_comb_mat)``: the transformed
        poses, a uint8 image of the matrix rescaled to 0..255, and a function
        ``update_comb_mat(grad, lr)`` that first refreshes the running mean
        of squared gradients and then returns the assign op for the matrix.
    """
    rows, cols = dm_shape[0], dm_shape[1]
    with tf.variable_scope("learn_comb", reuse=reuse):
        comb_matrix = tf.get_variable(
            "matrix", [rows, cols],
            initializer=identity_initializer(0),
            dtype=_float_type,
            trainable=False,
        )
        comb_matrix_m = tf.get_variable(
            "matrix_momentum", [rows, cols],
            initializer=tf.zeros_initializer(),
            dtype=_float_type,
            trainable=False,
        )
        tf.add_to_collection(COMB_MATRIX_COLLECTION, comb_matrix)

        # Contract axis 2 of poses with axis 1 of the matrix; the new axis
        # lands last, so swap it back into position 2.
        poses = tf.tensordot(poses, comb_matrix, [[2], [1]])
        poses = tf.transpose(poses, [0, 1, 3, 2])

        # Special update code
        def update_comb_mat(grad, lr):
            """Return the op assigning inv(I + A/2) (I - A/2) C."""
            I = tf.constant(np.eye(rows), dtype=_float_type)

            # Momentum update
            momentum_op = tf.assign(
                comb_matrix_m,
                comb_matrix_m * 0.99 + (1 - 0.99) * tf.square(grad))

            # The matrix update must see the refreshed running mean.
            with tf.control_dependencies([momentum_op]):
                # Matrix update
                scaled_grad = lr * grad / tf.sqrt(comb_matrix_m + 1.e-5)
                A = tf.matmul(tf.transpose(scaled_grad), comb_matrix) - \
                    tf.matmul(tf.transpose(comb_matrix), scaled_grad)
                t1 = I + 0.5 * A
                t2 = I - 0.5 * A
                Y = tf.matmul(tf.matmul(tf.matrix_inverse(t1), t2), comb_matrix)
                return tf.assign(comb_matrix, Y)

        # Visualization
        lowest = tf.reduce_min(comb_matrix)
        highest = tf.reduce_max(comb_matrix)
        comb_matrix_image = (comb_matrix - lowest) / (highest - lowest) * 255.0
        comb_matrix_image = tf.cast(comb_matrix_image, tf.uint8)
        comb_matrix_image = tf.reshape(comb_matrix_image, [1, rows, cols, 1])

    return poses, comb_matrix_image, update_comb_mat