我们从 Python 开源项目中,提取了以下 50 个代码示例,用于说明如何使用 keras.backend.expand_dims()。
def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
    """Add boundary energies to the observations `x` and apply the mask.

    `b_start` (resp. `b_end`) is added to the start (resp. end) element of
    every sequence. Without a mask the start/end elements are the first and
    last positions along axis 1. With a mask, `x` is first multiplied by the
    mask and the start/end positions are the ones where the mask is larger
    than its right-/left-shifted copy.
    """
    if mask is None:
        if b_start is not None:
            first_step = x[:, :1, :] + b_start
            x = K.concatenate([first_step, x[:, 1:, :]], axis=1)
        if b_end is not None:
            last_step = x[:, -1:, :] + b_end
            x = K.concatenate([x[:, :-1, :], last_step], axis=1)
        return x

    # Float mask with a trailing axis so it broadcasts against x.
    mask = K.expand_dims(K.cast(mask, K.floatx()), 2)
    x *= mask
    if b_start is not None:
        # A start position is one where the mask exceeds its predecessor.
        shifted_right = K.concatenate(
            [K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
        is_start = K.cast(K.greater(mask, shifted_right), K.floatx())
        x = x + is_start * b_start
    if b_end is not None:
        # An end position is one where the mask exceeds its successor.
        shifted_left = K.concatenate(
            [mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
        is_end = K.cast(K.greater(mask, shifted_left), K.floatx())
        x = x + is_end * b_end
    return x
def get_initial_states(self, onto_nse_input, input_mask=None):
    """Return the reader's initial states plus a flattened initial memory.

    The initial memory is obtained by dropping the last feature of the
    input and reducing over axes 2 and 3: a plain mean when no mask is
    given, otherwise a sum weighted by the normalised mask.
    """
    # onto_nse_input: (batch_size, num_words, num_senses, num_hyps, output_dim + 1)
    features = onto_nse_input[:, :, :, :, :-1]  # (bs, words, senses, hyps, output_dim)
    if input_mask is not None:
        weights = input_mask
        if K.ndim(onto_nse_input) != K.ndim(input_mask):
            # Add a trailing axis so the mask lines up with the input rank.
            weights = K.expand_dims(input_mask)
        # NOTE(review): the normaliser is the sum over the whole mask tensor.
        weights = K.cast(weights / (K.sum(weights) + K.epsilon()), 'float32')
        mem_0 = K.sum(features * weights, axis=(2,3))  # (batch_size, num_words, output_dim)
    else:
        mem_0 = K.mean(features, axis=(2, 3))  # (batch_size, num_words, output_dim)
    flat_memory = K.batch_flatten(mem_0)
    states = self.reader.get_initial_states(onto_nse_input)
    states += [flat_memory]
    return states
def call(self, x, mask=None):
    """Run the read/write loop over `x` and return the configured output.

    The result depends on `self.return_mode`:
      * ``"last_output"``: the last output of the loop;
      * ``"all_outputs"``: the outputs of every step;
      * anything else (output_and_memory): the last output, expanded on
        axis 1, concatenated in front of the last memory state.
    """
    # input_shape = (batch_size, input_length, input_dim). This needs to be defined in build.
    initial_read_states = self.get_initial_states(x, mask)
    # The axis is passed positionally: the keyword is `dim` in Keras 1 but
    # `axis` in Keras 2, and the positional form is accepted by both.
    fake_writer_input = K.expand_dims(initial_read_states[0], 1)  # (batch_size, 1, output_dim)
    initial_write_states = self.writer.get_initial_states(fake_writer_input)  # h_0 and c_0 of the writer LSTM
    initial_states = initial_read_states + initial_write_states
    # last_output: (batch_size, output_dim)
    # all_outputs: (batch_size, input_length, output_dim)
    # last_states:
    #     last_memory_state: (batch_size, input_length, output_dim)
    #     last_output
    #     last_writer_ct
    last_output, all_outputs, last_states = self.loop(x, initial_states, mask)
    last_memory = last_states[0]
    if self.return_mode == "last_output":
        return last_output
    if self.return_mode == "all_outputs":
        return all_outputs
    # return mode is output_and_memory
    expanded_last_output = K.expand_dims(last_output, 1)  # (batch_size, 1, output_dim)
    # (batch_size, 1+input_length, output_dim)
    return K.concatenate([expanded_last_output, last_memory], axis=1)
def call(self, x, mask=None):
    """Reduce `x` over axis 1: a plain mean, or a mask-weighted sum.

    When `mask` is given, masked-out entries of `x` are replaced by zeros
    and the remaining entries are weighted by the normalised mask before
    being summed over axis 1.
    """
    # x: (batch_size, input_length, input_dim)
    if mask is None:
        return K.mean(x, axis=1)  # (batch_size, input_dim)

    # This is to remove padding from the computational graph.
    x_rank = K.ndim(x)
    if K.ndim(mask) > x_rank:
        # This is due to the bug in Bidirectional that is passing the input mask
        # instead of computing output mask.
        # TODO: Fix the implementation of Bidirectional.
        mask = K.any(mask, axis=(-2, -1))
    if K.ndim(mask) < x_rank:
        mask = K.expand_dims(mask)
    zeroed = switch(mask, x, K.zeros_like(x))
    # NOTE(review): the normaliser sums the mask over every axis, not per
    # sample - confirm this is intended.
    weights = K.cast(mask / (K.sum(mask) + K.epsilon()), 'float32')
    return K.sum(zeroed * weights, axis=1)  # (batch_size, input_dim)
def step_with_training(self, training=None):
    """Return a `step(inputs, states)` closure bound to `training`.

    The closure feeds `inputs` to `self.layer.step`, except when the sum
    of `inputs` equals 0.0: then the preprocessed first state (reshaped to
    the last dimension of `inputs`) is used as the step input instead.
    """
    def step(inputs, states):
        feature_dim = K.int_shape(inputs)[-1]
        previous = self.layer.preprocess_input(
            K.expand_dims(states[0], axis=1), training)
        previous = K.reshape(previous, (-1, feature_dim))
        inputs_are_zero = tf.equal(tf.reduce_sum(inputs), 0.0)
        chosen = tf.case(
            [(inputs_are_zero, lambda: previous)],
            default=lambda: inputs,
        )
        return self.layer.step(chosen, states)

    return step
def call(self, inputs, mask=None):
    """Apply the wrapped layer, reshape its output to 3-D and mask it.

    The wrapped layer's output is permuted with `self.permute_pattern`
    (plus the last input axis), reshaped to the shape reported by
    `compute_output_shape`, and multiplied by the float mask from
    `compute_mask` repeated along the last axis.
    """
    in_shape = K.int_shape(inputs)
    out_shape = self.compute_output_shape(in_shape)

    result = self.layer.call(inputs)
    permutation = self.permute_pattern + [len(in_shape) - 1]
    result = K.permute_dimensions(result, permutation)
    result = K.reshape(result, (-1, out_shape[1], out_shape[2]))

    float_mask = K.cast(self.compute_mask(inputs, mask), K.floatx())
    # Add a trailing axis and repeat it so the mask matches `result`.
    tiled_mask = K.repeat_elements(K.expand_dims(float_mask), out_shape[2], 2)
    return result * tiled_mask
def dot_product(x, kernel):
    """Dot product wrapper compatible with both Theano and TensorFlow.

    Args:
        x: input
        kernel: weights

    Returns:
        `K.dot(x, kernel)`; on the TensorFlow backend the kernel is given
        a trailing axis first and that axis is squeezed from the result.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)
    # todo: check that this is correct
    projected = K.dot(x, K.expand_dims(kernel))
    return K.squeeze(projected, axis=-1)
def call(self, x, mask=None):
    """Return the attention-weighted sum of `x` over axis 1."""
    scores = dot_product(x, self.W)
    if self.bias:
        scores += self.b
    weights = K.exp(K.tanh(scores))

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        weights *= K.cast(mask, K.floatx())

    # In some cases, especially in the early stages of training, the sum may
    # be almost zero and this results in NaN's. A workaround is to add a very
    # small positive number (epsilon) to the sum.
    normaliser = K.sum(weights, axis=1, keepdims=True) + K.epsilon()
    weights /= K.cast(normaliser, K.floatx())

    return K.sum(x * K.expand_dims(weights), axis=1)
def call(self, x, mask=None):
    """Return the context-attention-weighted sum of `x` over axis 1.

    `x` is projected with `self.W` (plus `self.b` when `self.bias` is set),
    squashed with tanh and scored against the context kernel `self.u`.
    The exponentiated scores are masked, normalised over axis 1 and used
    to weight `x`.
    """
    uit = dot_product(x, self.W)
    if self.bias:
        uit += self.b
    uit = K.tanh(uit)
    # Use the same backend-compatible wrapper as the first projection; a raw
    # K.dot here is inconsistent with it (the wrapper exists because the
    # plain call is not portable to TensorFlow). On other backends the
    # wrapper is exactly K.dot, so behaviour there is unchanged.
    ait = dot_product(uit, self.u)
    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # In some cases, especially in the early stages of training, the sum may
    # be almost zero and this results in NaN's. A workaround is to add a very
    # small positive number (epsilon) to the sum.
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
def _cosine_matrix(self, x1, x2):
    """Cosine similarity matrix.

    Computes the cosine similarity between every contextual embedding of
    `x1` and every contextual embedding of `x2`.

    # Arguments
        x1: (batch_size, x1_timesteps, embedding_size)
        x2: (batch_size, x2_timesteps, embedding_size)

    # Output shape
        (batch_size, x1_timesteps, x2_timesteps)
    """
    # (batch_size, x1_timesteps, 1, embedding_size)
    rows = K.expand_dims(x1, axis=2)
    # (batch_size, 1, x2_timesteps, embedding_size)
    cols = K.expand_dims(x2, axis=1)
    # The singleton axes broadcast against each other inside the helper.
    return self._cosine_similarity(rows, cols)
def _mean_attentive_vectors(self, x2, cosine_matrix):
    """Mean attentive vectors.

    Weighted average of the contextual embeddings of `x2`, using the
    cosine similarities as weights.

    # Arguments
        x2: sequence vectors, (batch_size, x2_timesteps, embedding_size)
        cosine_matrix: cosine similarities matrix of x1 and x2,
            (batch_size, x1_timesteps, x2_timesteps)

    # Output shape
        (batch_size, x1_timesteps, embedding_size)
    """
    # (batch_size, x1_timesteps, x2_timesteps, 1)
    weights = K.expand_dims(cosine_matrix, axis=-1)
    # (batch_size, 1, x2_timesteps, embedding_size)
    values = K.expand_dims(x2, axis=1)
    # (batch_size, x1_timesteps, embedding_size)
    numerator = K.sum(weights * values, axis=2)
    # (batch_size, x1_timesteps, 1); self.epsilon is added to the weight sum
    denominator = K.expand_dims(
        K.sum(cosine_matrix, axis=-1) + self.epsilon, axis=-1)
    return numerator / denominator
def _full_matching(self, h1, h2, w):
    """Full matching operation.

    Compares every (weighted) step of `h1` with the (weighted) last step
    of `h2`.

    # Arguments
        h1: (batch_size, h1_timesteps, embedding_size)
        h2: (batch_size, h2_timesteps, embedding_size)
        w: weights of one direction, (mp_dim, embedding_size)

    # Output shape
        (batch_size, h1_timesteps, mp_dim)
    """
    # (batch_size, h1_timesteps, mp_dim, embedding_size)
    weighted_h1 = self._time_distributed_multiply(h1, w)
    # last step of h2, (batch_size, embedding_size), weighted to
    # (batch_size, mp_dim, embedding_size)
    weighted_last = self._time_distributed_multiply(h2[:, -1, :], w)
    # (batch_size, 1, mp_dim, embedding_size) so it broadcasts over h1 steps
    weighted_last = K.expand_dims(weighted_last, axis=1)
    # (batch_size, h1_timesteps, mp_dim)
    return self._cosine_similarity(weighted_h1, weighted_last)
def _max_pooling_matching(self, h1, h2, w):
    """Max pooling matching operation.

    Compares every weighted step of `h1` with every weighted step of `h2`
    and keeps the maximum similarity over the `h2` steps.

    # Arguments
        h1: (batch_size, h1_timesteps, embedding_size)
        h2: (batch_size, h2_timesteps, embedding_size)
        w: weights of one direction, (mp_dim, embedding_size)

    # Output shape
        (batch_size, h1_timesteps, mp_dim)
    """
    # (batch_size, h1_timesteps, mp_dim, embedding_size)
    weighted_h1 = self._time_distributed_multiply(h1, w)
    # (batch_size, h2_timesteps, mp_dim, embedding_size)
    weighted_h2 = self._time_distributed_multiply(h2, w)
    # (batch_size, h1_timesteps, 1, mp_dim, embedding_size)
    weighted_h1 = K.expand_dims(weighted_h1, axis=2)
    # (batch_size, 1, h2_timesteps, mp_dim, embedding_size)
    weighted_h2 = K.expand_dims(weighted_h2, axis=1)
    # (batch_size, h1_timesteps, h2_timesteps, mp_dim)
    similarities = self._cosine_similarity(weighted_h1, weighted_h2)
    # (batch_size, h1_timesteps, mp_dim)
    return K.max(similarities, axis=2)
def call(self, x):
    """Gated temporal convolution: `activation(z) * sigmoid(g)`.

    The input is left-padded in time when `self.window_size > 1`,
    convolved through a 2-D convolution over a dummy axis, and the result
    is split along the last axis at `self.output_dim` into `z` and `g`.
    """
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    # Not used below; the lookup is kept as in the original.
    # NOTE(review): confirm whether this line can be dropped.
    input_shape = self.input_spec[0].shape

    if self.window_size > 1:
        x = K.temporal_padding(x, (self.window_size-1, 0))
    x = K.expand_dims(x, 2)  # add a dummy dimension

    # z, g
    conv_out = K.conv2d(x, self.kernel, strides=self.strides,
                        padding='valid',
                        data_format='channels_last')
    conv_out = K.squeeze(conv_out, 2)  # remove the dummy dimension
    if self.use_bias:
        conv_out = K.bias_add(conv_out, self.bias, data_format='channels_last')

    split_at = self.output_dim
    candidate = conv_out[:, :, :split_at]
    gate = conv_out[:, :, split_at:]
    return self.activation(candidate) * K.sigmoid(gate)
def preprocess_input(self, inputs, training=None):
    """Convolve `inputs` over time and optionally apply dropout to `f`.

    The input is left-padded in time when `self.window_size > 1` and
    convolved through a 2-D convolution over a dummy axis. When
    `self.dropout` lies strictly between 0 and 1, the output is split into
    three `self.units`-wide slices (z, f, o) and, in the training phase,
    `f` is replaced by `1 - _dropout(1 - f, self.dropout)`.
    """
    if self.window_size > 1:
        inputs = K.temporal_padding(inputs, (self.window_size-1, 0))
    inputs = K.expand_dims(inputs, 2)  # add a dummy dimension

    conv_out = K.conv2d(inputs, self.kernel, strides=self.strides,
                        padding='valid',
                        data_format='channels_last')
    conv_out = K.squeeze(conv_out, 2)  # remove the dummy dimension
    if self.use_bias:
        conv_out = K.bias_add(conv_out, self.bias, data_format='channels_last')

    dropout_active = self.dropout is not None and 0. < self.dropout < 1.
    if not dropout_active:
        return conv_out

    units = self.units
    z = conv_out[:, :, :units]
    f = conv_out[:, :, units:2 * units]
    o = conv_out[:, :, 2 * units:]
    f = K.in_train_phase(1 - _dropout(1 - f, self.dropout), f, training=training)
    return K.concatenate([z, f, o], -1)
def call(self, inputs):
    """Rescale `inputs` by a gate computed from its spatial mean.

    The mean over the two spatial axes goes through two dense projections
    (relu, then sigmoid); the result is expanded back with two singleton
    axes and multiplied with `inputs`.
    """
    channels_first = self.data_format == 'channels_first'

    # Squeeze: average over the spatial axes for the given data format.
    squeezed = K.mean(inputs, [2, 3] if channels_first else [1, 2])

    # Excite: relu projection followed by a sigmoid projection.
    gate = K.dot(squeezed, self.kernel1)
    if self.use_bias:
        gate = K.bias_add(gate, self.bias1)
    gate = K.relu(gate)
    gate = K.dot(gate, self.kernel2)
    if self.use_bias:
        gate = K.bias_add(gate, self.bias2)
    gate = K.sigmoid(gate)

    # Re-insert the two spatial axes so the gate broadcasts over inputs.
    spatial_axis = -1 if channels_first else 1
    for _ in range(2):
        gate = K.expand_dims(gate, spatial_axis)
    return inputs * gate
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.

    Builds, for every step, the energy of the next observation plus the
    transition matrix `U` (zeroed where either neighbouring mask entry is
    zero) and runs `K.rnn` over it, calling `reduce_step` at each step.
    Returns the last output and all outputs of the recurrence.
    '''
    def _forward_step(energy_matrix_t, states):
        previous_alpha = states[-1]
        reduced = reduce_step(K.expand_dims(previous_alpha, 2) + energy_matrix_t)
        return reduced[0], reduced

    transitions = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # A transition counts only when both of its endpoints are unmasked.
        pair_mask = mask[:, :-1] * mask[:, 1:]
        transitions = transitions * K.expand_dims(K.expand_dims(pair_mask, 2), 3)

    energies = K.expand_dims(x[:, 1:, :], 2) + transitions
    # Pad one all-zero step at the end of the time axis.
    zero_step = K.zeros_like(energies[:, -1:, :, :])
    energies = K.concatenate([energies, zero_step], axis=1)

    last, values, _ = K.rnn(_forward_step, energies, initial_states)
    return last, values
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.

    Walks `gamma` (cast to int32) backwards with `K.rnn`, gathering at each
    step the entry selected by the previous step, and returns the result
    in forward order. When `mask` is given, masked positions are set to -1.
    '''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        previous = K.squeeze(states[0], 0)
        current = batch_gather(gamma_t, previous)
        return current, [K.expand_dims(current, 0)]

    start = K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)
    _, reversed_path, _ = K.rnn(_backward_step, gamma, [start],
                                go_backwards=True)
    path = K.reverse(reversed_path, 1)

    if mask is None:
        return path
    mask = K.cast(mask, dtype='int32')
    # Zero the masked outputs, then set them to -1.
    return path * mask - (1 - mask)
def call(self, x, mask=None):
    """Average `x` over axis 1, ignoring masked positions.

    Without a mask this is a plain mean over axis 1. With a mask, masked
    vectors are zeroed, the remaining ones are summed over axis 1 and the
    sum is divided by the number of unmasked positions in each row.
    A row that is entirely masked yields zeros rather than NaN.
    """
    if mask is None:
        return K.mean(x, axis=1)

    mask = K.cast(mask, "float32")
    expanded_mask = K.expand_dims(mask)

    # zero embedded vectors which come from masked characters
    x_masked = x * expanded_mask

    # how many non-masked characters are in each row?
    mask_counts = K.sum(mask, axis=-1)

    # add up the vector representations along the time dimension
    # the result should have dimension (n_samples, n_embedding_dims)
    x_sums = K.sum(x_masked, axis=1)

    # A fully masked row has a count of 0, which previously produced 0/0 = NaN.
    # Flooring at epsilon leaves every positive count untouched and maps the
    # all-masked case to 0 / epsilon = 0.
    safe_counts = K.maximum(mask_counts, K.epsilon())

    # give the counts an extra dimension so they broadcast properly in
    # the elementwise division
    counts_cast = K.expand_dims(safe_counts)
    return x_sums / counts_cast
def _backward(gamma, mask):
    '''Backward recurrence of the linear chain crf.

    Walks `gamma` (cast to int32) backwards with `K.rnn`, gathering at each
    step the entry selected by the previous step, and returns the result
    in forward order. When `mask` is given, masked positions are set to -1.
    '''
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        previous = K.squeeze(states[0], 0)
        current = KC.batch_gather(gamma_t, previous)
        return current, [K.expand_dims(current, 0)]

    start = K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)
    _, reversed_path, _ = K.rnn(_backward_step, gamma, [start],
                                go_backwards=True)
    path = K.reverse(reversed_path, 1)

    if mask is None:
        return path
    mask = K.cast(mask, dtype='int32')
    # Zero the masked outputs, then set them to -1.
    return path * mask - (1 - mask)
def call(self, x, mask=None):
    """Return the attention-weighted sum of `x` over the timesteps.

    Returns `[result, attention_weights]` when `self.return_attention` is
    set, otherwise just `result`.
    """
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    dyn_shape = K.shape(x)
    scores = K.reshape(K.dot(x, self.W), (dyn_shape[0], dyn_shape[1]))
    unnormalised = K.exp(scores - K.max(scores, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        unnormalised = unnormalised * K.cast(mask, K.floatx())

    total = K.sum(unnormalised, axis=1, keepdims=True) + K.epsilon()
    att_weights = unnormalised / total
    result = K.sum(x * K.expand_dims(att_weights), axis=1)

    return [result, att_weights] if self.return_attention else result
def preprocess_image(image_path, load_dims=False, style_image=False):
    """Load an image and return it as a (1, channels, rows, cols) float64 array.

    Args:
        image_path: path of the image to read (read in RGB mode).
        load_dims: when true, record the image's first two shape entries in
            the globals `img_WIDTH` / `img_HEIGHT` and update `aspect_ratio`.
        style_image: when true, store the ratio of this image's dimensions
            to the recorded ones in `b_scale_ratio_width` /
            `b_scale_ratio_height`.

    Returns:
        The image resized to the module-level `(img_width, img_height)`,
        transposed to channels-first, with a leading batch axis added.
    """
    global img_WIDTH, img_HEIGHT, aspect_ratio, b_scale_ratio_height, b_scale_ratio_width

    img = imread(image_path, mode="RGB")  # Prevents crashes due to PNG images (ARGB)
    if load_dims:
        img_WIDTH = img.shape[0]
        img_HEIGHT = img.shape[1]
        # float() forces true division; a bare int / int truncates on
        # Python 2. This matches the two scale ratios computed below.
        aspect_ratio = float(img_HEIGHT) / img_WIDTH

    if style_image:
        b_scale_ratio_width = float(img.shape[0]) / img_WIDTH
        b_scale_ratio_height = float(img.shape[1]) / img_HEIGHT

    img = imresize(img, (img_width, img_height))
    img = img.transpose((2, 0, 1)).astype('float64')
    img = np.expand_dims(img, axis=0)
    return img

# util function to convert a tensor into a valid image
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns the patches together with their L2 norms taken over the last
    three axes (kept as singleton axes).
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T

    batched = K.expand_dims(x, 0)
    shape = K.shape(batched)
    num_channels = shape[-3]
    # Number of patch positions along each spatial axis.
    num_rows = 1 + (shape[-2] - patch_size) // patch_stride
    num_cols = 1 + (shape[-1] - patch_size) // patch_stride

    window = (patch_size, patch_size)
    step = (patch_stride, patch_stride)
    neibs = images2neibs(batched, window, step, mode='valid')
    # neibs are sorted per-channel
    per_channel = (num_channels, K.shape(neibs)[0] // num_channels,
                   patch_size, patch_size)
    neibs = K.reshape(neibs, per_channel)
    neibs = K.permute_dimensions(neibs, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        neibs, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2,3,4), keepdims=True))
    return patches, patches_norm

# get tensor representations of our images
def call(self, x, mask=None):
    """Normalise `x` over its last two axes, then scale and shift it.

    The mean and variance are taken over the last two axes, the input is
    standardised with them, and `self.scale` / `self.shift` are applied
    after being given a leading axis and two trailing axes.
    """
    def image_expand(tensor):
        # Append two trailing singleton axes.
        return K.expand_dims(K.expand_dims(tensor, -1), -1)

    def batch_image_expand(tensor):
        # Prepend one axis, then append two trailing singleton axes.
        return image_expand(K.expand_dims(tensor, 0))

    spatial_axes = [-1, -2]
    pixel_count = K.cast(x.shape[2] * x.shape[3], K.floatx())

    mean = image_expand(K.sum(x, spatial_axes) / pixel_count)
    variance = K.sum(K.square(x - mean), spatial_axes) / pixel_count
    normalised = (x - mean) / (K.sqrt(image_expand(variance)) + K.epsilon())

    scale = batch_image_expand(self.scale)
    shift = batch_image_expand(self.shift)
    return scale*normalised + shift

# else:
#     raise NotImplemented("Please complete `CycGAN/layers/padding.py` to run on backend {}.".format(K.backend()))
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns the patches together with their L2 norms taken over the last
    three axes (kept as singleton axes).
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T

    batched = K.expand_dims(x, 0)
    shape = K.shape(batched)
    num_channels = shape[-3]
    # Number of patch positions along each spatial axis.
    num_rows = 1 + (shape[-2] - patch_size) // patch_stride
    num_cols = 1 + (shape[-1] - patch_size) // patch_stride

    window = (patch_size, patch_size)
    step = (patch_stride, patch_stride)
    neibs = images2neibs(batched, window, step, mode='valid')
    # neibs are sorted per-channel
    per_channel = (num_channels, K.shape(neibs)[0] // num_channels,
                   patch_size, patch_size)
    neibs = K.reshape(neibs, per_channel)
    neibs = K.permute_dimensions(neibs, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        neibs, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2,3,4), keepdims=True))
    return patches, patches_norm
def step(self, x, states):
    """Compute one recurrence step; return `(output, [new_state])`.

    `states` holds the hidden input followed by the multiplicative masks
    applied to the `U` and `W` products. When `self.inner_input_dim > 0`,
    the new state is the hidden input shifted by one column with the
    output appended; otherwise the state is the output itself.
    """
    hidden, mask_u, mask_w = states[0], states[1], states[2]
    has_inner_input = self.inner_input_dim > 0

    # Make last hidden input the residual of the prediction and
    # the last available feature.
    if has_inner_input:
        residual = K.expand_dims(hidden[:, -1] - x[:, -1])
        hidden = K.concatenate((hidden[:, :-1], residual))

    h = self.b if self.ma_only else K.dot(x * mask_w, self.W) + self.b

    if not has_inner_input:
        output = self.activation(h)
        return output, [output]

    output = self.activation(h + K.dot(hidden * mask_u, self.U))
    shifted_state = K.concatenate((hidden[:, 1:], output))
    return output, [shifted_state]
def step(self, x, states):
    """Compute one recurrence step; return `(output, [new_state])`.

    `states` holds the hidden input followed by the dropout masks for `U`
    and `W`. When `self.inner_input_dim > 0`, the new state is the hidden
    input shifted by one column with the output appended; otherwise the
    state is the output itself.
    """
    hidden = states[0]
    dropout_u = states[1]  # Dropout mask for U
    dropout_w = states[2]  # Dropout mask for W
    has_inner_input = self.inner_input_dim > 0

    # Make last hidden input the residual of the prediction and
    # the last available feature.
    if has_inner_input:
        residual = K.expand_dims(hidden[:, -1] - x[:, -1])
        hidden = K.concatenate((hidden[:, :-1], residual))

    h = self.b if self.ma_only else K.dot(x * dropout_w, self.W) + self.b

    if not has_inner_input:
        output = self.activation(h)
        return output, [output]

    output = self.activation(h + K.dot(hidden * dropout_u, self.U))
    shifted_state = K.concatenate((hidden[:, 1:], output))
    return output, [shifted_state]
def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
    """Add boundary energies to the observations `x` and apply the mask.

    `b_start` (resp. `b_end`) is added to the start (resp. end) element of
    every sequence. Without a mask the start/end elements are the first and
    last positions along axis 1. With a mask, `x` is first multiplied by the
    mask and the start/end positions are the ones where the mask is larger
    than its right-/left-shifted copy.
    """
    if mask is None:
        if b_start is not None:
            first_step = x[:, :1, :] + b_start
            x = K.concatenate([first_step, x[:, 1:, :]], axis=1)
        if b_end is not None:
            last_step = x[:, -1:, :] + b_end
            x = K.concatenate([x[:, :-1, :], last_step], axis=1)
        return x

    # Float mask with a trailing axis so it broadcasts against x.
    mask = K.expand_dims(K.cast(mask, K.floatx()), 2)
    x *= mask
    if b_start is not None:
        # A start position is one where the mask exceeds its predecessor.
        shifted_right = K.concatenate(
            [K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
        is_start = K.cast(K.greater(mask, shifted_right), K.floatx())
        x = x + is_start * b_start
    if b_end is not None:
        # An end position is one where the mask exceeds its successor.
        shifted_left = K.concatenate(
            [mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
        is_end = K.cast(K.greater(mask, shifted_left), K.floatx())
        x = x + is_end * b_end
    return x
def _forward(x, reduce_step, initial_states, U, mask=None):
    """Forward recurrence of the linear chain crf.

    Builds, for every step, the energy of the next observation plus the
    transition matrix `U` (zeroed where either neighbouring mask entry is
    zero) and runs `K.rnn` over it, calling `reduce_step` at each step.
    Returns the last output and all outputs of the recurrence.
    """
    def _forward_step(energy_matrix_t, states):
        previous_alpha = states[-1]
        reduced = reduce_step(K.expand_dims(previous_alpha, 2) + energy_matrix_t)
        return reduced[0], reduced

    transitions = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # A transition counts only when both of its endpoints are unmasked.
        pair_mask = mask[:, :-1] * mask[:, 1:]
        transitions = transitions * K.expand_dims(K.expand_dims(pair_mask, 2), 3)

    energies = K.expand_dims(x[:, 1:, :], 2) + transitions
    # Pad one all-zero step at the end of the time axis.
    zero_step = K.zeros_like(energies[:, -1:, :, :])
    energies = K.concatenate([energies, zero_step], axis=1)

    last, values, _ = K.rnn(_forward_step, energies, initial_states)
    return last, values
def _backward(gamma, mask):
    """Backward recurrence of the linear chain crf.

    Walks `gamma` (cast to int32) backwards with `K.rnn`, gathering at each
    step the entry selected by the previous step, and returns the result
    in forward order. When `mask` is given, masked positions are set to -1.
    """
    gamma = K.cast(gamma, 'int32')

    def _backward_step(gamma_t, states):
        previous = K.squeeze(states[0], 0)
        current = batch_gather(gamma_t, previous)
        return current, [K.expand_dims(current, 0)]

    start = K.expand_dims(K.zeros_like(gamma[:, 0, 0]), 0)
    _, reversed_path, _ = K.rnn(_backward_step, gamma, [start],
                                go_backwards=True)
    path = K.reverse(reversed_path, 1)

    if mask is None:
        return path
    mask = K.cast(mask, dtype='int32')
    # Zero the masked outputs, then set them to -1.
    return path * mask - (1 - mask)
def plotVAEpyplot(self, logdir, prefix, ctable=None, reverseUtt=False, batch_size=128, debug=False):
    """Decode a grid of latent points, plot them in 3-D and pickle the figure.

    A 5-value grid per latent dimension is decoded with `self.decode_word`,
    the decoded outputs are turned into text with `reconstructXae`, and each
    text is drawn at its latent coordinates. The figure is pickled to
    ``<logdir>/<prefix>_VAEplot.3D.obj`` and then closed.

    NOTE(review): the grid is reshaped to 3 columns and plotted on 3 axes,
    so this presumably requires `self.latentDim == 3` - confirm.
    `debug` is accepted but not used here.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    ticks = [[-1,-0.5,0,0.5,1]]*self.latentDim
    samplePoints = np.array(np.meshgrid(*ticks)).T.reshape(-1,3)
    input_placeholder = np.ones(tuple([len(samplePoints)] + list(self.phon.output_shape[1:-1]) + [1]))

    preds = self.decode_word([samplePoints, input_placeholder], batch_size=batch_size)
    if reverseUtt:
        preds = getYae(preds, reverseUtt)
    reconstructed = reconstructXae(np.expand_dims(preds.argmax(-1), -1), ctable, maxLen=5)

    for i, point in enumerate(samplePoints):
        ax.text(point[0], point[1], point[2], reconstructed[i])

    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)

    # `file()` only exists on Python 2 and left the handle open; `open` in a
    # `with` block works on both Python 2 and 3 and always closes the file.
    with open(logdir + '/' + prefix + '_VAEplot.3D.obj', 'wb') as out_file:
        pickle.dump(fig, out_file)

    plt.close(fig)
def plotVAEplotly(self, logdir, prefix, ctable=None, reverseUtt=False, batch_size=128, debug=False):
    """Decode a grid of latent points and write a 3-D plotly text scatter.

    A 5-value grid per latent dimension is decoded with `self.decode_word`,
    turned into text with `reconstructXae`, and written to
    ``<logdir>/<prefix>_VAEplot.html`` without opening a browser.
    `debug` is accepted but not used here.
    """
    grid_axes = [[-1,-0.5,0,0.5,1]]*self.latentDim
    samplePoints = np.array(np.meshgrid(*grid_axes)).T.reshape(-1,3)

    placeholder_shape = tuple(
        [len(samplePoints)] + list(self.phon.output_shape[1:-1]) + [1])
    input_placeholder = np.ones(placeholder_shape)

    preds = self.decode_word([samplePoints, input_placeholder], batch_size=batch_size)
    if reverseUtt:
        preds = getYae(preds, reverseUtt)
    best_ids = np.expand_dims(preds.argmax(-1), -1)
    reconstructed = reconstructXae(best_ids, ctable, maxLen=5)

    scatter = go.Scatter3d(
        x = samplePoints[:,0],
        y = samplePoints[:,1],
        z = samplePoints[:,2],
        text = reconstructed,
        mode='text'
    )
    fig = go.Figure(data=[scatter], layout=go.Layout())
    plotly.offline.plot(fig, filename=logdir + '/' + prefix + '_VAEplot.html', auto_open=False)
def update(self, Xs, Xs_mask, targets, batch_size=128):
    """Fit `self.network` for one epoch on time-shifted inputs.

    The inputs are zero-padded by `self.seg_shift` steps at the end of the
    time axis, while the mask and the targets are zero-padded by the same
    amount at the start. Returns whatever `self.network.fit` returns.
    """
    shift = self.seg_shift
    char_dim = Xs.shape[-1]
    max_char = Xs_mask.shape[-1]
    padded_len = max_char + shift

    # Inputs occupy the first max_char steps; the tail stays zero.
    seg_inputs = np.zeros((len(Xs), padded_len, char_dim))
    seg_inputs[:, :max_char, :] = Xs

    # Mask and targets are delayed by `shift` steps.
    seg_mask = np.zeros((len(Xs_mask), padded_len))
    seg_mask[:, shift:] = Xs_mask
    seg_mask = np.expand_dims(seg_mask, -1)

    seg_targets = np.zeros((len(targets), padded_len, 1))
    seg_targets[:, shift:, :] = targets

    return self.network.fit([seg_inputs, seg_mask],
                            seg_targets,
                            batch_size=batch_size,
                            epochs=1)
def createLayers():
    """Build the network's input tensor and output tensor.

    A stack of `args.layers` Dense layers (with optional batch
    normalisation) feeds a Dense layer with `env.action_space.n + 1` units.
    Column 0 of that layer is added to the remaining columns, optionally
    after subtracting their mean (``'avg'``) or max (``'max'``), depending
    on `args.advantage`.

    Returns:
        `(x, z)`: the input tensor and the combined output tensor.
    """
    x = Input(shape=env.observation_space.shape)
    if args.batch_norm:
        h = BatchNormalization()(x)
    else:
        h = x
    # `range` instead of the Python 2-only `xrange`; works on both versions.
    for i in range(args.layers):
        h = Dense(args.hidden_size, activation=args.activation)(h)
        if args.batch_norm and i != args.layers - 1:
            h = BatchNormalization()(h)
    y = Dense(env.action_space.n + 1)(h)
    # The expand_dims axis is passed positionally: the keyword is `dim` in
    # Keras 1 but `axis` in Keras 2, and the positional form suits both.
    # NOTE(review): K.mean / K.max are called without an axis, so they
    # reduce over every axis of the slice - confirm this is intended.
    if args.advantage == 'avg':
        z = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], keepdims=True), output_shape=(env.action_space.n,))(y)
    elif args.advantage == 'max':
        z = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(a[:, 1:], keepdims=True), output_shape=(env.action_space.n,))(y)
    elif args.advantage == 'naive':
        z = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:], output_shape=(env.action_space.n,))(y)
    else:
        assert False, "unknown advantage type: %r" % (args.advantage,)

    return x, z
def lin_interpolation_2d(inp, dim):
    """Apply a frozen separable convolution whose kernel is `linspace_2d`.

    The depthwise kernel of every filter is set to
    `linspace_2d(num_rows, num_cols, dim=dim)` and the pointwise kernel to
    the identity, so each channel is reduced independently. The two
    resulting size-1 axes are squeezed and a trailing axis is added.
    """
    num_rows, num_cols, num_filters = K.int_shape(inp)[1:]
    conv = SeparableConv2D(num_filters, (num_rows, num_cols), use_bias=False)
    out = conv(inp)

    # Overwrite the freshly initialised weights with the fixed kernels.
    weights = conv.get_weights()
    depthwise, pointwise = weights[0], weights[1]
    depthwise.fill(0)
    pointwise.fill(0)
    ramp = linspace_2d(num_rows, num_cols, dim=dim)
    for channel in range(num_filters):
        depthwise[:,:, channel, 0] = ramp[:,:]
        pointwise[0, 0, channel, channel] = 1.
    conv.set_weights(weights)
    conv.trainable = False

    out = Lambda(lambda t: K.squeeze(t, axis=1))(out)
    out = Lambda(lambda t: K.squeeze(t, axis=1))(out)
    out = Lambda(lambda t: K.expand_dims(t, axis=-1))(out)
    return out
def step(self, input_energy_t, states, return_logZ=True):
    """One recurrence step over the chain energies.

    With `return_logZ` the step returns the log-sum-exp of the negated
    energies; otherwise it returns the argmin table (cast to float) and
    carries the minimum energy forward. A fourth state, when present, is
    used as a mask for the current and next step.
    """
    # Note: in the following, `prev_target_val` has shape = (B, F)
    # where B = batch_size, F = output feature dim
    # Note: `i` is of float32, due to the behavior of `K.rnn`
    prev_target_val, i, chain_energy = states[:3]
    t = K.cast(i[0, 0], dtype='int32')

    if len(states) > 3:
        # Take the mask columns for steps t and t + 1.
        if K.backend() == 'theano':
            m = states[3][:, t:(t + 2)]
        else:
            m = K.tf.slice(states[3], [0, t], [-1, 2])
        mask_now, mask_next = m[:, 0], m[:, 1]
        input_energy_t = input_energy_t * K.expand_dims(mask_now)
        # (1, F, F)*(B, 1, 1) -> (B, F, F)
        chain_energy = chain_energy * K.expand_dims(K.expand_dims(mask_now * mask_next))

    if not return_logZ:
        energy = chain_energy + K.expand_dims(input_energy_t + prev_target_val, 2)
        min_energy = K.min(energy, 1)
        # cast for tf-version `K.rnn`
        argmin_table = K.cast(K.argmin(energy, 1), K.floatx())
        return argmin_table, [min_energy, i + 1]

    # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
    energy = chain_energy + K.expand_dims(input_energy_t - prev_target_val, 2)
    new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
    return new_target_val, [new_target_val, i + 1]
def get_attention_initial_state(self, inputs):
    """Creates initial state for attention mechanism.

    By default the attention representation `attention_h` computed by
    attention_step is passed as attention state between timesteps.
    Attention implementations that require additional states must
    override this method accordingly.

    # Arguments
        inputs: layer inputs

    # Returns
        list (length one) of initial state (zeros)
    """
    # build an all-zero tensor of shape (samples, output_dim)
    zeros = K.zeros_like(inputs)  # (samples, timesteps, input_dim)
    per_sample = K.expand_dims(K.sum(zeros, axis=(1, 2)))  # (samples, 1)
    # (samples, output_dim)
    return [K.tile(per_sample, [1, self.attention_output_dim])]
def _get_attention_and_kappa(self, attended, params, kappa_tm1):
    """Compute the attention vector and the updated kappa.

    # Args
        params: the params of this distribution
        attended: the attended sequence (samples, timesteps, features)
        kappa_tm1: previous kappa, to which the predicted difference is added

    # Returns
        attention tensor (samples, features) and the new kappa
    """
    positions = K.constant(np.arange(self.attended_shape[1])[None, :, None])
    alpha, beta, kappa_diff = self.distribution.split_param_types(params)
    kappa = kappa_diff + kappa_tm1

    # Insert a timestep axis so the params broadcast against `positions`.
    alpha_b = K.expand_dims(alpha, 1)
    beta_b = K.expand_dims(beta, 1)
    kappa_b = K.expand_dims(kappa, 1)

    window = alpha_b * K.exp(- beta_b * K.square(kappa_b - positions))
    attention_w = K.sum(window, axis=-1)
    attention_w = K.expand_dims(attention_w, -1)  # TODO remove and keepdims
    attention = K.sum(attention_w * attended, axis=1)
    return attention, kappa
def mean_log_Gaussian_like(y_true, parameters):
    """Mean Log Gaussian Likelihood distribution.

    `parameters` is reshaped to (-1, c + 2, m): the first `c` rows are the
    means, the next row the sigmas and the last row the mixture weights.
    Returns the negated mean of the log-sum-exp of the component exponents.
    """
    # Note: The output size will be (c + 2) * m = 6
    c = 1  # The number of outputs we want to predict
    m = 2  # The number of distributions we want to use in the mixture
    components = K.reshape(parameters,[-1, c + 2, m])
    mu = components[:, :c, :]
    sigma = components[:, c, :]
    alpha = K.softmax(K.clip(components[:, c + 1, :],1e-8,1.))

    squared_error = K.sum((K.expand_dims(y_true,2) - mu)**2, axis=1)
    exponent = (K.log(alpha) - .5 * float(c) * K.log(2 * np.pi)
                - float(c) * K.log(sigma)
                - squared_error/(2*(sigma)**2))

    log_gauss = log_sum_exp(exponent, axis=1)
    return - K.mean(log_gauss)
def mean_log_LaPlace_like(y_true, parameters):
    """Mean Log Laplace Likelihood distribution.

    `parameters` is reshaped to (-1, c + 2, m): the first `c` rows are the
    means, the next row the sigmas and the last row the mixture weights.
    Returns the negated mean of the log-sum-exp of the component exponents.
    """
    # Note: The output size will be (c + 2) * m = 6
    c = 1  # The number of outputs we want to predict
    m = 2  # The number of distributions we want to use in the mixture
    components = K.reshape(parameters,[-1, c + 2, m])
    mu = components[:, :c, :]
    sigma = components[:, c, :]
    alpha = K.softmax(K.clip(components[:, c + 1, :],1e-2,1.))

    abs_error = K.sum(K.abs(K.expand_dims(y_true,2) - mu), axis=1)
    exponent = (K.log(alpha) - float(c) * K.log(2 * sigma)
                - abs_error/(sigma))

    log_laplace = log_sum_exp(exponent, axis=1)
    return - K.mean(log_laplace)
def call(self, x, mask=None):
    """Return the attention-weighted sum of `x` over the steps axis.

    `mask` is accepted but not used by this implementation.
    """
    # 1. transform, (None, steps, idim)*(idim, outdim) -> (None, steps, outdim)
    u = self.attn_activation(K.dot(x, self.W_s) + self.B_s)
    # 2. * attention sum : {(None, steps, outdim) *(outdim), axis = 2} -> (None, steps)
    att = K.sum(u*self.Attention_vec, axis=2)
    # 3. softmax, (None, steps)
    att = K.exp(att)
    att_sum = K.sum(att, axis=1)
    # K.expand_dims replaces the Theano-only `dimshuffle(0, 'x')`; it inserts
    # the same singleton axis and also works on other backends.
    att_sum = K.expand_dims(att_sum, 1)
    att = att/att_sum
    # 4. weighted sum
    # Backend-neutral replacement for `dimshuffle(0, 1, 'x')`.
    att = K.expand_dims(att, 2)
    va = att*x
    v = K.sum(va, axis=1)
    return v
def kde_entropy(output, var):
    """Kernel density estimate of entropy, in nats.

    Uses the pairwise squared distances between the rows of `output`,
    scaled by `2 * var`, and returns the negated mean log-probability.
    """
    n_dims = K.cast(K.shape(output)[1], K.floatx())
    n_samples = K.cast(K.shape(output)[0], K.floatx())
    norm_const = (n_dims/2.0)*K.log(2*np.pi*var)

    # Pairwise squared distances: |a|^2 + |b|^2 - 2 a.b
    sq_norms = K.expand_dims(K.sum(K.square(output), axis=1), 1)
    gram = K.dot(output, K.transpose(output))
    sq_dists = sq_norms + K.transpose(sq_norms) - 2*gram
    sq_dists = sq_dists / (2*var)

    log_probs = logsumexp(-sq_dists, axis=1) - K.log(n_samples) - norm_const
    return -K.mean(log_probs)
def call(self, x, mask=None):
    """Reduce or concatenate `x` according to `self.mode`.

    Modes ``'max'``, ``'mean'`` and ``'sum'`` reduce `x` over `self.axis`.
    Mode ``'concat'`` expects `x` to be a sequence whose first element is
    3-D; every following 2-D element is given an axis at position 1 (and
    repeated along it when `self.axis == 2`) before everything is
    concatenated along `self.axis`.

    Raises:
        NotImplementedError: if `self.mode` is not one of the modes above.
    """
    if self.mode == 'max':
        return K.max(x, axis=self.axis)
    elif self.mode == 'mean':
        return K.mean(x, axis=self.axis)
    elif self.mode == 'sum':
        return K.sum(x, axis=self.axis)
    elif self.mode == 'concat':
        assert len(x) >= 2
        assert x[0].ndim == 3

        def _transform(target):
            # Expand first dimension in any case. The axis is positional:
            # the keyword is `dim` in Keras 1 but `axis` in Keras 2.
            target = K.expand_dims(target, 1)
            if self.axis == 2:
                # Repeat target along the time dimension
                target = K.repeat_elements(
                    target, x[0].shape[1], axis=1)
            return target

        # A real list is required: on Python 3 `map` returns an iterator,
        # which cannot be added to a list.
        targets = [_transform(t) if t.ndim == 2 else t for t in x[1:]]
        return K.concatenate([x[0]] + targets, axis=self.axis)
    else:
        # `NotImplemented` is a comparison sentinel, not an exception;
        # raising it produces a TypeError instead of the intended error.
        raise NotImplementedError(
            "unsupported mode: %r" % (self.mode,))
def _spectrogram_mono(self, x):
    '''x.shape : (None, 1, len_src),
    returns 2D batch of a mono power-spectrogram'''
    signal = K.permute_dimensions(x, [0, 2, 1])
    signal = K.expand_dims(signal, 3)  # add a dummy dimension (channel axis)
    hop = (self.n_hop, 1)

    def _dft(kernels):
        # Strided convolution of the signal with one set of DFT kernels.
        return K.conv2d(signal, kernels,
                        strides=hop,
                        padding=self.padding,
                        data_format='channels_last')

    real_part = _dft(self.dft_real_kernels)
    imag_part = _dft(self.dft_imag_kernels)
    power = real_part ** 2 + imag_part ** 2
    # now shape is (batch_sample, n_frame, 1, freq)
    if self.image_data_format == 'channels_last':
        order = [0, 3, 1, 2]
    else:
        order = [0, 2, 3, 1]
    return K.permute_dimensions(power, order)
def time_distributed_masked_max(x, m):
    """
    Computes max along axis 1 (the time dimension), ignoring masked steps.

    In:
        x - input; a 3D tensor
        m - mask

    Positions that are masked everywhere along axis 1 yield 0.0.
    """
    fill_value = 0.0
    # place infinities where mask is off
    penalty = K.switch(K.equal(m, 0.0), -numpy.inf, 0.0)
    penalised = x + K.expand_dims(penalty)
    peak = K.max(penalised, axis=1)
    # A maximum of -inf means every step was masked; use the fill value.
    return K.switch(K.equal(peak, -numpy.inf), fill_value, peak)

## classes ##
# Transforms existing layers to masked layers
def get_constants(self, x):
    """Return the parent LSTM's constants for a sense/hypernym-stripped input.

    A 4-D input first gets a trailing axis. The first sense, first
    hypernym and all but the last feature are then selected and passed to
    the parent class's `get_constants`.
    """
    # Reimplementing because ndim of x is 5. (samples, timesteps, num_senses, num_hyps, input_dim)
    if K.ndim(x) == 4:
        x = K.expand_dims(x)
    # (samples, timesteps, input_dim), just like LSTM input.
    lstm_like_input = x[:, :, 0, 0, :-1]
    # We need the same constants as regular LSTM.
    return super(OntoAttentionLSTM, self).get_constants(lstm_like_input)
def summarize_memory(o_t, mem_tm1):
    '''
    This method selects the relevant parts of the memory given the read output and summarizes the memory.
    Implements Equations 2-3 or 8-11 in the paper.

    Returns `(z_t, m_rt)`: the attention over memory slots and the
    attention-weighted memory summary.
    '''
    # The expand_dims axis is passed positionally: the keyword is `dim` in
    # Keras 1 but `axis` in Keras 2, and the positional form suits both.
    # Selecting relevant memory slots, Equation 2
    z_t = K.softmax(K.sum(K.expand_dims(o_t, 1) * mem_tm1, axis=2))  # (batch_size, input_length)
    # Summarizing memory, Equation 3
    m_rt = K.sum(K.expand_dims(z_t, 2) * mem_tm1, axis=1)  # (batch_size, output_dim)
    return z_t, m_rt
def update_memory(self, z_t, h_t, mem_tm1):
    '''
    This method takes the attention vector (z_t), writer output (h_t) and previous timestep's memory (mem_tm1) and
    updates the memory. Implements equations 6, 14 or 15.

    Returns the updated memory, (batch_size, input_length, output_dim).
    '''
    # The original tiled both tensors to the full memory shape with
    # `K.tile(x, scalar)`, a form that is not accepted for 3-D tensors on
    # the TensorFlow backend. Singleton axes plus broadcasting give the
    # same values on every backend.
    z_col = K.expand_dims(z_t)     # (batch_size, input_length, 1)
    h_row = K.expand_dims(h_t, 1)  # (batch_size, 1, output_dim)

    # Updating memory. First term in summation corresponds to selective forgetting and the second term to
    # selective addition. Equation 6.
    mem_t = mem_tm1 * (1 - z_col) + h_row * z_col  # (batch_size, input_length, output_dim)
    return mem_t