def masked_apply(tensor, op, mask):
    """Apply `op` to the entries of `tensor` picked out by `mask`; zero elsewhere.

    Comparable to `tf.where(mask, op(tensor), tf.zeros_like(tensor))`, but
    `op` is only evaluated on the selected entries, so NaN or inf values it
    would produce at unselected locations never appear (unlike tf.where).

    :param tensor: tf.Tensor
    :param op: callable applied to the selected entries
    :param mask: tf.Tensor with dtype == bool
    :return: tf.Tensor
    """
    picked = tf.boolean_mask(tensor, mask)
    transformed = op(picked)
    positions = tf.to_int32(tf.where(mask))
    # scatter_nd leaves every position that is not written at zero.
    return tf.scatter_nd(positions, transformed, tf.shape(tensor))
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    """Compute per-step cross-entropy and zero it past each sequence's end.

    Args:
      logits: Logits of shape `[T, B, vocab_size]`
      targets: Target classes of shape `[T, B]`
      sequence_length: An int32 tensor of shape `[B]` corresponding
        to the length of each input

    Returns:
      A tensor of shape [T, B] that contains the loss per example,
      per time step.
    """
    with tf.name_scope("cross_entropy_sequence_loss"):
        step_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # sequence_mask is laid out [B, T]; transpose it so it lines up with
        # the time-major losses before multiplying.
        lengths = tf.to_int32(sequence_length)
        max_time = tf.to_int32(tf.shape(targets)[0])
        batch_major_mask = tf.sequence_mask(lengths, max_time)
        time_major_mask = tf.transpose(tf.to_float(batch_major_mask), [1, 0])
        return step_losses * time_major_mask
def mu_law_encode_nonlinear(audio, quantization_channels=256):
    '''Compress waveform amplitudes with the mu-law companding curve.

    NOTE: the result is the continuous companded signal; this function acts
    as a non-linearity and does not quantize to integer levels.
    '''
    with tf.name_scope('encode'):
        mu = tf.to_float(quantization_channels - 1)
        # Mu-law companding transformation (ITU-T, 1988). The magnitude is
        # capped at 1.0 to deal with rare large amplitudes caused by
        # resampling.
        capped_magnitude = tf.minimum(tf.abs(audio), 1.0)
        companded = tf.log1p(mu * capped_magnitude) / tf.log1p(mu)
        # The quantizing variant would additionally return
        # tf.to_int32((signal + 1) / 2 * mu + 0.5); it is intentionally
        # not applied here.
        return tf.multiply(tf.sign(audio), companded, name='mulaw')
def _crop_pool_layer(self, bottom, rois, name):
    """Crop a fixed-size patch from `bottom` for every row of `rois`.

    Column 0 of `rois` is used as the batch index and columns 1-4 as
    x1, y1, x2, y2. The coordinates are normalized by the extent of
    `bottom` (axes 1 and 2 are read as height and width) scaled by the
    first feature stride. When cfg.RESNET.MAX_POOL is set, crops are taken
    at twice cfg.POOLING_SIZE and then 2x2 max-pooled.
    """
    with tf.variable_scope(name):
        batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])

        # Get the normalized coordinates of bboxes
        bottom_shape = tf.shape(bottom)
        height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
        width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0])

        def _roi_column(index, label):
            # One [num_rois, 1] column of the roi matrix.
            return tf.slice(rois, [0, index], [-1, 1], name=label)

        x1 = _roi_column(1, "x1") / width
        y1 = _roi_column(2, "y1") / height
        x2 = _roi_column(3, "x2") / width
        y2 = _roi_column(4, "y2") / height

        # Won't be back-propagated to rois anyway, but to save time
        bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], 1))

        use_max_pool = cfg.RESNET.MAX_POOL
        crop_side = cfg.POOLING_SIZE * 2 if use_max_pool else cfg.POOLING_SIZE
        crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids),
                                         [crop_side, crop_side], name="crops")
        if use_max_pool:
            crops = slim.max_pool2d(crops, [2, 2], padding='SAME')
    return crops

# Do the first few layers manually, because 'SAME' padding can behave inconsistently
# for images of different sizes: sometimes 0, sometimes 1
def _anchor_target_layer(self, rpn_cls_score, name):
    """Wrap `anchor_target_layer` in a py_func and record its outputs.

    The four py_func outputs get static shapes, the labels are cast to
    int32, and all four tensors are stored in `self._anchor_targets` and
    merged into `self._score_summaries`.

    Returns:
      The int32 `rpn_labels` tensor.
    """
    with tf.variable_scope(name):
        py_inputs = [rpn_cls_score, self._gt_boxes, self._im_info,
                     self._feat_stride, self._anchors, self._num_anchors]
        py_output_types = [tf.float32, tf.float32, tf.float32, tf.float32]
        (rpn_labels, rpn_bbox_targets,
         rpn_bbox_inside_weights, rpn_bbox_outside_weights) = tf.py_func(
             anchor_target_layer, py_inputs, py_output_types,
             name="anchor_target")

        # py_func outputs carry no static shape; restore what is known.
        rpn_labels.set_shape([1, 1, None, None])
        for bbox_tensor in (rpn_bbox_targets,
                            rpn_bbox_inside_weights,
                            rpn_bbox_outside_weights):
            bbox_tensor.set_shape([1, None, None, self._num_anchors * 4])

        rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

        recorded = (
            ('rpn_labels', rpn_labels),
            ('rpn_bbox_targets', rpn_bbox_targets),
            ('rpn_bbox_inside_weights', rpn_bbox_inside_weights),
            ('rpn_bbox_outside_weights', rpn_bbox_outside_weights),
        )
        for key, tensor in recorded:
            self._anchor_targets[key] = tensor

        self._score_summaries.update(self._anchor_targets)

    return rpn_labels
def _crop_pool_layer(self, bottom, rois, name):
    """Crop each roi from `bottom` at 2x pooling size, then 2x2 max-pool.

    Column 0 of `rois` is used as the batch index and columns 1-4 as
    x1, y1, x2, y2; the coordinates are normalized by the extent of
    `bottom` (axes 1 and 2 read as height and width) times the first
    feature stride.
    """
    with tf.variable_scope(name):
        batch_column = tf.slice(rois, [0, 0], [-1, 1], name="batch_id")
        batch_ids = tf.squeeze(batch_column, [1])

        # Get the normalized coordinates of bounding boxes
        feature_shape = tf.shape(bottom)
        height = (tf.to_float(feature_shape[1]) - 1.) * np.float32(self._feat_stride[0])
        width = (tf.to_float(feature_shape[2]) - 1.) * np.float32(self._feat_stride[0])
        x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
        y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
        x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
        y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height

        # Won't be back-propagated to rois anyway, but to save time
        normalized_boxes = tf.concat([y1, x1, y2, x2], axis=1)
        bboxes = tf.stop_gradient(normalized_boxes)

        # Crop at twice the target size so the max-pool halves it back.
        pre_pool_size = cfg.POOLING_SIZE * 2
        crops = tf.image.crop_and_resize(
            bottom, bboxes, tf.to_int32(batch_ids),
            [pre_pool_size, pre_pool_size], name="crops")

    return slim.max_pool2d(crops, [2, 2], padding='SAME')
def _proposal_target_layer(self, rois, roi_scores, name):
    """Wrap `proposal_target_layer` in a py_func and record its outputs.

    The py_func outputs get static shapes based on cfg.TRAIN.BATCH_SIZE and
    the class count. Everything except `roi_scores` is stored in
    `self._proposal_targets` (labels cast to int32) and merged into
    `self._score_summaries`.

    Returns:
      The `(rois, roi_scores)` tensors produced by the py_func.
    """
    with tf.variable_scope(name):
        (rois, roi_scores, labels, bbox_targets,
         bbox_inside_weights, bbox_outside_weights) = tf.py_func(
             proposal_target_layer,
             [rois, roi_scores, self._gt_boxes, self._num_classes],
             [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
             name="proposal_target")

        # py_func outputs carry no static shape; restore what is known.
        rois.set_shape([cfg.TRAIN.BATCH_SIZE, 5])
        roi_scores.set_shape([cfg.TRAIN.BATCH_SIZE])
        labels.set_shape([cfg.TRAIN.BATCH_SIZE, 1])
        for per_class in (bbox_targets, bbox_inside_weights, bbox_outside_weights):
            per_class.set_shape([cfg.TRAIN.BATCH_SIZE, self._num_classes * 4])

        targets = self._proposal_targets
        targets['rois'] = rois
        targets['labels'] = tf.to_int32(labels, name="to_int32")
        targets['bbox_targets'] = bbox_targets
        targets['bbox_inside_weights'] = bbox_inside_weights
        targets['bbox_outside_weights'] = bbox_outside_weights

        self._score_summaries.update(self._proposal_targets)

        return rois, roi_scores
def _anchor_component(self):
    """Generate anchors through a py_func and store them on the instance.

    The grid size is the ceiling of the first two `self._im_info` entries
    divided by the first feature stride. Results are written to
    `self._anchors` and `self._anchor_length`.
    """
    with tf.variable_scope('ANCHOR_' + self._tag):
        # just to get the shape right
        stride = np.float32(self._feat_stride[0])
        height = tf.to_int32(tf.ceil(self._im_info[0] / stride))
        width = tf.to_int32(tf.ceil(self._im_info[1] / stride))
        generator_inputs = [height, width, self._feat_stride,
                            self._anchor_scales, self._anchor_ratios]
        anchors, anchor_length = tf.py_func(
            generate_anchors_pre, generator_inputs,
            [tf.float32, tf.int32], name="generate_anchors")
        # py_func outputs carry no static shape; restore what is known.
        anchors.set_shape([None, 4])
        anchor_length.set_shape([])
        self._anchors = anchors
        self._anchor_length = anchor_length

# [Hand Detection] Batch normalization
# http://stackoverflow.com/a/34634291/2267819
# Note that this is different from the paper(they use another method)
def _parse_example(self, serialized):
    """Parse one serialized example into a dict of decoded, reshaped tensors.

    Every name in `self.FEATURES` is decoded from raw bytes with the dtype
    from `self.dtypes` and reshaped to the value stored under
    `<name>_sz` in the same example.
    """
    feature_spec = self._get_data_features()
    size_spec = self._get_sz_features()
    # Merge the size features into the parsing spec.
    for key in size_spec:
        feature_spec[key] = size_spec[key]

    parsed = tf.parse_single_example(serialized, features=feature_spec)

    decoded = {}
    for feature in self.FEATURES:
        target_shape = tf.to_int32(parsed[feature + '_sz'])
        raw_values = tf.decode_raw(parsed[feature], self.dtypes[feature],
                                   name='decode_{}'.format(feature))
        decoded[feature] = tf.reshape(raw_values, target_shape)
    return decoded
def _decode_lambda(self, args):
    """
    Decoding within tensorflow graph.
    In case kenlm_directory is specified, a modified version of tensorflow
    (available at https://github.com/timediv/tensorflow-with-kenlm)
    is needed to run that extends ctc_decode to use a kenlm decoder.

    :param args: pair `(prediction_batch, prediction_lengths)`
    :return:
        Most probable decoded sequence. Important: blank labels are returned as `-1`.
    """
    import tensorflow as tf

    prediction_batch, prediction_lengths = args

    # Swap the first two axes, then take the log; the small constant keeps
    # log() away from zero.
    swapped = tf.transpose(prediction_batch, perm=[1, 0, 2])
    log_prediction_batch = tf.log(swapped + 1e-8)
    prediction_length_batch = tf.to_int32(tf.squeeze(prediction_lengths, axis=[1]))

    decoded, _ = self.ctc_get_decoded_and_log_probability_batch(
        log_prediction_batch, prediction_length_batch)

    # Densify each sparse result, filling empty slots with -1.
    dense_results = [
        tf.sparse_to_dense(st.indices, st.dense_shape, st.values, default_value=-1)
        for st in decoded
    ]
    return single(dense_results)
def process_image(img, scale, isotropic, crop, mean):
    '''Crops, scales, and normalizes the given image.

    scale : The image will be first scaled to this size.
            If isotropic is true, the smaller side is rescaled to this,
            preserving the aspect ratio.
    crop  : After scaling, a central crop of this size is taken.
    mean  : Subtracted from the image

    Returns the cropped image as float with `mean` subtracted.
    '''
    # Rescale
    if isotropic:
        img_shape = tf.to_float(tf.shape(img)[:2])
        min_length = tf.minimum(img_shape[0], img_shape[1])
        new_shape = tf.to_int32((scale / min_length) * img_shape)
    else:
        new_shape = tf.pack([scale, scale])
    img = tf.image.resize_images(img, new_shape[0], new_shape[1])
    # Center crop
    # Use the slice workaround until crop_to_bounding_box supports deferred tensor shapes
    # See: https://github.com/tensorflow/tensorflow/issues/521
    # Floor division keeps the offsets integral: under Python 3, `/` on an
    # int32 tensor is true division and yields floats, which tf.slice
    # does not accept as `begin`.
    offset = (new_shape - crop) // 2
    img = tf.slice(img,
                   begin=tf.pack([offset[0], offset[1], 0]),
                   size=tf.pack([crop, crop, -1]))
    # Mean subtraction
    return tf.to_float(img) - mean
def _largest_size_at_most(height, width, largest_side):
    """Scale (height, width) so the larger side becomes `largest_side`.

    The aspect ratio is preserved; both results are truncated to int32.

    Args:
      height: an int32 scalar tensor indicating the current height.
      width: an int32 scalar tensor indicating the current width.
      largest_side: A python integer or scalar `Tensor` indicating the size of
        the largest side after resize.

    Returns:
      new_height: an int32 scalar tensor indicating the new height.
      new_width: an int32 scalar tensor indicating the new width.
    """
    largest_side = tf.convert_to_tensor(largest_side, dtype=tf.int32)

    height_f = tf.to_float(height)
    width_f = tf.to_float(width)
    target_f = tf.to_float(largest_side)

    # Divide by whichever side is larger (width wins ties).
    scale = tf.cond(tf.greater(height_f, width_f),
                    lambda: target_f / height_f,
                    lambda: target_f / width_f)
    return tf.to_int32(height_f * scale), tf.to_int32(width_f * scale)
def zoomout(image, gt_bboxes, params):
    """Pad `image` by a random factor and rescale the boxes to match.

    The zoom factor is drawn uniformly from [1.05, params['X_out']); the
    image is placed at a random offset inside the enlarged canvas, which is
    filled with params['zoomout_color'] (plus one extra 0 channel value).
    Box columns are read as x, y, w, h — presumably normalized to the
    image size; confirm against the caller.

    Returns:
      The padded image and the adjusted boxes.
    """
    zoom = tf.random_uniform([], 1.05, params['X_out'])
    h, w, _ = tf.unstack(tf.to_float(tf.shape(image)))
    fill_values = params['zoomout_color']+[0]
    bg_color = tf.constant(fill_values, dtype=tf.float32)

    # Random padding before the image; the remainder goes after it.
    x_shift = tf.random_uniform([], 0, (zoom - 1) * w)
    y_shift = tf.random_uniform([], 0, (zoom - 1) * h)
    x2_shift = (zoom - 1) * w - x_shift
    y2_shift = (zoom - 1) * h - y_shift

    # somewhat hacky solution to pad with MEAN_COLOR
    # tf.pad does not support custom constant padding unlike numpy:
    # shift by the color, zero-pad, then shift back.
    image -= bg_color
    paddings = tf.to_int32([[y_shift, y2_shift], [x_shift, x2_shift], [0, 0]])
    image = tf.pad(image, paddings)
    image += bg_color

    gt_x, gt_y, gt_w, gt_h = tf.unstack(gt_bboxes, axis=1)
    shifted = tf.stack([gt_x + x_shift/w, gt_y + y_shift/h, gt_w, gt_h], axis=1)
    gt_bboxes = shifted/zoom
    return image, gt_bboxes
def depthCELoss2(pred, gt, weight, ss, outputChannels=16):
    """Sum of a weighted per-channel binary cross-entropy over all rows.

    `pred` is reshaped to rows of `outputChannels` logits and soft-maxed;
    `gt` is one-hot encoded to the same width. Each row's loss is scaled by
    `ss`, `weight` and a fixed 16-entry per-channel scaling vector.

    NOTE(review): the scaling vector has 16 entries, so it only broadcasts
    when outputChannels is 16 (or 1) — confirm other values are never used.
    """
    with tf.name_scope("depth_CE_loss"):
        pred = tf.reshape(pred, (-1, outputChannels))
        epsilon = tf.constant(value=1e-25)
        predSoftmax = tf.to_float(tf.nn.softmax(pred))

        labels = tf.to_int32(tf.squeeze(tf.reshape(gt, (-1, 1))))
        gt = tf.one_hot(indices=labels, depth=outputChannels, dtype=tf.float32)
        ss = tf.to_float(tf.reshape(ss, (-1, 1)))
        weight = tf.to_float(tf.reshape(weight, (-1, 1)))

        crossEntropyScaling = tf.to_float(
            [3.0, 3.0, 3.0, 2.0] + [1.0] * 12)

        # Probabilities are clamped at epsilon so log() never sees zero.
        negative_term = (1-gt)*tf.log(tf.maximum(1-predSoftmax, epsilon))
        positive_term = gt*tf.log(tf.maximum(predSoftmax, epsilon))
        scaled = (negative_term + positive_term)*ss*crossEntropyScaling*weight
        crossEntropy = -tf.reduce_sum(scaled, reduction_indices=[1])

        return tf.reduce_sum(crossEntropy, name="cross_entropy_sum")
def _deepfool2(model, x, epochs, eta, clip_min, clip_max, min_prob):
    """Accumulate perturbation steps until the thresholded output flips.

    The first element of `model(...)` is thresholded at 0.5 to get a hard
    label. Starting from zero noise, each iteration adds
    `-y * g / ||g||`, where `y` is the model output on the clipped
    perturbed input and `g` its gradient. The loop stops after `epochs`
    iterations or once the label differs from the original one.
    `min_prob` is accepted but not used here.

    Returns:
      The accumulated noise tensor (same shape as `x`).
    """
    def _perturbed(z):
        # Overshoot by (1 + eta), then clip to the valid input range.
        return tf.clip_by_value(x + z*(1+eta), clip_min, clip_max)

    def _first_output(inputs):
        return tf.reshape(model(inputs), [-1])[0]

    def _hard_label(score):
        return tf.to_int32(tf.greater(score, 0.5))

    y0 = _hard_label(tf.stop_gradient(_first_output(x)))

    def _cond(i, z):
        y = _hard_label(tf.stop_gradient(_first_output(_perturbed(z))))
        return tf.logical_and(tf.less(i, epochs), tf.equal(y0, y))

    def _body(i, z):
        xadv = _perturbed(z)
        y = _first_output(xadv)
        g = tf.gradients(y, xadv)[0]
        dx = - y * g / tf.norm(g)
        return i+1, z+dx

    _, noise = tf.while_loop(_cond, _body, [0, tf.zeros_like(x)],
                             name='_deepfool2_impl', back_prop=False)
    return noise
def huber_loss(infer, label, epsilon, layer_name):
    """Mean Huber loss between `infer` and `label`.

    Args:
      infer: predicted values
      label: target values
      epsilon: absolute-error threshold separating the two regimes
      layer_name: variable scope to build the ops under

    Returns:
      Scalar mean over all elements of the piecewise loss.
    """
    with tf.variable_scope(layer_name):
        abs_diff = tf.abs(tf.sub(infer, label))

        # Partition 0 holds errors above epsilon (linear regime),
        # partition 1 holds errors at or below it (quadratic regime).
        index = tf.to_int32(abs_diff <= epsilon, name = 'partition_index')
        large_errors, small_errors = tf.dynamic_partition(abs_diff, index, 2)

        linear_loss = epsilon * (large_errors - 0.5 * epsilon)
        quadratic_loss = 0.5 * tf.square(small_errors)
        all_losses = tf.concat(0, [linear_loss, quadratic_loss])
        return tf.reduce_mean(all_losses, name = 'huber_loss_sum')
def ternary_decoder(encoded_data, scaler, shape):
    """Decode packed sign values back to floats scaled by `scaler`.

    Each input value is split into four base-4 digits (lowest first); the
    digit groups are concatenated, trimmed to `prod(shape)` entries,
    reshaped to `shape`, shifted down by 1 and multiplied by `scaler`.
    """
    packed = tf.cast(encoded_data, tf.int32)

    digit_groups = [tf.mod(packed, 4)]
    for divisor in (4, 16, 64):
        # to_int32 drops any fractional part left by the division.
        digit_groups.append(tf.to_int32(tf.mod(packed/divisor, 4)))

    digits = tf.concat(digit_groups, 0)
    real_size = tf.reduce_prod(shape)
    digits = tf.to_float(digits)
    # Keep only as many entries as the target shape needs.
    digits = tf.gather(digits, tf.range(0, real_size))
    signs = tf.subtract(tf.reshape(digits, shape), 1)
    return signs*scaler
def _crop(image, offset_height, offset_width, crop_height, crop_width):
    """Slice a `crop_height` x `crop_width` window out of a rank-3 image.

    Graph-time assertions check that the image has rank 3 and that it is
    at least as large as the requested crop. All channels are kept.

    Returns:
      The cropped image, reshaped to [crop_height, crop_width, channels].
    """
    original_shape = tf.shape(image)

    rank_ok = tf.Assert(
        tf.equal(tf.rank(image), 3),
        ['Rank of image must be equal to 3.'])
    target_shape = tf.stack([crop_height, crop_width, original_shape[2]])
    cropped_shape = control_flow_ops.with_dependencies([rank_ok], target_shape)

    fits_height = tf.greater_equal(original_shape[0], crop_height)
    fits_width = tf.greater_equal(original_shape[1], crop_width)
    size_ok = tf.Assert(
        tf.logical_and(fits_height, fits_width),
        ['Crop size greater than the image size.'])

    offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

    # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
    # define the crop size.
    sliced = tf.slice(image, offsets, cropped_shape)
    image = control_flow_ops.with_dependencies([size_ok], sliced)
    return tf.reshape(image, cropped_shape)
def tf_random_aspect_resize(image, label, low_val=1.0, upper_val=1.5):
    """Stretch either the height or the width by a random factor.

    A uniform draw picks the side: above 0.5 the width is stretched,
    otherwise the height. The factor is uniform in [low_val, upper_val).
    The image is resized bilinearly and the label with nearest neighbour;
    both are returned as uint8.
    """
    shape = tf.shape(image)
    height, width = shape[0], shape[1]

    which_side = tf.to_float(tf.random_uniform([1]))[0]
    multi_val = tf.to_float(tf.random_uniform([1]))[0] * (upper_val - low_val) + low_val

    new_height = tf.cond(which_side > 0.5,
                         lambda: tf.to_float(height),
                         lambda: tf.to_float(height) * multi_val)
    new_width = tf.cond(which_side <= 0.5,
                        lambda: tf.to_float(width),
                        lambda: tf.to_float(width) * multi_val)
    new_height = tf.to_int32(new_height)
    new_width = tf.to_int32(new_width)
    target_size = [new_height, new_width]

    # The resize ops expect a leading batch axis; add it, then drop it again.
    image = tf.expand_dims(image, 0)
    label = tf.expand_dims(label, 0)

    resized_image = tf.cast(
        tf.image.resize_bilinear(image, target_size, align_corners=False),
        tf.uint8)
    resized_label = tf.cast(
        tf.image.resize_nearest_neighbor(label, target_size, align_corners=False),
        tf.uint8)

    return tf.squeeze(resized_image, 0), tf.squeeze(resized_label, 0)
def get_learning_rate_decay(learning_rate, global_step, params):
    """Return the learning rate under the schedule named in `params`.

    `params.learning_rate_decay` selects the schedule:
      "noam": `learning_rate` times
        hidden_size**-0.5 * min((step+1) * warmup_steps**-1.5, (step+1)**-0.5)
      "piecewise_constant": `tf.train.piecewise_constant` over
        `params.learning_rate_boundaries` / `params.learning_rate_values`
      "none": `learning_rate` unchanged

    Raises:
      ValueError: for any other schedule name.
    """
    schedule = params.learning_rate_decay

    if schedule == "none":
        return learning_rate

    if schedule == "piecewise_constant":
        return tf.train.piecewise_constant(tf.to_int32(global_step),
                                           params.learning_rate_boundaries,
                                           params.learning_rate_values)

    if schedule == "noam":
        step = tf.to_float(global_step)
        warmup_steps = tf.to_float(params.warmup_steps)
        multiplier = params.hidden_size ** -0.5
        warmup_term = (step + 1) * (warmup_steps ** -1.5)
        decay_term = (step + 1) ** -0.5
        decay = multiplier * tf.minimum(warmup_term, decay_term)
        return learning_rate * decay

    raise ValueError("Unknown learning_rate_decay")
def ctc_batch_cost(y_true, y_pred, input_length, label_length):
    """Runs CTC loss algorithm on each batch element.

    # Arguments
        y_true: tensor `(samples, max_string_length)`
            containing the truth labels.
        y_pred: tensor `(samples, time_steps, num_categories)`
            containing the prediction, or output of the softmax.
        input_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_pred`.
        label_length: tensor `(samples, 1)` containing the sequence length for
            each batch item in `y_true`.

    # Returns
        Tensor with shape (samples,1) containing the
            CTC loss of each element
    """
    # Squeeze only the trailing axis. An axis-less squeeze would also drop
    # the batch axis when samples == 1, turning the length vectors into
    # scalars.
    label_length = tf.to_int32(tf.squeeze(label_length, axis=-1))
    input_length = tf.to_int32(tf.squeeze(input_length, axis=-1))
    sparse_labels = tf.to_int32(ctc_label_dense_to_sparse(y_true, label_length))

    # Swap the first two axes and take the log; the small constant keeps
    # log() away from zero.
    y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)

    return tf.expand_dims(ctc.ctc_loss(inputs=y_pred,
                                       labels=sparse_labels,
                                       sequence_length=input_length), 1)
def image_scaling(img, label):
    """Resize image and mask by one random factor drawn from [0.5, 1.5).

    Args:
      img: Training image to scale.
      label: Segmentation mask to scale.

    Returns:
      The resized image and the nearest-neighbour resized mask.
    """
    scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)

    source_height = tf.to_float(tf.shape(img)[0])
    h_new = tf.to_int32(tf.multiply(source_height, scale))
    source_width = tf.to_float(tf.shape(img)[1])
    w_new = tf.to_int32(tf.multiply(source_width, scale))

    # `scale` has shape [1], so the stacked sizes are [2, 1]; flatten to [2].
    new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1])

    img = tf.image.resize_images(img, new_shape)
    # The nearest-neighbour resize wants a batch axis; add it, then drop it.
    batched_label = tf.expand_dims(label, 0)
    label = tf.image.resize_nearest_neighbor(batched_label, new_shape)
    label = tf.squeeze(label, squeeze_dims=[0])
    return img, label
def _crop(image, offset_height, offset_width, crop_height, crop_width):
    """Crop a rank-3 image to the given window using `tf.slice`.

    The returned tensor depends on two graph-time assertions: the image
    must have rank 3, and it must be at least `crop_height` x
    `crop_width`. The channel axis is kept whole.

    Returns:
      The cropped image with shape [crop_height, crop_width, channels].
    """
    image_shape = tf.shape(image)

    rank_assertion = tf.Assert(
        tf.equal(tf.rank(image), 3),
        ['Rank of image must be equal to 3.'])
    cropped_shape = control_flow_ops.with_dependencies(
        [rank_assertion],
        tf.stack([crop_height, crop_width, image_shape[2]]))

    tall_enough = tf.greater_equal(image_shape[0], crop_height)
    wide_enough = tf.greater_equal(image_shape[1], crop_width)
    size_assertion = tf.Assert(
        tf.logical_and(tall_enough, wide_enough),
        ['Crop size greater than the image size.'])

    begin = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

    # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
    # define the crop size.
    window = control_flow_ops.with_dependencies(
        [size_assertion],
        tf.slice(image, begin, cropped_shape))
    return tf.reshape(window, cropped_shape)
def select_present(x, presence, batch_size=1, name='select_present'):
    """Regroup `x` so that present entries precede absent ones.

    Each entry gets partition id `2 * b + (1 - presence)`, where `b` is the
    batch offset supplied by `broadcast_against` (presumably the index
    along the leading axis — confirm against that helper). The partitions
    are concatenated in id order and reshaped back to the shape of `x`.

    Args:
      x: tensor to regroup; its leading axis is treated as the batch axis.
      presence: mask marking the entries of `x` that are present.
      batch_size: batch size to use when the leading dimension of `x` is
        not statically known.
      name: variable scope name.

    Returns:
      A tensor with the shape of `x`.
    """
    with tf.variable_scope(name):
        presence = 1 - tf.to_int32(presence)  # invert mask: 0 = present

        # Prefer the static batch size when it is known. Read it through
        # as_list() and test with `is not None`: comparing a tf.Dimension
        # to None with `!=` evaluates to None, so that test never passed
        # and the static size was silently ignored.
        static_batch_size = x.get_shape().as_list()[0]
        if static_batch_size is not None:
            batch_size = int(static_batch_size)

        num_partitions = 2 * batch_size
        # Even ids 0, 2, 4, ... — one pair of partitions per batch element.
        r = tf.range(0, num_partitions, 2)
        r.set_shape(tf.TensorShape(batch_size))
        r = broadcast_against(r, presence)

        presence += r

        selected = tf.dynamic_partition(x, presence, num_partitions)
        selected = tf.concat(axis=0, values=selected)
        selected = tf.reshape(selected, tf.shape(x))

    return selected
def _bbox_to_mask(yy, region_size, dtype):
    """Build a 2-D mask of ones covering the box `yy` inside a region.

    `yy` is read as (top, left, height, width): a block of ones of the box
    size is zero-padded out to `region_size` and then cropped to it.
    """
    # trim bounding box exceeding region_size on top and left
    overhang_top_left = tf.nn.relu(-yy[:2])
    core_size = tf.to_int32(tf.round(yy[2:] - overhang_top_left))
    core = tf.ones(core_size, dtype=dtype)

    top = tf.maximum(yy[0], 0.)
    left = tf.maximum(yy[1], 0.)
    bottom = tf.minimum(region_size[0], yy[0] + yy[2])
    right = tf.minimum(region_size[1], yy[1] + yy[3])

    # Rows of the padding matrix: [before, after] for each of the two axes.
    pad_amounts = (top, region_size[0] - bottom, left, region_size[1] - right)
    padding = tf.reshape(tf.stack(pad_amounts), (-1, 2))
    padding = tf.to_int32(tf.round(padding))
    mask = tf.pad(core, padding)

    # trim bounding box exceeding region_size on bottom and right
    rs = tf.to_int32(tf.round(region_size))
    mask = mask[:rs[0], :rs[1]]
    mask.set_shape((None, None))
    return mask
def get_hash_slots(self, query):
    """Gets hashed-to buckets for batch of queries.

    For each library, the sign of `query` projected onto that library's
    hash vectors gives one bit per hash function; the bits are combined
    into an integer slot index with weights 2**0, 2**1, ...

    Args:
      query: 2-d Tensor of query vectors.

    Returns:
      A list of hashed-to buckets for each hash function.
    """
    # The bit weights do not depend on the library, so build them once.
    # A distinct loop name also avoids shadowing the library index.
    bit_weights = tf.constant(
        [[2 ** bit for bit in range(self.num_hashes)]], dtype=tf.int32)

    hash_slot_idxs = []
    for library in range(self.num_libraries):
        projection = tf.matmul(query, self.hash_vecs[library], transpose_b=True)
        sign_bits = tf.to_int32(tf.less(projection, 0))
        hash_slot_idxs.append(tf.reduce_sum(sign_bits * bit_weights, 1))
    return hash_slot_idxs
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    """Per-step cross-entropy loss with steps past the sequence end zeroed.

    Args:
      logits: Logits of shape `[T, B, vocab_size]`
      targets: Target classes of shape `[T, B]`
      sequence_length: An int32 tensor of shape `[B]` corresponding
        to the length of each input

    Returns:
      A tensor of shape [T, B] that contains the loss per example,
      per time step.
    """
    with tf.name_scope("cross_entropy_sequence_loss"):
        raw_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # Build a [B, T] validity mask, then transpose it to [T, B].
        valid_steps = tf.sequence_mask(
            tf.to_int32(sequence_length),
            tf.to_int32(tf.shape(targets)[0]))
        valid_steps = tf.transpose(tf.to_float(valid_steps), [1, 0])

        masked_losses = raw_losses * valid_steps
        return masked_losses
def random_image_scaling(image, label):
    """Randomly scales the image and mask by a shared factor in [0.5, 1.5).

    Args:
      image: Training image to scale.
      label: Segmentation mask to scale.

    Returns:
      The resized image and the nearest-neighbour resized mask.
    """
    scale = tf.random_uniform(
        [1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)

    old_height = tf.to_float(tf.shape(image)[0])
    h_new = tf.to_int32(tf.multiply(old_height, scale))
    old_width = tf.to_float(tf.shape(image)[1])
    w_new = tf.to_int32(tf.multiply(old_width, scale))

    # `scale` has shape [1], so the stack is [2, 1]; drop the extra axis.
    new_shape = tf.squeeze(tf.stack([h_new, w_new]), axis=1)

    image = tf.image.resize_images(image, new_shape)

    # Nearest-neighbour resize needs a batch axis; add it, then remove it.
    label_batch = tf.expand_dims(label, 0)
    label = tf.squeeze(
        tf.image.resize_nearest_neighbor(label_batch, new_shape), axis=0)
    return image, label
def random_image_scaling(self, image, label):
    """Resize image and mask by one random factor drawn from [0.5, 1.5).

    Args:
      image: Training image to scale.
      label: Segmentation mask to scale.

    Returns:
      Tuple of the resized image and the resized mask; the mask uses
      nearest-neighbour interpolation.
    """
    scale = tf.random_uniform(
        [1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)

    height_f = tf.to_float(tf.shape(image)[0])
    h_new = tf.to_int32(tf.multiply(height_f, scale))
    width_f = tf.to_float(tf.shape(image)[1])
    w_new = tf.to_int32(tf.multiply(width_f, scale))

    # Stacking two shape-[1] tensors gives [2, 1]; squeeze to a [2] size.
    stacked_sizes = tf.stack([h_new, w_new])
    new_shape = tf.squeeze(stacked_sizes, axis=1)

    image = tf.image.resize_images(image, new_shape)

    # Add a batch axis for the resize op and remove it afterwards.
    resized_label = tf.image.resize_nearest_neighbor(
        tf.expand_dims(label, 0), new_shape)
    label = tf.squeeze(resized_label, axis=0)

    return image, label
def build_loss(self):
    """Build the mean softmax cross-entropy loss over valid pixels.

    Logits come from the 'output_logits' layer and annotations from the
    'label' layer. `get_valid_logits_and_labels` filters both using the
    class list 0..cfg.NCLASSES-1 plus 255. When cfg.TRAIN.WEIGHT_DECAY is
    positive, the collected regularization losses are added.
    """
    logits_batch = self.get_output('output_logits')
    annotation_batch = self.get_output('label')

    class_labels = list(range(cfg.NCLASSES))
    class_labels.append(255)
    print("class_label: ", class_labels)

    # Drop the axis at index 3 and cast to int32.
    annotation_batch = tf.to_int32(tf.squeeze(annotation_batch, axis=3))

    valid_annotation_batch, valid_logits_batch = get_valid_logits_and_labels(
        logits_batch_tensor=logits_batch,
        annotation_batch_tensor=annotation_batch,
        class_labels=class_labels)

    per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=valid_logits_batch, labels=valid_annotation_batch)
    loss = tf.reduce_mean(per_pixel_loss)

    # add regularizer
    if cfg.TRAIN.WEIGHT_DECAY > 0:
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss = tf.add_n(regularization_losses) + loss

    return loss
def flip_randomly(inputs, horizontally, vertically, is_training, name=None):
    """Flip images randomly, deciding separately for each image.

    Args:
        inputs (4-D tensor): Input images (batch size, height, width, channels).
        horizontally (bool): If True, flip horizontally with 50% probability.
            Otherwise, don't.
        vertically (bool): If True, flip vertically with 50% probability.
            Otherwise, don't.
        is_training (bool): If False, no flip is performed.
        name: A name for the operation.

    Returns:
        A tensor shaped like `inputs` with the selected images flipped.
    """
    with tf.name_scope(name, "flip_randomly") as scope:
        batch_size, height, width, _ = tf.unstack(tf.shape(inputs))

        # Per-image 0/1 draws; they are forced to 0 when the corresponding
        # flag or `is_training` is False.
        vertical_choices = (tf.random_uniform([batch_size], 0, 2, tf.int32) *
                            tf.to_int32(vertically) *
                            tf.to_int32(is_training))
        horizontal_choices = (tf.random_uniform([batch_size], 0, 2, tf.int32) *
                              tf.to_int32(horizontally) *
                              tf.to_int32(is_training))

        # reverse_sequence with length 0 leaves an image untouched; with the
        # full axis length it reverses the image along that axis.
        vertical_lengths = vertical_choices * height
        flipped_rows = tf.reverse_sequence(inputs, vertical_lengths, 1)
        horizontal_lengths = horizontal_choices * width
        flipped_both = tf.reverse_sequence(flipped_rows, horizontal_lengths, 2)
        return tf.identity(flipped_both, name=scope)
def _build_output_graph(self, rep, t, dim_in, dim_out, do_out, FLAGS):
    ''' Construct output/regression layers.

    With FLAGS.split_output, rows with t < 1 and rows with t > 0 go through
    two separate `_build_output` heads and the predictions are stitched
    back into the original row order. Otherwise `t` is concatenated to
    `rep` and a single head is built.

    Returns (y, weights_out, weights_pred).
    '''
    if not FLAGS.split_output:
        h_input = tf.concat(1,[rep, t])
        y, weights_out, weights_pred = self._build_output(
            h_input, dim_in+1, dim_out, do_out, FLAGS)
        return y, weights_out, weights_pred

    # Row indices of the two groups.
    i0 = tf.to_int32(tf.where(t < 1)[:,0])
    i1 = tf.to_int32(tf.where(t > 0)[:,0])

    rep0 = tf.gather(rep, i0)
    rep1 = tf.gather(rep, i1)

    y0, weights_out0, weights_pred0 = self._build_output(rep0, dim_in, dim_out, do_out, FLAGS)
    y1, weights_out1, weights_pred1 = self._build_output(rep1, dim_in, dim_out, do_out, FLAGS)

    # Put each group's predictions back at its original row positions.
    y = tf.dynamic_stitch([i0, i1], [y0, y1])
    weights_out = weights_out0 + weights_out1
    weights_pred = weights_pred0 + weights_pred1

    return y, weights_out, weights_pred
def get_fix_offset(h, w, crop_height, crop_width):
    """Return the 13 fixed (row, column) crop offsets as one stacked tensor.

    Offsets are multiples of a quarter of the slack between the image size
    and the crop size: the four corners and the center first, followed by
    eight additional positions.
    """
    height_off = (h - crop_height) / 4
    width_off = (w - crop_width) / 4

    def _offset(quarters, step):
        # 0 stays a plain Python 0 and 1 uses the step unscaled.
        if quarters == 0:
            return 0
        if quarters == 1:
            return tf.to_int32(step)
        return tf.to_int32(quarters * step)

    # (row quarters, column quarters) for each crop position.
    quarter_grid = [
        (0, 0), (0, 4), (4, 0), (4, 4), (2, 2),
        # more fix crop
        (0, 2), (4, 2), (2, 0), (2, 4),
        (1, 1), (1, 3), (3, 1), (3, 3),
    ]

    crop_offsets = []
    for row_quarters, col_quarters in quarter_grid:
        row = _offset(row_quarters, height_off)
        col = _offset(col_quarters, width_off)
        crop_offsets.append(tf.stack([row, col]))

    return tf.stack(crop_offsets)
def preprocess(image, size, max_length):
    """Resize `image` so one side equals `size`, then subtract `mean_pixel`.

    The aspect ratio is preserved. With a truthy `max_length`, `size` is
    given to the height when width < height; otherwise it is given to the
    height when height < width. In the other case the width gets `size`.
    """
    shape = tf.shape(image)
    size_t = tf.constant(size, tf.float64)
    height = tf.cast(shape[0], tf.float64)
    width = tf.cast(shape[1], tf.float64)

    if max_length:
        height_gets_size = tf.less(width, height)
    else:
        height_gets_size = tf.less(height, width)

    new_height, new_width = tf.cond(
        height_gets_size,
        lambda: (size_t, (width * size_t) / height),
        lambda: ((height * size_t) / width, size_t))

    target_size = [tf.to_int32(new_height), tf.to_int32(new_width)]
    resized_image = tf.image.resize_images(image, target_size)
    return resized_image - mean_pixel

# max_length: Whether size dictates longest or shortest side. Default longest
def cross_entropy_sequence_loss(logits, targets, sequence_length):
    """Average cross-entropy loss per valid time step.

    Per-step losses are computed for time-major `logits` / `targets`,
    steps past each sequence's length are zeroed, and the sum is divided
    by the total of `sequence_length`.

    Returns:
      A scalar tensor.
    """
    with tf.name_scope('cross_entropy_sequence_loss'):
        total_length = tf.to_float(tf.reduce_sum(sequence_length))

        entropy_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # Mask out the losses we don't care about
        lengths = tf.to_int32(sequence_length)
        max_time = tf.to_int32(tf.shape(targets)[0])
        batch_major_mask = tf.sequence_mask(lengths, max_time)
        loss_mask = tf.transpose(tf.to_float(batch_major_mask), [1, 0])

        # losses.shape: T * B
        # sequence_length: B
        masked_total = tf.reduce_sum(entropy_losses * loss_mask)
        return masked_total / total_length
def next_inputs(self, time, sample_ids=None, prev_finished=None):
    """Pick the inputs for the next step and report which sequences ended.

    With no `sample_ids`, or a teacher rate of 1, the stored inputs are
    read. With a teacher rate strictly between those cases (> 0), each
    position takes the stored input with probability `teacher_rate` and
    the sampled id otherwise. With a teacher rate of 0 the sampled ids are
    used. `prev_finished` is accepted but not used here.

    Returns:
      Tuple `(finished, next_inputs)`, where `next_inputs` is
      `self.lookup` applied to the chosen ids.
    """
    no_samples = sample_ids is None

    if no_samples or self.teacher_rate > 0.:
        finished = tf.greater_equal(time + 1, self.sequence_length)
    else:
        # Free-running: stop at the step limit or when EOS is sampled.
        finished = math_ops.logical_or(
            tf.greater_equal(time + 1, self.max_step),
            tf.equal(self.eos_id, sample_ids))

    if self.teacher_rate == 1. or no_samples:
        stored_ids = self._input_tas.read(time)
        return finished, self.lookup(stored_ids)

    if not self.teacher_rate > 0.:
        return finished, self.lookup(sample_ids)

    # scheduled
    use_teacher = tf.less_equal(
        tf.random_uniform(tf.shape(sample_ids), minval=0., maxval=1.),
        self.teacher_rate)
    use_teacher = tf.to_int32(use_teacher)
    next_input_ids = (use_teacher * self._input_tas.read(time)
                      + (1 - use_teacher) * sample_ids)
    return finished, self.lookup(next_input_ids)
def sample(self, logits, time):
    """Return stored input ids or sampled ids, depending on `time`.

    The number of trailing steps that use sampled ids is
    `max(global_step - burn_in_step, 0) // increment_step`. From
    `sequence_length` minus that count onward, ids drawn from `logits` are
    returned; before it, the stored inputs are (EOS once `time` reaches
    `max_sequence_length`).
    """
    steps_past_burn_in = tf.maximum(
        self.global_step_tensor - self.burn_in_step, 0)
    rl_time_steps = tf.floordiv(steps_past_burn_in, self.increment_step)
    start_rl_step = self.sequence_length - rl_time_steps

    next_input_ids = tf.cond(
        tf.greater_equal(time, self.max_sequence_length),
        lambda: tf.tile([self.eos_id], [self.batch_size]),
        lambda: self._input_tas.read(time))

    next_predicted_ids = tf.squeeze(tf.multinomial(logits, 1), axis=[-1])

    # 1 where the sampled id should be used, 0 where the stored input is.
    use_sampled = tf.to_int32(time >= start_rl_step)
    stored_part = (1 - use_sampled) * tf.to_int32(next_input_ids)
    sampled_part = use_sampled * tf.to_int32(next_predicted_ids)
    return stored_part + sampled_part
def __init__(self, query_size, keys, values, values_length,
             name='attention'):
    """Set up the query projection, score vector and padding mask.

    The attention size is the last static dimension of `keys`. A mask over
    the time axis of `keys` is built from `values_length` and stored in
    `self.scores_mask`, transposed when TIME_MAJOR is set.
    """
    self.attention_size = keys.get_shape().as_list()[-1]
    self.keys = keys
    self.values = values
    self.values_length = values_length
    self.query_trans = LinearOp(query_size, self.attention_size, name=name)

    with tf.variable_scope(name):
        self.v_att = tf.get_variable('v_att', shape=[self.attention_size],
                                     dtype=DTYPE)

    if TIME_MAJOR:
        self.time_axis = 0
    else:
        self.time_axis = 1

    # Replace all scores for padded inputs with tf.float32.min
    num_scores = tf.shape(self.keys)[self.time_axis]
    mask = tf.sequence_mask(
        lengths=tf.to_int32(self.values_length),
        maxlen=tf.to_int32(num_scores),
        dtype=DTYPE)

    # sequence_mask is batch-major; flip it for time-major data.
    self.scores_mask = tf.transpose(mask) if TIME_MAJOR else mask
def test_keep_prob(self):
    """Counts dropped items and compares the count with the expectation."""
    var = tf.ones([10000])
    # Run inside a context manager so the session is closed even when an
    # assertion fails.
    with tf.Session() as s:
        for kprob in [0.1, 0.7]:
            dropped_var = dropout(var, kprob, tf.constant(True))
            dropped_size = tf.reduce_sum(
                tf.to_int32(tf.equal(dropped_var, 0.0)))

            dsize = s.run(dropped_size)

            # Dropout is random, so only require the count to be within
            # 500 of the expected number of zeros.
            expected_dropped_size = 10000 * (1 - kprob)
            self.assertTrue(np.isclose(expected_dropped_size, dsize, atol=500))