The following are 50 code examples, extracted from open source Python projects, that illustrate how to use tensorflow.maximum().
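Before the project examples, here is a minimal standalone sketch (TF 1.x style, matching the examples below; values are illustrative only) showing that tf.maximum computes an element-wise maximum and broadcasts like other binary ops:

import tensorflow as tf

a = tf.constant([1.0, -2.0, 3.0])
b = tf.constant(0.0)  # a scalar broadcasts against the vector

clipped = tf.maximum(a, b)  # element-wise max; with 0.0 this acts like ReLU

with tf.Session() as sess:
    print(sess.run(clipped))  # [1. 0. 3.]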
def calculate_loss(self, predictions, labels, **unused_params):
    bound = FLAGS.softmax_bound
    vocab_size_1 = bound
    with tf.name_scope("loss_softmax"):
        epsilon = 10e-8
        float_labels = tf.cast(labels, tf.float32)
        labels_1 = float_labels[:, :vocab_size_1]
        predictions_1 = predictions[:, :vocab_size_1]
        cross_entropy_loss = CrossEntropyLoss().calculate_loss(predictions_1, labels_1)
        labels_2 = float_labels[:, vocab_size_1:]
        predictions_2 = predictions[:, vocab_size_1:]
        # l1 normalization (labels are no less than 0)
        label_rowsum = tf.maximum(
            tf.reduce_sum(labels_2, 1, keep_dims=True),
            epsilon)
        label_append = 1.0 - tf.reduce_max(labels_2, 1, keep_dims=True)
        norm_float_labels = tf.concat((tf.div(labels_2, label_rowsum), label_append), axis=1)
        predictions_append = 1.0 - tf.reduce_sum(predictions_2, 1, keep_dims=True)
        softmax_outputs = tf.concat((predictions_2, predictions_append), axis=1)
        softmax_loss = norm_float_labels * tf.log(softmax_outputs + epsilon) + (
            1 - norm_float_labels) * tf.log(1 - softmax_outputs + epsilon)
        softmax_loss = tf.negative(tf.reduce_sum(softmax_loss, 1))
        return tf.reduce_mean(softmax_loss) + cross_entropy_loss
def __init__(self,
             num_classes=4716,
             feature_sizes=[1024],
             feature_names=["inc3"],
             max_frames=300):
    """Construct a YT8MFrameFeatureReader.

    Args:
      num_classes: a positive integer for the number of classes.
      feature_sizes: positive integer(s) for the feature dimensions as a list.
      feature_names: the feature name(s) in the tensorflow record as a list.
      max_frames: the maximum number of frames to process.
    """
    assert len(feature_names) == len(feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(feature_names), len(feature_sizes))

    self.num_classes = num_classes
    self.feature_sizes = feature_sizes
    self.feature_names = feature_names
    self.max_frames = max_frames
def resample(patient, new_spacing=[1, 1, 1]):
    scan = get_scan(patient)
    image = get_3D_data(patient)

    # Determine current pixel spacing
    spacing = np.array([scan[0].SliceThickness] + scan[0].PixelSpacing, dtype=np.float32)

    resize_factor = spacing / new_spacing
    new_real_shape = image.shape * resize_factor
    new_shape = np.round(new_real_shape)
    real_resize_factor = new_shape / image.shape
    new_spacing = spacing / real_resize_factor

    image = nd.interpolation.zoom(image, real_resize_factor, mode='nearest')

    return image

# For the sake of testing the network, we'll be using the sample dataset
# For this, we'll use the maximum size of the image
# and PAD any image with -1000 values which is smaller than that
# PS: only the first dimension is different in sample dataset
# which is not the case in actual dataset
def cyclic_learning_rate(
        learning_rate_min,
        learning_rate_max,
        step_size,
        global_step,
        mode='triangular',
        scope=None):
    with tf.variable_scope(scope, 'CyclicLearningRate'):
        cycle = tf.floor(1 + tf.to_float(global_step) / (2 * step_size))
        if mode == 'triangular':
            scale = 1
        elif mode == 'triangular2':
            scale = 2**(cycle - 1)
        else:
            raise ValueError('Unrecognized mode: {}'.format(mode))
        x = tf.abs(tf.to_float(global_step) / step_size - 2 * cycle + 1)
        lr = learning_rate_min + (learning_rate_max - learning_rate_min) * \
            tf.maximum(0.0, 1 - x) / scale
        return lr
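To see what the triangular schedule above actually produces, here is a small NumPy check (an illustrative sketch, not part of the original project; lr_min, lr_max, and step_size are arbitrary example values) that evaluates the same formula at a few global steps:

import numpy as np

def clr(step, lr_min=0.001, lr_max=0.006, step_size=100):
    # Same arithmetic as the TF graph above, in plain NumPy.
    cycle = np.floor(1 + step / (2.0 * step_size))
    x = np.abs(step / float(step_size) - 2 * cycle + 1)
    return lr_min + (lr_max - lr_min) * np.maximum(0.0, 1 - x)

for step in [0, 50, 100, 150, 200]:
    print("step %d -> lr %.4f" % (step, clr(step)))
# The rate rises linearly from lr_min at step 0 to lr_max at step 100,
# then falls back to lr_min at step 200, completing one cycle.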
def _anneal_weight(init_val, final_val, anneal_type, global_step, anneal_steps,
                   hold_for=0., steps_div=1., dtype=tf.float64):
    val, final, step, hold_for, anneal_steps, steps_div = (
        tf.cast(i, dtype) for i in
        (init_val, final_val, global_step, hold_for, anneal_steps, steps_div))
    step = tf.maximum(step - hold_for, 0.)

    if anneal_type == 'exp':
        decay_rate = tf.pow(final / val, steps_div / anneal_steps)
        val = tf.train.exponential_decay(val, step, steps_div, decay_rate)
    elif anneal_type == 'linear':
        val = final + (val - final) * (1. - step / anneal_steps)
    else:
        raise NotImplementedError

    anneal_weight = tf.maximum(final, val)
    return anneal_weight
def shrink_soft_threshold(r, rvar, theta):
    """
    soft threshold function
        y = sign(x) * max(0, abs(x) - theta[0] * sqrt(rvar)) * scaling
    where scaling is theta[1] (default=1)
    in other words, if theta has length 1, then the standard
    soft threshold (with no extra scaling) is applied
    """
    if len(theta.get_shape()) > 0 and theta.get_shape() != (1,):
        lam = theta[0] * tf.sqrt(rvar)
        scale = theta[1]
    else:
        lam = theta * tf.sqrt(rvar)
        scale = None
    lam = tf.maximum(lam, 0)
    arml = tf.abs(r) - lam
    xhat = tf.sign(r) * tf.maximum(arml, 0)
    dxdr = tf.reduce_mean(tf.to_float(arml > 0), 0)
    if scale is not None:
        xhat = xhat * scale
        dxdr = dxdr * scale
    return (xhat, dxdr)
def pwlin_grid(r_, rvar_, theta_, dtheta=.75):
    """piecewise linear with noise-adaptive grid spacing.
    returns xhat, dxdr
    where
        q = r / dtheta / sqrt(rvar)
        xhat = r * interp(q, theta)

    all but the last dimensions of theta must broadcast to r_
    e.g. r.shape = (500, 1000) is compatible with theta.shape = (500, 1, 7)
    """
    ntheta = int(theta_.get_shape()[-1])
    scale_ = dtheta / tf.sqrt(rvar_)
    ars_ = tf.clip_by_value(tf.expand_dims(tf.abs(r_) * scale_, -1), 0.0, ntheta - 1.0)
    centers_ = tf.constant(np.arange(ntheta), dtype=tf.float32)
    # new dimension for distance to closest bin centers (or center)
    outer_distance_ = tf.maximum(0., 1.0 - tf.abs(ars_ - centers_))
    # apply the gain (learnable)
    gain_ = tf.reduce_sum(theta_ * outer_distance_, axis=-1)
    xhat_ = gain_ * r_
    dxdr_ = tf.gradients(xhat_, r_)[0]
    return (xhat_, dxdr_)
def apply_perturbations(i, j, X, increase, theta, clip_min, clip_max):
    """
    TensorFlow implementation for applying perturbations to input features
    based on saliency maps
    :param i: index of first selected feature
    :param j: index of second selected feature
    :param X: a matrix containing our input features for our sample
    :param increase: boolean; true if we are increasing pixels, false otherwise
    :param theta: delta for each feature adjustment
    :param clip_min: minimum value for a feature in our sample
    :param clip_max: maximum value for a feature in our sample
    :return: a perturbed input feature matrix for a target class
    """
    # perturb our input sample
    if increase:
        X[0, i] = np.minimum(clip_max, X[0, i] + theta)
        X[0, j] = np.minimum(clip_max, X[0, j] + theta)
    else:
        X[0, i] = np.maximum(clip_min, X[0, i] - theta)
        X[0, j] = np.maximum(clip_min, X[0, j] - theta)

    return X
def _summarize_progress(train_data, feature, label, gene_output, batch, suffix, max_samples=8):
    td = train_data

    size = [label.shape[1], label.shape[2]]

    nearest = tf.image.resize_nearest_neighbor(feature, size)
    nearest = tf.maximum(tf.minimum(nearest, 1.0), 0.0)

    bicubic = tf.image.resize_bicubic(feature, size)
    bicubic = tf.maximum(tf.minimum(bicubic, 1.0), 0.0)

    clipped = tf.maximum(tf.minimum(gene_output, 1.0), 0.0)

    # image = tf.concat([nearest, bicubic, clipped, label], 2)
    image = clipped

    printCnt = 5
    image = image[0:printCnt]
    image = tf.concat([image[i, :, :, :] for i in range(printCnt)], 0)
    image = td.sess.run(image)

    filename = 'batch%06d_%s.png' % (batch, suffix)
    filename = os.path.join(FLAGS.train_dir, filename)
    scipy.misc.toimage(image, cmin=0., cmax=1.).save(filename)
    print(" Saved %s" % (filename,))
def _apply_dense(self, grad, var):
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        eps = 1e-7  # Can't use 1e-8 due to underflow -- not sure if it makes a big difference.
    else:
        eps = 1e-8

    v = self.get_slot(var, "v")
    v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
    g_t = v_t / m_t

    var_update = tf.assign_sub(var, lr_t * g_t)
    return tf.group(*[var_update, m_t, v_t])
def batch_iou_tf(proposals, gt):
    bboxes = tf.reshape(tf.transpose(proposals), [4, -1, 1])
    bboxes_x1 = bboxes[0]
    bboxes_x2 = bboxes[0] + bboxes[2]
    bboxes_y1 = bboxes[1]
    bboxes_y2 = bboxes[1] + bboxes[3]

    gt = tf.reshape(tf.transpose(gt), [4, 1, -1])
    gt_x1 = gt[0]
    gt_x2 = gt[0] + gt[2]
    gt_y1 = gt[1]
    gt_y2 = gt[1] + gt[3]

    widths = tf.maximum(0.0, tf.minimum(bboxes_x2, gt_x2) -
                        tf.maximum(bboxes_x1, gt_x1))
    heights = tf.maximum(0.0, tf.minimum(bboxes_y2, gt_y2) -
                         tf.maximum(bboxes_y1, gt_y1))
    intersection = widths * heights
    union = bboxes[2] * bboxes[3] + gt[2] * gt[3] - intersection
    return (intersection / union)
def batch_iou(proposals, gt):
    bboxes = np.transpose(proposals).reshape((4, -1, 1))
    bboxes_x1 = bboxes[0]
    bboxes_x2 = bboxes[0] + bboxes[2]
    bboxes_y1 = bboxes[1]
    bboxes_y2 = bboxes[1] + bboxes[3]

    gt = np.transpose(gt).reshape((4, 1, -1))
    gt_x1 = gt[0]
    gt_x2 = gt[0] + gt[2]
    gt_y1 = gt[1]
    gt_y2 = gt[1] + gt[3]

    widths = np.maximum(0, np.minimum(bboxes_x2, gt_x2) -
                        np.maximum(bboxes_x1, gt_x1))
    heights = np.maximum(0, np.minimum(bboxes_y2, gt_y2) -
                         np.maximum(bboxes_y1, gt_y1))
    intersection = widths * heights
    union = bboxes[2] * bboxes[3] + gt[2] * gt[3] - intersection
    return (intersection / union)
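A quick NumPy check of the IoU helper above (an illustrative sketch, assuming the batch_iou function just defined is in scope; boxes are in the [x, y, w, h] form the code expects):

import numpy as np

# Two proposals vs. one ground-truth box.
proposals = np.array([[0.0, 0.0, 2.0, 2.0],
                      [1.0, 1.0, 2.0, 2.0]])
gt = np.array([[0.0, 0.0, 2.0, 2.0]])

print(batch_iou(proposals, gt))
# [[1.        ]
#  [0.14285714]]  -- a (num_proposals, num_gt) matrix of pairwise IoU:
# the second box overlaps the ground truth in a 1x1 region, so 1 / (4+4-1) = 1/7.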
def decode_bboxes(tcoords, anchors):
    var_x, var_y, var_w, var_h = config['prior_variance']
    t_x = tcoords[:, 0] * var_x
    t_y = tcoords[:, 1] * var_y
    t_w = tcoords[:, 2] * var_w
    t_h = tcoords[:, 3] * var_h
    a_w = anchors[:, 2]
    a_h = anchors[:, 3]
    a_x = anchors[:, 0] + a_w / 2
    a_y = anchors[:, 1] + a_h / 2
    x = t_x * a_w + a_x
    y = t_y * a_h + a_y
    w = tf.exp(t_w) * a_w
    h = tf.exp(t_h) * a_h

    x1 = tf.maximum(0., x - w / 2)
    y1 = tf.maximum(0., y - h / 2)
    x2 = tf.minimum(1., w + x1)
    y2 = tf.minimum(1., h + y1)
    return tf.stack([y1, x1, y2, x2], axis=1)
def __init__(self, epsilon=1e-2, shape=()):
    self._sum = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(0.0),
        name="runningsum", trainable=False)
    self._sumsq = tf.get_variable(
        dtype=tf.float64,
        shape=shape,
        initializer=tf.constant_initializer(epsilon),
        name="runningsumsq", trainable=False)
    self._count = tf.get_variable(
        dtype=tf.float64,
        shape=(),
        initializer=tf.constant_initializer(epsilon),
        name="count", trainable=False)
    self.shape = shape

    self.mean = tf.to_float(self._sum / self._count)
    self.std = tf.sqrt(tf.maximum(
        tf.to_float(self._sumsq / self._count) - tf.square(self.mean),
        1e-2))

    newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
    newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
    newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
    self.incfiltparams = U.function(
        [newsum, newsumsq, newcount], [],
        updates=[tf.assign_add(self._sum, newsum),
                 tf.assign_add(self._sumsq, newsumsq),
                 tf.assign_add(self._count, newcount)])
def clip(x, min_value, max_value):
    """Element-wise value clipping.

    If min_value > max_value, the clipping range is [min_value, min_value].

    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            If min_value is None, defaults to -infinity.
        max_value: Tensor, float, int, or None.
            If max_value is None, defaults to infinity.

    # Returns
        A tensor.
    """
    if max_value is None:
        max_value = np.inf
    if min_value is None:
        min_value = -np.inf
    min_value = _to_tensor(min_value, x.dtype.base_dtype)
    max_value = _to_tensor(max_value, x.dtype.base_dtype)
    max_value = tf.maximum(min_value, max_value)
    return tf.clip_by_value(x, min_value, max_value)
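The tf.maximum call above is what makes the reversed-bounds behavior described in the docstring hold: it raises max_value up to min_value, so tf.clip_by_value never receives an ill-defined range. A small sketch of the effect (illustrative only, assuming the clip function above and its _to_tensor helper are in scope):

import tensorflow as tf

x = tf.constant([0.0, 0.5, 1.0])

with tf.Session() as sess:
    print(sess.run(clip(x, 0.2, 0.8)))  # [0.2 0.5 0.8] -- normal clipping
    print(sess.run(clip(x, 0.8, 0.2)))  # [0.8 0.8 0.8] -- range collapses to [0.8, 0.8]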
def _apply_dense(self, grad, var):
    lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
    beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
    beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
    if var.dtype.base_dtype == tf.float16:
        # Can't use 1e-8 due to underflow
        eps = 1e-7
    else:
        eps = 1e-8

    v = self.get_slot(var, "v")
    v_t = v.assign(beta1_t * v + (1. - beta1_t) * grad)
    m = self.get_slot(var, "m")
    m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
    g_t = v_t / m_t

    var_update = tf.assign_sub(var, lr_t * g_t)
    return tf.group(*[var_update, m_t, v_t])
def scanline_error(tensor, shape):
    """Scanline error effect."""
    height, width, channels = shape

    value_shape = [height, width, 1]
    error_line = tf.maximum(basic([int(height * .75), 1], value_shape,
                                  distrib=ValueDistribution.exp) - .5, 0)
    error_swerve = tf.maximum(basic([int(height * .01), 1], value_shape,
                                    distrib=ValueDistribution.exp) - .5, 0)

    error_line *= error_swerve
    error_swerve *= 2

    white_noise = basic([int(height * .75), 1], value_shape)
    white_noise = effects.blend(0, white_noise, error_swerve)

    error = error_line + white_noise

    y_index = effects.column_index(shape)
    x_index = (effects.row_index(shape) -
               tf.cast(effects.value_map(error, value_shape) * width * .025, tf.int32)) % width

    return tf.minimum(tf.gather_nd(tensor, tf.stack([y_index, x_index], 2)) +
                      error_line * white_noise * 4, 1)
def _conform_kernel_to_tensor(kernel, tensor, shape):
    """ Re-shape a convolution kernel to match the given tensor's color dimensions. """

    l = len(kernel)
    channels = shape[-1]

    temp = np.repeat(kernel, channels)
    temp = tf.reshape(temp, (l, l, channels, 1))
    temp = tf.cast(temp, tf.float32)
    temp /= tf.maximum(tf.reduce_max(temp), tf.reduce_min(temp) * -1)

    return temp
def conv_feedback(tensor, shape, iterations=50, alpha=.5):
    """
    Conv2d feedback loop

    :param Tensor tensor:
    :return: Tensor
    """
    iterations = 100  # note: this hard-coded value overrides the `iterations` argument

    half_shape = [int(shape[0] * .5), int(shape[1] * .5), shape[2]]

    convolved = offset(_downsample(tensor, shape, half_shape), half_shape,
                       x=iterations * -3, y=iterations * -3)

    for i in range(iterations):
        convolved = convolve(ConvKernel.blur, convolved, half_shape)
        convolved = convolve(ConvKernel.sharpen, convolved, half_shape)

    convolved = normalize(convolved)

    up = tf.maximum((convolved - .5) * 2, 0.0)
    down = tf.minimum(convolved * 2, 1.0)

    return blend(tensor, resample(up + (1.0 - down), shape), alpha)
def blend_layers(control, shape, feather=1.0, *layers):
    layer_count = len(layers)

    control = normalize(control)
    control *= layer_count
    control_floor = tf.cast(control, tf.int32)

    x_index = row_index(shape)
    y_index = column_index(shape)

    layers = tf.stack(list(layers) + [layers[-1]])
    layer_count += 1

    floor_values = control_floor[:, :, 0]

    # I'm not sure why the mod operation is needed, but tensorflow-cpu explodes without it.
    combined_layer_0 = tf.gather_nd(layers, tf.stack([floor_values % layer_count, y_index, x_index], 2))
    combined_layer_1 = tf.gather_nd(layers, tf.stack([(floor_values + 1) % layer_count, y_index, x_index], 2))

    control_floor_fract = control - tf.floor(control)
    control_floor_fract = tf.minimum(tf.maximum(control_floor_fract - (1.0 - feather), 0.0) / feather, 1.0)

    return blend(combined_layer_0, combined_layer_1, control_floor_fract)
def bloom(tensor, shape, alpha=.5):
    """
    Bloom effect

    :param Tensor tensor:
    :param list[int] shape:
    :param float alpha:
    """
    height, width, channels = shape

    blurred = tf.maximum(tensor * 2.0 - 1.0, 0.0)
    blurred = _downsample(blurred, shape, [max(int(height * .01), 1), max(int(width * .01), 1), channels]) * 4.0
    blurred = resample(blurred, shape)
    blurred = offset(blurred, shape, x=int(shape[1] * -.05), y=int(shape[0] * -.05))

    return blend(tensor, normalize(tensor + blurred), alpha)
def yuv2rgb(yuv):
    """
    Convert YUV image into RGB
    https://en.wikipedia.org/wiki/YUV
    """
    yuv = tf.multiply(yuv, 255)
    yuv2rgb_filter = tf.constant([[[[1., 1., 1.],
                                    [0., -0.34413999, 1.77199996],
                                    [1.40199995, -0.71414, 0.]]]])
    yuv2rgb_bias = tf.constant([-179.45599365, 135.45983887, -226.81599426])

    yuv = tf.expand_dims(yuv, 0)
    temp = tf.nn.conv2d(yuv, yuv2rgb_filter, [1, 1, 1, 1], 'SAME')
    temp = tf.nn.bias_add(temp, yuv2rgb_bias)
    temp = tf.maximum(temp, tf.zeros(temp.get_shape(), dtype=tf.float32))
    temp = tf.minimum(temp, tf.multiply(
        tf.ones(temp.get_shape(), dtype=tf.float32), 255))
    temp = tf.divide(temp, 255)
    temp = tf.squeeze(temp, [0])
    return temp
def test_binary_ops_combined(self):
    # computation
    a = tf.placeholder(tf.float32, shape=(2, 3))
    b = tf.placeholder(tf.float32, shape=(2, 3))
    c = tf.add(a, b)
    d = tf.mul(c, a)
    e = tf.div(d, b)
    f = tf.sub(a, e)
    g = tf.maximum(a, f)

    # value
    a_val = np.random.rand(*tf_obj_shape(a))
    b_val = np.random.rand(*tf_obj_shape(b))

    # test
    self.run(g, tf_feed_dict={a: a_val, b: b_val})
def pre_process_data(image, training):
    if training:
        image = tf.random_crop(image, size=[img_size_cropped, img_size_cropped, cifar10.num_channels])
        image = tf.image.random_flip_left_right(image)
        # The random_* adjustments require explicit ranges; the values here
        # are representative choices for CIFAR-10-style augmentation.
        image = tf.image.random_hue(image, max_delta=0.05)
        image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
        image = tf.image.random_saturation(image, lower=0.0, upper=2.0)
        image = tf.image.random_brightness(image, max_delta=0.2)
        # Clamp the augmented image back into [0, 1]:
        # minimum caps values at 1.0, maximum floors them at 0.0.
        image = tf.minimum(image, 1.0)
        image = tf.maximum(image, 0.0)
    else:
        # for testing image
        image = tf.image.resize_image_with_crop_or_pad(image, img_size_cropped, img_size_cropped)

    return image
def _impute2D(self, X_2D):
    r"""Mean impute a rank 2 tensor."""
    # Fill zeros in for missing data initially
    data_zeroed_missing_tf = X_2D * self.real_val_mask

    # Sum the real values in each column
    col_tot = tf.reduce_sum(data_zeroed_missing_tf, 0)

    # Divide column totals by the number of non-nan values
    num_values_col = tf.reduce_sum(self.real_val_mask, 0)
    num_values_col = tf.maximum(num_values_col, tf.ones(tf.shape(num_values_col)))
    col_nan_means = tf.div(col_tot, num_values_col)

    # Make a vector of the impute values for each missing point
    imputed_vals = tf.gather(col_nan_means, self.missing_ind[:, 1])

    # Fill the imputed values into the data tensor of zeros
    shape = tf.cast(tf.shape(data_zeroed_missing_tf), dtype=tf.int64)
    missing_imputed = tf.scatter_nd(self.missing_ind, imputed_vals, shape)
    X_with_impute = data_zeroed_missing_tf + missing_imputed

    return X_with_impute
def filter_prediction(self, boxes, probs, cls_idx):
    """Filter bounding box predictions with probability threshold and
    non-maximum suppression.

    Args:
      boxes: array of [cx, cy, w, h].
      probs: array of probabilities
      cls_idx: array of class indices
    Returns:
      final_boxes: array of filtered bounding boxes.
      final_probs: array of filtered probabilities
      final_cls_idx: array of filtered class indices
    """
    mc = self.mc

    if mc.TOP_N_DETECTION < len(probs) and mc.TOP_N_DETECTION > 0:
        order = probs.argsort()[:-mc.TOP_N_DETECTION-1:-1]
        probs = probs[order]
        boxes = boxes[order]
        cls_idx = cls_idx[order]
    else:
        filtered_idx = np.nonzero(probs > mc.PROB_THRESH)[0]
        probs = probs[filtered_idx]
        boxes = boxes[filtered_idx]
        cls_idx = cls_idx[filtered_idx]

    final_boxes = []
    final_probs = []
    final_cls_idx = []

    for c in range(mc.CLASSES):
        idx_per_class = [i for i in range(len(probs)) if cls_idx[i] == c]
        keep = util.nms(boxes[idx_per_class], probs[idx_per_class], mc.NMS_THRESH)
        for i in range(len(keep)):
            if keep[i]:
                final_boxes.append(boxes[idx_per_class[i]])
                final_probs.append(probs[idx_per_class[i]])
                final_cls_idx.append(c)
    return final_boxes, final_probs, final_cls_idx
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters,
                                     filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :],
                                [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, distill_labels=None,
                 l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    cnn_size = FLAGS.cnn_cells
    num_filters = [cnn_size, cnn_size, cnn_size*2]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters,
                                     filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, distill_labels=distill_labels,
                                       scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :],
                                [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, lstm_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_sum(final_probilities * weight, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters,
                                     filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn_gate(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :],
                                [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters,
                                     filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = self.rnn_glu(cnn_output, lstm_size, num_frames, sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = self.sub_moe(cnn_multiscale, vocab_size, scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :],
                                [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", moe_inputs, weight2d), dim=1)
    result = {}
    result["prediction_frames"] = tf.reshape(final_probilities, [-1, vocab_size])
    result["predictions"] = tf.reduce_mean(final_probilities, axis=1)
    return result
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_extend = FLAGS.moe_num_extend
    num_layers = num_extend
    lstm_size = FLAGS.lstm_cells
    pool_size = 2
    cnn_input = model_input
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)
    final_probilities = []
    moe_inputs = []

    for layer in range(num_layers):
        cnn_output, num_t = LstmMultiscaleModel().cnn(cnn_input, num_filters=num_filters,
                                                      filter_sizes=filter_sizes,
                                                      sub_scope="cnn%d" % (layer+1))
        cnn_output = tf.nn.relu(cnn_output)
        cnn_multiscale = LstmMultiscaleModel().rnn(cnn_output, lstm_size, num_frames,
                                                   sub_scope="rnn%d" % (layer+1))
        moe_inputs.append(cnn_multiscale)
        final_probility = LstmMultiscaleModel().sub_moe(cnn_multiscale, vocab_size,
                                                        scopename="moe%d" % (layer+1))
        final_probilities.append(final_probility)
        num_t = pool_size * (num_t // pool_size)
        cnn_output = tf.reshape(cnn_output[:, :num_t, :],
                                [-1, num_t // pool_size, pool_size, features_size])
        cnn_input = tf.reduce_max(cnn_output, axis=2)
        num_frames = tf.maximum(num_frames // pool_size, 1)

    final_probilities = tf.stack(final_probilities, axis=1)
    moe_inputs = tf.stack(moe_inputs, axis=1)
    weight2d = tf.get_variable("ensemble_weight2d",
                               shape=[num_extend, features_size, vocab_size],
                               regularizer=slim.l2_regularizer(1.0e-8))
    weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", tf.stop_gradient(moe_inputs), weight2d), dim=1)
    result = {}
    result["predictions"] = tf.reduce_sum(tf.stop_gradient(final_probilities) * weight, axis=1)
    return result
def resize_axis(tensor, axis, new_size, fill_value=0):
    """Truncates or pads a tensor to new_size on a given axis.

    Truncate or extend tensor such that tensor.shape[axis] == new_size. If the
    size increases, the padding will be performed at the end, using fill_value.

    Args:
      tensor: The tensor to be resized.
      axis: An integer representing the dimension to be sliced.
      new_size: An integer or 0d tensor representing the new value for
        tensor.shape[axis].
      fill_value: Value to use to fill any new entries in the tensor. Will be
        cast to the type of tensor.

    Returns:
      The resized tensor.
    """
    tensor = tf.convert_to_tensor(tensor)
    shape = tf.unstack(tf.shape(tensor))

    pad_shape = shape[:]
    pad_shape[axis] = tf.maximum(0, new_size - shape[axis])

    shape[axis] = tf.minimum(shape[axis], new_size)
    shape = tf.stack(shape)

    resized = tf.concat([
        tf.slice(tensor, tf.zeros_like(shape), shape),
        tf.fill(tf.stack(pad_shape), tf.cast(fill_value, tensor.dtype))
    ], axis)

    # Update shape.
    new_shape = tensor.get_shape().as_list()  # A copy is being made.
    new_shape[axis] = new_size
    resized.set_shape(new_shape)
    return resized
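A short usage sketch of resize_axis (illustrative only, assuming the function above is in scope), showing both the pad and truncate paths. Note how tf.maximum(0, new_size - shape[axis]) makes the pad amount zero when the tensor is already long enough:

import tensorflow as tf

x = tf.constant([[1, 2], [3, 4], [5, 6]])  # shape (3, 2)

with tf.Session() as sess:
    print(sess.run(resize_axis(x, 0, 5)))  # rows 4-5 padded with fill_value 0 -> shape (5, 2)
    print(sess.run(resize_axis(x, 0, 2)))  # truncated to the first 2 rows -> shape (2, 2)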
def get_video_matrix(self, features, feature_size, max_frames,
                     max_quantized_value, min_quantized_value):
    """Decodes features from an input string and quantizes it.

    Args:
      features: raw feature values
      feature_size: length of each frame feature vector
      max_frames: number of frames (rows) in the output feature_matrix
      max_quantized_value: the maximum of the quantized value.
      min_quantized_value: the minimum of the quantized value.

    Returns:
      feature_matrix: matrix of all frame-features
      num_frames: number of frames in the sequence
    """
    decoded_features = tf.reshape(
        tf.cast(tf.decode_raw(features, tf.uint8), tf.float32),
        [-1, feature_size])

    num_frames = tf.minimum(tf.shape(decoded_features)[0], max_frames)
    feature_matrix = utils.Dequantize(decoded_features,
                                      max_quantized_value,
                                      min_quantized_value)
    feature_matrix = resize_axis(feature_matrix, 0, max_frames)
    return feature_matrix, num_frames
def SampleRandomSequence(model_input, num_frames, num_samples):
    """Samples a random sequence of frames of size num_samples.

    Args:
      model_input: A tensor of size batch_size x max_frames x feature_size
      num_frames: A tensor of size batch_size x 1
      num_samples: A scalar

    Returns:
      `model_input`: A tensor of size batch_size x num_samples x feature_size
    """

    batch_size = tf.shape(model_input)[0]
    frame_index_offset = tf.tile(
        tf.expand_dims(tf.range(num_samples), 0), [batch_size, 1])
    max_start_frame_index = tf.maximum(num_frames - num_samples, 0)
    start_frame_index = tf.cast(
        tf.multiply(
            tf.random_uniform([batch_size, 1]),
            tf.cast(max_start_frame_index + 1, tf.float32)), tf.int32)
    frame_index = tf.minimum(start_frame_index + frame_index_offset,
                             tf.cast(num_frames - 1, tf.int32))
    batch_index = tf.tile(
        tf.expand_dims(tf.range(batch_size), 1), [1, num_samples])
    index = tf.stack([batch_index, frame_index], 2)
    return tf.gather_nd(model_input, index)
def calculate_loss(self, predictions, labels, b=1.0, **unused_params):
    with tf.name_scope("loss_hinge"):
        float_labels = tf.cast(labels, tf.float32)
        all_zeros = tf.zeros(tf.shape(float_labels), dtype=tf.float32)
        all_ones = tf.ones(tf.shape(float_labels), dtype=tf.float32)
        sign_labels = tf.subtract(tf.scalar_mul(2, float_labels), all_ones)
        hinge_loss = tf.maximum(
            all_zeros, tf.scalar_mul(b, all_ones) - sign_labels * predictions)
        return tf.reduce_mean(tf.reduce_sum(hinge_loss, 1))
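For intuition, the hinge loss above maps {0, 1} labels to {-1, +1} signs and then penalizes any prediction on the wrong side of the margin b. A small NumPy sketch of the same arithmetic (illustrative values only):

import numpy as np

b = 1.0
labels = np.array([[1.0, 0.0]])        # one positive, one negative class
predictions = np.array([[0.4, -0.2]])

sign_labels = 2 * labels - 1           # {0, 1} -> {-1, +1}
hinge = np.maximum(0.0, b - sign_labels * predictions)
print(hinge)                  # [[0.6 0.8]] -- both predictions sit inside the margin
print(hinge.sum(1).mean())    # 1.4, matching reduce_mean(reduce_sum(..., 1))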
def calculate_loss(self, predictions, labels, margin=0.2, adaptive=3.0, origin=1.0, **unused_params):
    batch_size = FLAGS.batch_size
    num_classes = FLAGS.num_classes
    with tf.name_scope("loss_hinge"):
        # get sim_neg
        mask = tf.cast(labels, tf.float32)
        reverse_mask = 1.0 - mask
        min_true_pred = tf.reduce_min((predictions - 1.0) * mask, axis=1, keep_dims=True) + 1.0
        mask_wrong = tf.stop_gradient(
            tf.cast(predictions > (min_true_pred - margin), tf.float32) * reverse_mask)
        # get positive samples
        int_labels = tf.cast(labels, tf.int32)
        sample_labels = tf.unstack(int_labels, num=batch_size, axis=0)
        sample_predictions = tf.unstack(predictions, num=batch_size, axis=0)
        positive_predictions = []
        for sample_label, sample_prediction in zip(sample_labels, sample_predictions):
            indices = tf.where(sample_label > 0)
            expanded_indices = tf.tile(indices[:, 0], [num_classes])[:num_classes]
            rand_arrange = tf.random_uniform([num_classes], minval=0,
                                             maxval=num_classes, dtype=tf.int32)
            positive_indices = tf.stop_gradient(tf.gather(expanded_indices, rand_arrange))
            positive_prediction = tf.gather(sample_prediction, positive_indices)
            positive_predictions.append(positive_prediction)
        positive_predictions = tf.stack(positive_predictions)
        # hinge_loss
        hinge_loss = tf.maximum(predictions - positive_predictions + margin, 0.0)
        adaptive_loss = hinge_loss * mask_wrong
        adaptive_loss = tf.reduce_mean(tf.reduce_sum(adaptive_loss, axis=1))
        origin_loss = hinge_loss * reverse_mask
        origin_loss = tf.reduce_mean(tf.reduce_sum(origin_loss, axis=1))
        loss = adaptive * adaptive_loss + origin * origin_loss
        return loss
def calculate_loss(self, predictions, labels, **unused_params): with tf.name_scope("loss_softmax"): epsilon = 10e-8 float_labels = tf.cast(labels, tf.float32) # l1 normalization (labels are no less than 0) label_rowsum = tf.maximum( tf.reduce_sum(float_labels, 1, keep_dims=True), epsilon) norm_float_labels = tf.div(float_labels, label_rowsum) softmax_outputs = tf.nn.softmax(predictions) softmax_loss = tf.negative(tf.reduce_sum( tf.multiply(norm_float_labels, tf.log(softmax_outputs)), 1)) return tf.reduce_mean(softmax_loss)
def calculate_loss(self, predictions, labels, topk=20, **unused_params):
    with tf.name_scope("loss_xent_batch"):
        batch_agreement = FLAGS.batch_agreement
        epsilon = 10e-6
        float_batch_size = float(FLAGS.batch_size)

        topk_predictions, _ = tf.nn.top_k(predictions, k=20)  # note: literal 20, not the `topk` argument
        min_topk_predictions = tf.reduce_min(topk_predictions, axis=1, keep_dims=True)
        topk_mask = tf.cast(predictions >= min_topk_predictions, dtype=tf.float32)

        float_labels = tf.cast(labels, tf.float32)
        cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
            1 - float_labels) * tf.log(1 - predictions + epsilon)
        cross_entropy_loss = tf.negative(cross_entropy_loss)

        # minimum positive predictions in topk
        positive_predictions = (predictions * float_labels * topk_mask) + 1.0 - (float_labels * topk_mask)
        min_pp = tf.reduce_min(positive_predictions)

        # maximum negative predictions
        negative_predictions = predictions * (1.0 - float_labels)
        max_np = tf.reduce_max(negative_predictions)

        # 1s that fall under top-k
        false_negatives = tf.cast(predictions < min_topk_predictions, tf.float32) * float_labels
        # 0s that grow over 1s in top-k
        false_positives = tf.cast(predictions > min_pp, tf.float32) * (1.0 - float_labels) * topk_mask

        weight = (false_negatives + false_positives) * batch_agreement + 1.0
        weight = tf.stop_gradient(weight)
        print weight
        return tf.reduce_mean(tf.reduce_sum(weight * cross_entropy_loss, 1))
def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
    num_layers = FLAGS.multiscale_cnn_lstm_layers
    lstm_size = int(FLAGS.lstm_cells)
    pool_size = 2
    num_filters = [256, 256, 512]
    filter_sizes = [1, 2, 3]
    features_size = sum(num_filters)

    sub_predictions = []
    cnn_input = model_input
    cnn_max_frames = model_input.get_shape().as_list()[1]

    for layer in range(num_layers):
        cnn_output = self.cnn(cnn_input, num_filters=num_filters,
                              filter_sizes=filter_sizes, sub_scope="cnn%d" % (layer+1))
        cnn_output_relu = tf.nn.relu(cnn_output)

        lstm_memory = self.rnn(cnn_output_relu, lstm_size, num_frames,
                               sub_scope="rnn%d" % (layer+1))
        sub_prediction = self.moe(lstm_memory, vocab_size, scopename="moe%d" % (layer+1))
        sub_predictions.append(sub_prediction)

        cnn_max_frames /= pool_size
        max_pooled_cnn_output = tf.reduce_max(
            tf.reshape(
                cnn_output_relu[:, :cnn_max_frames * 2, :],
                [-1, cnn_max_frames, pool_size, features_size]),
            axis=2)

        # for the next cnn layer
        cnn_input = max_pooled_cnn_output
        num_frames = tf.maximum(num_frames / pool_size, 1)

    support_predictions = tf.concat(sub_predictions, axis=1)
    predictions = tf.add_n(sub_predictions) / len(sub_predictions)
    return {"predictions": predictions,
            "support_predictions": support_predictions}
def layer_normalize(self, input_raw, epsilon=1e-8):
    feature_dim = len(input_raw.get_shape()) - 1
    mean_input = tf.reduce_mean(input_raw, axis=feature_dim, keep_dims=True)
    std_input = tf.sqrt(tf.reduce_mean(tf.square(input_raw - mean_input),
                                       axis=feature_dim, keep_dims=True))
    std_input = tf.maximum(std_input, epsilon)
    output = (input_raw - mean_input) / std_input
    return output
def augment(self, model_input_raw, num_frames, labels_batch, **unused_params):
    # assert with a parenthesized tuple is always true; the bare form below is what was intended
    assert FLAGS.frame_features, "HalfAugmenter only works with frame feature"
    print "using HalfAugmenter"

    feature_dim = len(model_input_raw.get_shape()) - 1
    frame_dim = len(model_input_raw.get_shape()) - 2
    max_frame = model_input_raw.get_shape().as_list()[frame_dim]

    seg_length = max(int(max_frame / 2), 1)
    seg_num_frames = tf.maximum(num_frames / 2, 1)

    seg_inputs = []
    seg_frames = []
    seg_labels = []
    seg_inputs.append(model_input_raw)
    seg_frames.append(num_frames)
    seg_labels.append(labels_batch)

    for i in xrange(2):
        begin_frames = tf.reshape(seg_num_frames * i, [-1, 1])
        frames_index = tf.reshape(tf.range(seg_length), [1, seg_length])
        frames_index = begin_frames + frames_index
        batch_size = tf.shape(model_input_raw)[0]
        batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, seg_length])
        index = tf.stack([batch_index, tf.cast(frames_index, dtype=tf.int32)], 2)
        seg_input = tf.gather_nd(model_input_raw, index)
        seg_input = tf.pad(seg_input, paddings=[[0, 0], [0, max_frame - seg_length], [0, 0]])
        seg_input = seg_input * tf.expand_dims(
            tf.sequence_mask(seg_num_frames, maxlen=max_frame, dtype=tf.float32), axis=2)
        seg_inputs.append(seg_input)
        seg_frames.append(seg_num_frames)
        seg_labels.append(labels_batch)

    new_input_raw = tf.concat(seg_inputs, axis=0)
    new_num_frames = tf.concat(seg_frames, axis=0)
    new_labels_batch = tf.concat(seg_labels, axis=0)

    return new_input_raw, new_labels_batch, new_num_frames
def frame_augment(self, model_input_raw, num_frames, labels_batch, **unused_params):
    feature_dim = len(model_input_raw.get_shape()) - 1
    frame_dim = len(model_input_raw.get_shape()) - 2
    max_frame = model_input_raw.get_shape().as_list()[frame_dim]

    seg_length = max(int(max_frame / 2), 1)
    seg_num_frames = tf.maximum(num_frames / 2, 1)

    seg_inputs = []
    seg_frames = []
    seg_labels = []
    seg_inputs.append(model_input_raw)
    seg_frames.append(num_frames)
    seg_labels.append(labels_batch)

    for i in xrange(2):
        begin_frames = tf.reshape(seg_num_frames * i, [-1, 1])
        frames_index = tf.reshape(tf.range(seg_length), [1, seg_length])
        frames_index = begin_frames + frames_index
        batch_size = tf.shape(model_input_raw)[0]
        batch_index = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, seg_length])
        index = tf.stack([batch_index, tf.cast(frames_index, dtype=tf.int32)], 2)
        seg_input = tf.gather_nd(model_input_raw, index)
        seg_input = tf.pad(seg_input, paddings=[[0, 0], [0, max_frame - seg_length], [0, 0]])
        seg_input = seg_input * tf.expand_dims(
            tf.sequence_mask(seg_num_frames, maxlen=max_frame, dtype=tf.float32), axis=2)
        seg_inputs.append(seg_input)
        seg_frames.append(seg_num_frames)
        seg_labels.append(labels_batch)

    new_input_raw = tf.concat(seg_inputs, axis=0)
    new_num_frames = tf.concat(seg_frames, axis=0)
    new_labels_batch = tf.concat(seg_labels, axis=0)

    return new_input_raw, new_labels_batch, new_num_frames