The following code examples, extracted from open-source Python projects, illustrate how to use tensorflow.tuple().
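Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below, and assuming the TF 1.x graph-mode API that all of them use) of what tf.tuple() does: it returns tensors carrying the same values as its inputs, but each output only becomes available once every input and every op in control_inputs has been computed.

import tensorflow as tf  # TF 1.x graph-mode API, as in the examples below

a = tf.constant(2.0)
b = tf.constant(3.0)
counter = tf.Variable(0.0)
bump = tf.assign_add(counter, 1.0)

# Each output mirrors the corresponding input value, but is only returned
# after all inputs and all ops in control_inputs have run.
x, y = tf.tuple([a + b, a * b], control_inputs=[bump])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([x, y]))  # [5.0, 6.0]; `bump` is guaranteed to have executed as well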
def expand_bboxes(xmin, xmax, ymin, ymax, cfg):
    """ Expand the bboxes. """
    w = xmax - xmin
    h = ymax - ymin

    w = w * cfg.WIDTH_EXPANSION_FACTOR
    h = h * cfg.HEIGHT_EXPANSION_FACTOR

    half_w = w / 2.
    half_h = h / 2.

    xmin = tf.clip_by_value(xmin - half_w, 0, 1)
    xmax = tf.clip_by_value(xmax + half_w, 0, 1)
    ymin = tf.clip_by_value(ymin - half_h, 0, 1)
    ymax = tf.clip_by_value(ymax + half_h, 0, 1)

    return tf.tuple([xmin, xmax, ymin, ymax])
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
    """Compute precision and recall from scores, true positives and false
    positives booleans arrays.
    """
    # Sort by score.
    with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
        # Sort detections by score.
        scores, idxes = tf.nn.top_k(scores, k=n_detections, sorted=True)
        tp = tf.gather(tp, idxes)
        fp = tf.gather(fp, idxes)
        # Compute recall and precision.
        dtype = tf.float64
        tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
        fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
        recall = _safe_div(tp, tf.cast(n_gbboxes, dtype), 'recall')
        precision = _safe_div(tp, tp + fp, 'precision')
        return tf.tuple([precision, recall])
def recompute_grad(fn):
    """Decorator that recomputes the function on the backwards pass.

    Args:
        fn: a function that takes Tensors (all as positional arguments) and returns
            a tuple of Tensors.

    Returns:
        A wrapped fn that is identical to fn when called, but its activations will
        be discarded and recomputed on the backwards pass (i.e. on a call to
        tf.gradients).
    """

    @functools.wraps(fn)
    def wrapped(*args):
        return _recompute_grad(fn, args)

    return wrapped
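A hypothetical usage sketch for the decorator above (the layer function, sizes and tensors are made up; it assumes the companion helpers this function relies on, such as _recompute_grad and common_layers.fn_with_custom_grad shown later on this page, are available):

@recompute_grad
def two_layer_ffn(x):
    # Activations created inside this block are discarded after the forward pass
    # and recomputed when tf.gradients is called.
    h = tf.layers.dense(x, 512, activation=tf.nn.relu)
    return tf.layers.dense(h, 512)

x = tf.placeholder(tf.float32, [None, 512])
loss = tf.reduce_sum(two_layer_ffn(x))      # behaves like calling the undecorated fn
grads = tf.gradients(loss, tf.trainable_variables())  # recomputation happens here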
def fn_device_dependency(name, device=""):
    """Add control deps for name and device."""
    key = name + "_" + device
    outs = []

    def body():
        with tf.control_dependencies(fn_device_dependency_dict()[key]):
            yield outs
            assert outs

            deps = outs
            if isinstance(outs[0], list) or isinstance(outs[0], tuple):
                assert len(outs) == 1
                deps = outs[0]

            fn_device_dependency_dict()[key] = deps

    if device:
        with tf.device(device):
            return body()
    else:
        return body()
def _clip_grad_global_norms(self, tvars, loss, opt, global_norm=8, gate_gradients=1,
                            gradient_noise_scale=None, GATE_GRAPH=2, grad_loss=None,
                            agre_method=None, col_grad_ops=False):
    """Clips the gradients by the given value.

    Args:
        tvars: trainable variables used for gradient updates
        loss: total loss of the network
        opt: optimizer
        global_norm: the maximum global norm

    Returns:
        A list of clipped gradient to variable pairs.
    """
    var_refs = [v.read_value() for v in tvars]
    grads = tf.gradients(loss, var_refs, grad_ys=grad_loss,
                         gate_gradients=(gate_gradients == 1),
                         aggregation_method=agre_method,
                         colocate_gradients_with_ops=col_grad_ops)
    if gradient_noise_scale is not None:
        grads = self._add_scaled_noise_to_gradients(
            list(zip(grads, tvars)), gradient_noise_scale=gradient_noise_scale)
    if gate_gradients == GATE_GRAPH:
        grads = tf.tuple(grads)
    grads, _ = tf.clip_by_global_norm(grads, global_norm)
    grads_and_vars = list(zip(grads, tvars))
    return grads_and_vars
def _get_step(self, inputs):
    Z, Y, X, theta, lmbd = self.inputs
    K, p = self.D.shape
    L = self.L
    with tf.name_scope("ISTA_iteration"):
        self.S = tf.constant(np.eye(K, dtype=np.float32) - self.S0 / L,
                             shape=[K, K], name='S')
        self.We = tf.constant(self.D.T / L, shape=[p, K],
                              dtype=tf.float32, name='We')
        hk = tf.matmul(Y, self.S) + tf.matmul(X, self.We)
        self.step_FISTA = Zk = soft_thresholding(hk, lmbd / L)
        # self.theta_k = tk = (tf.sqrt(theta*theta+4) - theta)*theta/2
        self.theta_k = tk = (1 + tf.sqrt(1 + 4 * theta * theta)) / 2
        dZ = tf.subtract(Zk, Z)
        # self.Yk = Zk + tk*(1/theta-1)*dZ
        self.Yk = Zk + (theta - 1) / tk * dZ
        self.dz = tf.reduce_mean(tf.reduce_sum(dZ * dZ, reduction_indices=[1]))

        step = tf.tuple([Zk, tk, self.Yk])
    return step, self.dz
def birnn(cell, inputs, sequence_length, initial_state_fw=None, initial_state_bw=None,
          ff_keep_prob=1., recur_keep_prob=1., dtype=tf.float32, scope=None):
    """"""
    # Forward direction
    with tf.variable_scope(scope or 'BiRNN_FW') as fw_scope:
        output_fw, output_state_fw = rnn(cell, inputs, sequence_length,
                                         initial_state_fw, ff_keep_prob,
                                         recur_keep_prob, dtype, scope=fw_scope)

    # Backward direction
    rev_inputs = tf.reverse_sequence(inputs, sequence_length, 1, 0)
    with tf.variable_scope(scope or 'BiRNN_BW') as bw_scope:
        output_bw, output_state_bw = rnn(cell, rev_inputs, sequence_length,
                                         initial_state_bw, ff_keep_prob,
                                         recur_keep_prob, dtype, scope=bw_scope)
    output_bw = tf.reverse_sequence(output_bw, sequence_length, 1, 0)

    # Concat each of the forward/backward outputs
    outputs = tf.concat([output_fw, output_bw], 2)

    return outputs, tf.tuple([output_state_fw, output_state_bw])

#===============================================================
def filterOutputBoxes(self, boxes, scores, others=[], preNmsCount=6000,
                      maxOutSize=300, nmsThreshold=0.7):
    with tf.name_scope("filter_output_boxes"):
        scores = tf.nn.softmax(scores)[:, 1]
        scores = tf.reshape(scores, [-1])

        # Clip boxes to edge
        boxes = self.clipBoxesToEdge(boxes)

        # Remove empty boxes
        boxes, scores = BoxUtils.filterSmallBoxes(boxes, [scores])
        scores, boxes = tf.cond(tf.shape(scores)[0] > preNmsCount,
                                lambda: tf.tuple(MultiGather.gatherTopK(scores, preNmsCount, [boxes])),
                                lambda: tf.tuple([scores, boxes]))

        # NMS filter
        nmsIndices = tf.image.non_max_suppression(boxes, scores, iou_threshold=nmsThreshold,
                                                  max_output_size=maxOutSize)
        nmsIndices = tf.expand_dims(nmsIndices, axis=-1)

        return MultiGather.gather([boxes, scores] + others, nmsIndices)
def _combine(self, concat, *argv):
    if concat:
        y = _concat(list(argv), axis=3)
    else:
        y = tuple(argv)
    return y
def _split(self, concat, n_filter, x):
    if concat or type(x) != tuple:
        x1 = x[:, :, :, :n_filter // 2]
        x2 = x[:, :, :, n_filter // 2:]
    else:
        x1, x2 = x
    return x1, x2
def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
                     dtype=tf.float64, scope=None):
    """Compute precision and recall from scores, true positives and false
    positives booleans arrays.
    """
    # Input dictionaries: dict outputs as streaming metrics.
    if isinstance(scores, dict):
        d_precision = {}
        d_recall = {}
        for c in num_gbboxes.keys():
            scope = 'precision_recall_%s' % c
            p, r = precision_recall(num_gbboxes[c], num_detections[c],
                                    tp[c], fp[c], scores[c],
                                    dtype, scope)
            d_precision[c] = p
            d_recall[c] = r
        return d_precision, d_recall

    # Sort by score.
    with tf.name_scope(scope, 'precision_recall',
                       [num_gbboxes, num_detections, tp, fp, scores]):
        # Sort detections by score.
        scores, idxes = tf.nn.top_k(scores, k=num_detections, sorted=True)
        tp = tf.gather(tp, idxes)
        fp = tf.gather(fp, idxes)
        # Compute recall and precision.
        tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
        fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
        recall = _safe_div(tp, tf.cast(num_gbboxes, dtype), 'recall')
        precision = _safe_div(tp, tp + fp, 'precision')
        return tf.tuple([precision, recall])
def precision_recall_values(xvals, precision, recall, name=None):
    """Compute values on the precision/recall curve.

    Args:
        xvals: Python list of floats;
        precision: 1D Tensor decreasing.
        recall: 1D Tensor increasing.
    Return:
        list of precision values.
    """
    with ops.name_scope(name, "precision_recall_values",
                        [precision, recall]) as name:
        # Add bounds values to precision and recall.
        precision = tf.concat([[0.], precision, [0.]], axis=0)
        recall = tf.concat([[0.], recall, [1.]], axis=0)
        precision = tfe_math.cummax(precision, reverse=True)

        prec_values = []
        for x in xvals:
            mask = tf.less_equal(recall, x)
            val = tf.reduce_min(tf.boolean_mask(precision, mask))
            prec_values.append(val)
        return tf.tuple(prec_values)


# =========================================================================== #
# TF Extended metrics: old stuff!
# =========================================================================== #
def as_tuple_or_list(obj):
    """
    Make sure that `obj` is a tuple or a list; otherwise wrap it into a
    single-element list.

    :param obj:
    :return: A `tuple` or a `list`
    """
    return obj if isinstance(obj, (list, tuple)) else [obj]
def assign(self, value_list):
    if isinstance(value_list, ZMergedMatrix):
        value_list = value_list.components

    assert len(value_list) == len(self.components), \
        'the length of value_list and of z, components must coincide'

    value_list = tf.tuple(value_list)  # THIS PROBABLY SOLVES THE PROBLEM!
    ao1 = [tf.assign(c, v) for c, v in zip(self.components, value_list)]
    return tf.group(*ao1)

# noinspection PyUnusedLocal
def _array_to_tuple(inputs, size, shape=None):
    """ Convert tf.TensorArray to tf.Tuple. """
    with tf.variable_scope('array_to_tuple'):
        if shape is None:
            output = tf.tuple([inputs.read(i) for i in range(size)])
        else:
            output = tf.tuple([tf.reshape(inputs.read(i), shape) for i in range(size)])
    return output
def stop_gradient_tuple(self, inputs):
    """ Stop gradients through tf.tuple. """
    for i, _ in enumerate(inputs):
        inputs[i] = tf.stop_gradient(inputs[i])
    return inputs
def create_bbox_batch(cls, inputs, batch_size=64):
    """ Create batch indices for bboxes. """
    batch = []
    for indices in inputs:
        indices = tf.random_shuffle(indices)
        start = [0] * 2
        size = [tf.minimum(batch_size, tf.shape(indices)[0]), -1]
        sample = tf.slice(indices, start, size)
        sample.set_shape([None, 1])
        batch.append(sample)
    batch = tf.tuple(batch)
    return batch
def _unstack_tuple(self, inputs, tensor_sizes):
    size = len(tensor_sizes)
    start_position = tf.constant(0)
    output = []
    dim = len(inputs.get_shape().as_list()) - 1
    for i in range(size):
        output.append(tf.slice(inputs,
                               begin=[start_position, *([0] * dim)],
                               size=[tensor_sizes[i], *([-1] * dim)]))
        start_position = start_position + tensor_sizes[i]
    return tf.tuple(output)
def unflatten_into_tensors(flatparams_P, output_shapes, name=None):
    """
    Unflattens a vector produced by flatcat into a list of tensors of the specified shapes.
    """
    with tf.op_scope([flatparams_P], name, 'unflatten_into_tensors') as scope:
        outputs = []
        curr_pos = 0
        for shape in output_shapes:
            size = np.prod(shape).astype('int')
            flatval = flatparams_P[curr_pos:curr_pos + size]
            outputs.append(tf.reshape(flatval, shape))
            curr_pos += size
        assert curr_pos == flatparams_P.get_shape().num_elements(), "{} != {}".format(
            curr_pos, flatparams_P.get_shape().num_elements())
        return tf.tuple(outputs, name=scope)
def subsample_feed(feed, frac):
    assert isinstance(feed, tuple) and len(feed) >= 1
    assert isinstance(frac, float) and 0. < frac <= 1.
    l = feed[0].shape[0]
    assert all(a.shape[0] == l for a in feed), 'All feed entries must have the same length'
    subsamp_inds = np.random.choice(l, size=int(frac * l))
    return tuple(a[subsamp_inds, ...] for a in feed)
def dot_product_attention(q, k, v, bias, dropout_rate=0.0, image_shapes=None,
                          name=None, make_image_summary=True):
    """dot-product attention.

    Args:
        q: a Tensor with shape [batch, heads, length_q, depth_k]
        k: a Tensor with shape [batch, heads, length_kv, depth_k]
        v: a Tensor with shape [batch, heads, length_kv, depth_v]
        bias: bias Tensor (see attention_bias())
        dropout_rate: a floating point number
        image_shapes: optional tuple of integer scalars.
            see comments for attention_image_summary()
        name: an optional string
        make_image_summary: True if you want an image summary.

    Returns:
        A Tensor.
    """
    with tf.variable_scope(name, default_name="dot_product_attention", values=[q, k, v]):
        logits = tf.matmul(q, k, transpose_b=True)
        if bias is not None:
            logits += bias
        weights = tf.nn.softmax(logits, name="attention_weights")
        weights = tf.nn.dropout(weights, 1.0 - dropout_rate)
        return tf.matmul(weights, v)
def _setup_summaries(self):
    with tf.name_scope('summaries'):
        self.epoch_loss = tf.placeholder(tf.float32, shape=[], name="epoch_loss")

        # Training summaries
        tf.summary.scalar('learning rate', self.learning_rate,
                          collections=[TRAINING_EPOCH_SUMMARIES])
        tf.summary.scalar('training (cross entropy) loss', self.epoch_loss,
                          collections=[TRAINING_EPOCH_SUMMARIES])
        if len(self.inputs.get_shape()) == 4:
            summary.summary_image(self.inputs, 'inputs', max_images=10,
                                  collections=[TRAINING_BATCH_SUMMARIES])
        for key, val in self.training_end_points.iteritems():
            summary.summary_activation(val, name=key,
                                       collections=[TRAINING_BATCH_SUMMARIES])
        summary.summary_trainable_params(['scalar', 'histogram', 'norm'],
                                         collections=[TRAINING_BATCH_SUMMARIES])
        summary.summary_gradients(self.grads_and_vars,
                                  ['scalar', 'histogram', 'norm'],
                                  collections=[TRAINING_BATCH_SUMMARIES])

        # Validation summaries
        for key, val in self.validation_end_points.iteritems():
            summary.summary_activation(val, name=key,
                                       collections=[VALIDATION_BATCH_SUMMARIES])
        tf.summary.scalar('validation loss', self.epoch_loss,
                          collections=[VALIDATION_EPOCH_SUMMARIES])

        self.validation_metric_placeholders = []
        for metric_name, _ in self.validation_metrics_def:
            validation_metric = tf.placeholder(tf.float32, shape=[],
                                               name=metric_name.replace(' ', '_'))
            self.validation_metric_placeholders.append(validation_metric)
            tf.summary.scalar(metric_name, validation_metric,
                              collections=[VALIDATION_EPOCH_SUMMARIES])
        self.validation_metric_placeholders = tuple(self.validation_metric_placeholders)
def get_state_variables_for_batch(state_variables, batch_size):
    """Returns a subset of the state variables.

    This function takes the state variables returned by get_state_variables() and returns a subset
    for an actual forward-propagation run. Specifically, it clips each of the state variables to
    the given batch size.

    Before this call, each variable's first dimension has length max_batch_size but when the input
    has a lower batch size, the LSTM should also only update the state variables for the used
    batches.

    See get_state_variables() for more info.

    Args:
        state_variables (tuple[tf.contrib.rnn.LSTMStateTuple]): The LSTM's state variables.
        batch_size (tf.Tensor): A 0-dimensional tensor containing the batch size in the
            computational graph.

    Returns:
        tuple[tf.contrib.rnn.LSTMStateTuple]: A new tuple of state variables clipped to the given
            batch size.
    """
    # Return a tuple of LSTMStateTuples but with only the first batch_size rows for each variable
    # in the tuples.
    result = []
    for state_c, state_h in state_variables:
        result.append(tf.contrib.rnn.LSTMStateTuple(state_c[:batch_size], state_h[:batch_size]))
    return tuple(result)
def get_state_update_op(state_variables, new_states):
    """Returns an operation to update an LSTM's state variables.

    See get_state_variables() for more info.

    Args:
        state_variables (tuple[tf.contrib.rnn.LSTMStateTuple]): The LSTM's state variables.
        new_states (tuple[tf.contrib.rnn.LSTMStateTuple]): The new values for the state variables.
            new_states may have state tuples with state sizes < max_batch_size. Then, only the
            first rows of the corresponding state variables will be updated.

    Returns:
        tf.Operation: An operation that updates the LSTM's states.
    """
    # Add an operation to update the train states with the last state tensors.
    update_ops = []
    for state_variable, new_state in zip(state_variables, new_states):
        # new_state[0] might be smaller than state_variable[0], because state_variable[0]
        # contains max_batch_size entries.

        # Get the update indices for both states in the tuple
        update_indices = (tf.range(0, tf.shape(new_state[0])[0]),
                          tf.range(0, tf.shape(new_state[1])[0]))
        update_ops.extend([
            tf.scatter_update(state_variable[0], update_indices[0], new_state[0]),
            tf.scatter_update(state_variable[1], update_indices[1], new_state[1])
        ])
    return tf.tuple(update_ops)
def get_state_reset_op(state_variables, cell, max_batch_size):
    """Returns an operation to set each variable in a list of LSTMStateTuples to zero.

    See get_state_variables() for more info.

    Args:
        state_variables (tuple[tf.contrib.rnn.LSTMStateTuple]): The LSTM's state variables.
        cell (tf.contrib.rnn.MultiRNNCell): A MultiRNNCell consisting of multiple LSTMCells.
        max_batch_size (int): The maximum size of batches that are fed to the LSTMCell.

    Returns:
        tf.Operation: An operation that sets the LSTM's state to zero.
    """
    zero_states = cell.zero_state(max_batch_size, tf.float32)
    return get_state_update_op(state_variables, zero_states)
def _build_computation_graph(self, x, y, opt):
    """Builds the (device or runtime specific) computation graph.

    Parameters
    ----------
    x: n-D Tensor
        The inputs tensor.
    y: m-D Tensor
        The targets tensor.
    opt: Optimizer
        The TensorFlow optimizer instance.

    Returns
    ----------
    A tuple of (grads, summaries, total_loss, loss, eval_dict)
    """
    pass
def datasets(self):
    """Gets the datasets as a named tuple.

    Use the members ds.train, ds.valid or ds.test of the returned tuple.
    """
    return self._datasets
def placeholders(self):
    """Gets the placeholders as a named tuple."""
    return self._ph
def gpu(self):
    """Gets the gpu config as a named tuple."""
    return self._gpu
def __init__(self, x, size, selectTrain, sess, toTarget=None, ts=0.001):
    self.sess = sess
    self.mean_x_train, self.variance_x_train = moments(x, [0])
    # self.mean_x_ma, self.variance_x_ma = moments(self.x_splh, [0])
    self.mean_x_ma = tf.Variable(tf.zeros([size]))
    self.variance_x_ma = tf.Variable(tf.ones([size]))

    self.update = tf.tuple(
        [self.variance_x_ma.assign(0.95 * self.variance_x_ma + 0.05 * self.variance_x_train)],
        control_inputs=[self.mean_x_ma.assign(0.95 * self.mean_x_ma + 0.05 * self.mean_x_train)])[0]

    self.mean_x_ma_update = tf.tuple([self.mean_x_train], control_inputs=[])[0]
    self.printUp = tf.Print(self.mean_x_ma_update, [selectTrain],
                            message="selectTrain value : ")
    self.variance_x_ma_update = tf.tuple([self.variance_x_train], control_inputs=[])[0]

    def getxmau(): return self.mean_x_ma_update
    def getxma(): return self.mean_x_ma
    def getvxmau(): return self.variance_x_ma_update
    def getvxma(): return self.variance_x_ma

    self.mean_x = tf.cond(selectTrain, getxmau, getxma)
    self.variance_x = tf.cond(selectTrain, getvxmau, getvxma)

    self.beta = tf.Variable(tf.zeros([size]))
    self.gamma = tf.Variable(tf.ones([size]))
    # tfs.tfs.session.run(tf.initialize_variables([self.beta, self.gamma]))  # , self.mean_x_ma, self.variance_x_ma]))

    self.xNorm = tf.reshape(
        tf.nn.batch_norm_with_global_normalization(
            tf.reshape(x, [-1, 1, 1, size]), self.mean_x, self.variance_x,
            self.beta, self.gamma, 0.01, True),
        [-1, size])

    if toTarget != None:
        self.isTracking = toTarget
        self.updateBeta = self.beta.assign(self.beta * (1 - ts) + self.isTracking.beta * ts)
        self.updateGamma = self.gamma.assign(self.gamma * (1 - ts) + self.isTracking.gamma * ts)
        self.updateTarget = tf.group(self.updateBeta, self.updateGamma)
def _rev_layer_forward(xs, f, g, f_side_input, g_side_input, gate_outputs=False):
    """Forward for 1 reversible layer."""
    x1, x2 = xs
    y1 = x1 + (f(x2, f_side_input) if f_side_input else f(x2))
    y2 = x2 + (g(y1, g_side_input) if g_side_input else g(y1))
    if gate_outputs:
        return tf.tuple([y1, y2])
    else:
        return (y1, y2)
def _recompute_grad(fn, args):
    """See recompute_grad."""

    cached_vs = []
    cached_arg_scope = []

    def grad_fn(inputs, variables, outputs, output_grads):
        """Recompute outputs for gradient computation."""
        del outputs
        # Recompute outputs
        with tf.control_dependencies(output_grads):
            with tf.contrib.framework.arg_scope(cached_arg_scope[0]):
                with tf.variable_scope(cached_vs[0], reuse=True):
                    outputs = fn(*inputs)

        if not (isinstance(outputs, list) or isinstance(outputs, tuple)):
            outputs = [outputs]
        outputs = list(outputs)
        grads = tf.gradients(outputs, inputs + variables, output_grads)
        grad_inputs = grads[:len(inputs)]
        grad_vars = grads[len(inputs):]
        return grad_inputs, grad_vars

    @common_layers.fn_with_custom_grad(grad_fn)
    def fn_with_recompute(*args):
        cached_vs.append(tf.get_variable_scope())
        # TODO(rsepassi): Rm conditional in TF 1.5
        if hasattr(tf.contrib.framework, "current_arg_scope"):
            cached_arg_scope.append(tf.contrib.framework.current_arg_scope())
        else:
            cached_arg_scope.append({})
        return fn(*args)

    return fn_with_recompute(*args)
def precision_recall(num_gbboxes, tp, fp, scope=None):
    """Compute precision and recall from true positives and false
    positives booleans arrays.
    """
    # Sort by score.
    with tf.name_scope(scope, 'precision_recall'):
        # Compute recall and precision.
        tp = tf.reduce_sum(tf.cast(tp, tf.float32), axis=0)
        fp = tf.reduce_sum(tf.cast(fp, tf.float32), axis=0)
        recall = tfe_math.safe_divide(tp, tf.cast(num_gbboxes, tf.float32), 'recall')
        precision = tfe_math.safe_divide(tp, tp + fp, 'precision')
        return tf.tuple([precision, recall])
def mirror(image, boxes):
    def doMirror(image, boxes):
        image = tf.reverse(image, axis=[2])

        x0, y0, x1, y1 = tf.unstack(boxes, axis=1)
        w = tf.cast(tf.shape(image)[2], tf.float32)
        x0_m = w - x1
        x1_m = w - x0

        return image, tf.stack([x0_m, y0, x1_m, y1], axis=1)

    uniform_random = tf.random_uniform([], 0, 1.0)
    return tf.cond(uniform_random < 0.5,
                   lambda: tf.tuple([image, boxes]),
                   lambda: doMirror(image, boxes))
def gatherTopK(t, k, others=[], sorted=False):
    res = []
    with tf.name_scope("gather_top_k"):
        isMoreThanK = tf.shape(t)[-1] > k
        values, indices = tf.cond(isMoreThanK,
                                  lambda: tf.nn.top_k(t, k=k, sorted=sorted),
                                  lambda: tf.tuple([t, tf.zeros((0, 1), tf.int32)]))
        indices = tf.reshape(indices, [-1, 1])
        res.append(values)

        for o in others:
            res.append(tf.cond(isMoreThanK,
                               lambda: tf.gather_nd(o, indices),
                               lambda: o))
    return res
def distorted_bounding_box_crop(image, bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
    """Generates cropped_image using one of the bboxes randomly distorted.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
        bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
            where each coordinate is [0, 1) and the coordinates are arranged
            as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the
            whole image.
        min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
            area of the image must contain at least this fraction of any bounding
            box supplied.
        aspect_ratio_range: An optional list of `floats`. The cropped area of the
            image must have an aspect ratio = width / height within this range.
        area_range: An optional list of `floats`. The cropped area of the image
            must contain a fraction of the supplied image within this range.
        max_attempts: An optional `int`. Number of attempts at generating a cropped
            region of the image of the specified constraints. After `max_attempts`
            failures, return the entire image.
        scope: Optional scope for name_scope.
    Returns:
        A tuple, a 3-D Tensor cropped_image and the distorted bbox.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
        # Each bounding box has shape [1, num_boxes, box coords] and
        # the coordinates are ordered [ymin, xmin, ymax, xmax].

        # A large fraction of image datasets contain a human-annotated bounding
        # box delineating the region of the image containing the object of interest.
        # We choose to create a new bounding box for the object which is a randomly
        # distorted version of the human-annotated bounding box that obeys an
        # allowed range of aspect ratios, sizes and overlap with the human-annotated
        # bounding box. If no box is supplied, then we assume the bounding box is
        # the entire image.
        sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bbox,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box

        # Crop the image to the specified bounding box.
        cropped_image = tf.slice(image, bbox_begin, bbox_size)
        return tf.tuple([cropped_image, distort_bbox])
def non_max_suppression(inputs, scores, batch_size, max_output_size,
                        score_threshold=0.7, iou_threshold=0.7, nonempty=False, name='nms'):
    """ Perform NMS on a batch of images.

    Parameters
    ----------
    inputs: tf.Tuple
        each component is a set of bboxes for the corresponding image
    scores: tf.Tuple
        scores of inputs
    batch_size:
        size of batch of inputs
    max_output_size:
        maximal number of bboxes per image
    score_threshold: float
        bboxes with score less than score_threshold will be dropped
    iou_threshold: float
        bboxes with iou greater than iou_threshold will be merged
    nonempty: bool
        if True, at least one bbox per image will be returned
    name: str
        scope name

    Returns
    -------
    tf.Tuple
        indices of selected bboxes for each image
    """
    with tf.variable_scope(name):
        ix = tf.constant(0)
        filtered_rois = tf.TensorArray(dtype=tf.int32, size=batch_size, infer_shape=False)
        loop_cond = lambda ix, filtered_rois: tf.less(ix, batch_size)

        def _loop_body(ix, filtered_rois):
            indices, score, roi = _filter_tensor(scores[ix], score_threshold, inputs[ix])  # pylint: disable=unbalanced-tuple-unpacking
            roi_corners = tf.concat([roi[:, :2], roi[:, :2] + roi[:, 2:]], axis=-1)
            roi_after_nms = tf.image.non_max_suppression(roi_corners, score,
                                                         max_output_size, iou_threshold)
            if nonempty:
                is_not_empty = lambda: filtered_rois.write(
                    ix, tf.cast(tf.gather(indices, roi_after_nms), dtype=tf.int32))
                is_empty = lambda: filtered_rois.write(ix, tf.constant([[0]]))
                filtered_rois = tf.cond(tf.not_equal(tf.shape(indices)[0], 0),
                                        is_not_empty, is_empty)
            else:
                filtered_rois = filtered_rois.write(
                    ix, tf.cast(tf.gather(indices, roi_after_nms), dtype=tf.int32))
            return [ix + 1, filtered_rois]

        _, res = tf.while_loop(loop_cond, _loop_body, [ix, filtered_rois])
        res = _array_to_tuple(res, batch_size, [-1, 1])
    return res
def _rcn_head(self, inputs, image_shape, nms_threshold, rpn_thresholds,
              rcn_batch, batch_size, name='rcn_head', **kwargs):
    anchors_labels = self.anchors_placeholders['labels']
    feature_maps, rpn_reg, rpn_cls = inputs
    n_anchors = self.n_anchors

    with tf.variable_scope(name):
        rcn_input_indices = non_max_suppression(rpn_reg, rpn_cls, batch_size, n_anchors,
                                                iou_threshold=nms_threshold,
                                                score_threshold=rpn_thresholds[1],
                                                nonempty=True)

        rcn_input_indices = tf.cond(self.is_training,
                                    lambda: self.create_bbox_batch(rcn_input_indices, rcn_batch),
                                    lambda: rcn_input_indices)

        rcn_input_rois, rcn_input_labels = self._get_rois_and_labels(rpn_reg, anchors_labels,
                                                                     rcn_input_indices)

        for tensor in rcn_input_rois:
            tf.add_to_collection('roi', tensor)
        for tensor in rcn_input_labels:
            tf.add_to_collection('targets', tensor)

        roi_factor = np.array(self.map_shape / image_shape)

        rcn_input_rois = self.stop_gradient_tuple(rcn_input_rois)
        rcn_input_labels = self.stop_gradient_tuple(rcn_input_labels)

        roi_cropped = roi_pooling_layer(feature_maps, rcn_input_rois,
                                        factor=roi_factor, shape=(7, 7),
                                        data_format=kwargs['data_format'])
        indices, roi_cropped, rcn_input_labels = self._stack_tuple(roi_cropped, rcn_input_labels)  # pylint: disable=unbalanced-tuple-unpacking

        rcn_clsf = conv_block(roi_cropped, 'f', units=10, name='output_conv', **kwargs)

        loss = self.rcn_loss(rcn_clsf, rcn_input_labels)

        rcn_clsf = tf.argmax(rcn_clsf, axis=-1)
        rcn_clsf = self._unstack_tuple(rcn_clsf, indices)
        rcn_clsf = tf.tuple(rcn_clsf, name='clsf')

        for tensor in rcn_clsf:
            tf.add_to_collection('rcn_output', tensor)

        loss = tf.identity(loss, 'loss')
    return rcn_clsf, loss
def conv2d_v2(inputs, n_output_channels, is_training, reuse, **kwargs):
    """Adds a 2D dilated convolutional layer, also known as convolution with holes
    or atrous convolution.

    If the rate parameter is equal to one, it performs regular 2-D convolution. If the rate
    parameter is greater than one, it performs convolution with holes, sampling the input
    values every rate pixels in the height and width dimensions.

    `convolutional layer` creates a variable called `weights`, representing a conv weight
    matrix, which is multiplied by the `x` to produce a `Tensor` of hidden units. If a
    `batch_norm` is provided (such as `batch_norm`), it is then applied. Otherwise, if
    `batch_norm` is None and a `b_init` and `use_bias` is provided then a `biases` variable
    would be created and added to the hidden units. Finally, if `activation` is not `None`,
    it is applied to the hidden units as well.

    Note that `x` must have rank 4.

    Args:
        x: A 4-D `Tensor` with rank 4 and value for the last dimension,
            i.e. `[batch_size, in_height, in_width, depth]`.
        is_training: Bool, training or testing
        n_output: Integer or long, the number of output units in the layer.
        reuse: whether or not the layer and its variables should be reused. To be
            able to reuse the layer scope must be given.
        filter_size: an int or list/tuple of 2 positive integers specifying the spatial
            dimensions of the filters.
        dilation: A positive int32. The stride with which we sample input values across
            the height and width dimensions. Equivalently, the rate by which we upsample the
            filter values by inserting zeros across the height and width dimensions. In the
            literature, the same parameter is sometimes called input stride/rate or dilation.
        padding: one of `"VALID"` or `"SAME"`. If padding is LEFT, the input is preprocessed
            so that VALID padding can be used.
        activation: activation function, set to None to skip it and maintain
            a linear activation.
        batch_norm: normalization function to use. If `batch_norm` is `True` then the google
            original implementation is used and if another function is provided then it is
            applied. Default is set to None for no normalizer function.
        batch_norm_args: normalization function parameters.
        w_init: An initializer for the weights.
        w_regularizer: Optional regularizer for the weights.
        untie_biases: spatial dimension-wise biases
        b_init: An initializer for the biases. If None skip biases.
        outputs_collections: The collections to which the outputs are added.
        trainable: If `True` also add variables to the graph collection
            `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
        name: Optional name or scope for variable_scope/name_scope.
        use_bias: Whether to add bias or not

    Returns:
        The 4-D `Tensor` variable representing the result of the series of operations.
        e.g.: 4-D `Tensor` [batch, new_height, new_width, n_output].

    Raises:
        ValueError: if x has rank less than 4 or if its last dimension is not set.
    """
    if 'padding' in kwargs and kwargs['padding'] == 'LEFT':
        inputs, kwargs = format_input_left_padding(inputs, **kwargs)
    return dilated_conv2d(inputs, n_output_channels, is_training, reuse, **kwargs)
def conv2d_gru(inputs, n_output_channels, is_training, reuse, filter_size=3, padding="SAME",
               dilation=1, name='conv2d_gru', outputs_collections=None, **kwargs):
    """Adds a convolutional GRU layer in 1 dimension.

    Args:
        x: A 4-D `Tensor` with rank 4 and value for the last dimension,
            i.e. `[batch_size, in_height, in_width, depth]`.
        is_training: Bool, training or testing
        n_output: Integer or long, the number of output units in the layer.
        reuse: whether or not the layer and its variables should be reused. To be
            able to reuse the layer scope must be given.
        filter_size: an int or list/tuple of 2 positive integers specifying the spatial
            dimensions of the filters.
        dilation: A positive int32. The stride with which we sample input values across
            the height and width dimensions. Equivalently, the rate by which we upsample the
            filter values by inserting zeros across the height and width dimensions. In the
            literature, the same parameter is sometimes called input stride/rate or dilation.
        padding: one of `"VALID"` or `"SAME"`. If padding is LEFT, the input is preprocessed
            so that VALID padding can be used.
        activation: activation function, set to None to skip it and maintain
            a linear activation.
        batch_norm: normalization function to use. If `batch_norm` is `True` then the google
            original implementation is used and if another function is provided then it is
            applied. Default is set to None for no normalizer function.
        batch_norm_args: normalization function parameters.
        w_init: An initializer for the weights.
        w_regularizer: Optional regularizer for the weights.
        untie_biases: spatial dimension-wise biases
        b_init: An initializer for the biases. If None skip biases.
        outputs_collections: The collections to which the outputs are added.
        trainable: If `True` also add variables to the graph collection
            `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
        name: Optional name or scope for variable_scope/name_scope.
        use_bias: Whether to add bias or not

    Returns:
        The 4-D `Tensor` variable representing the result of the series of operations.
        e.g.: 4-D `Tensor` [batch, new_height, new_width, n_output].

    Raises:
        ValueError: if x has rank less than 4 or if its last dimension is not set.
    """
    def conv2d_fn(x, name, bias_start, padding):
        return conv2d_v2(x, n_output_channels, is_training, reuse, filter_size=filter_size,
                         padding=padding, b_init=bias_start, dilation=dilation, name=name,
                         **kwargs)

    with tf.variable_scope(name, reuse=reuse):
        reset = saturating_sigmoid(conv2d_fn(inputs, "reset", 1.0, padding))
        gate = saturating_sigmoid(conv2d_fn(inputs, "gate", 1.0, padding))
        candidate = tf.tanh(conv2d_fn(reset * inputs, "candidate", 0.0, padding))
        outputs = gate * inputs + (1 - gate) * candidate
        return _collect_named_outputs(outputs_collections, name, outputs)