我们从 Python 开源项目中提取了以下 50 个代码示例,用于说明如何使用 tensorflow.pad()。
def _spatial_replication_padding(x, stride, output_shape, filter_shape):
    """Grow the two middle axes of `x` with repeated one-element SYMMETRIC pads.

    The total amount added along each axis is
    ``out * stride + filter - 1 - in``; the leading side (top/left) receives
    the floor half and the trailing side (bottom/right) the remainder.

    Args:
        x: Tensor for which `utils.tensor_shape(x)` yields four entries; the
            second and third are read as input height and width.
        stride: Factor multiplied with the output height/width.
        output_shape: 4-sequence whose second and third entries are the
            output height and width.
        filter_shape: `(filter_height, filter_width)`.

    Returns:
        The padded tensor, or `x` unchanged when nothing has to be added.
    """
    _, in_height, in_width, _ = utils.tensor_shape(x)
    _, out_height, out_width, _ = output_shape
    filter_height, filter_width = filter_shape

    pad_rows = (out_height * stride + filter_height - 1) - in_height
    pad_cols = (out_width * stride + filter_width - 1) - in_width
    top = pad_rows // 2
    left = pad_cols // 2
    remaining = [top, pad_rows - top, left, pad_cols - left]

    # Each pass adds at most one element per side, so every pass mirrors only
    # the current border element.
    while max(remaining) > 0:
        after = [max(0, amount - 1) for amount in remaining]
        top_d, bottom_d, left_d, right_d = [
            before - rest for before, rest in zip(remaining, after)]
        x = tf.pad(x,
                   [[0, 0], [top_d, bottom_d], [left_d, right_d], [0, 0]],
                   mode='SYMMETRIC')
        remaining = after
    return x
def shift(self, model_input, shift_width, **unused_params):
    """Concatenate `shift_width` time-delayed copies of the input along axis 2.

    Copy `i` is `model_input` with `i` padded frames inserted at the front
    of axis 1 and then cut back to the original static frame count, i.e. the
    sequence delayed by `i` steps. Copy 0 is the input itself.

    Args:
        model_input: 3-D tensor; axis 1 must have a static size.
        shift_width: Number of copies (delays 0 .. shift_width - 1).
        **unused_params: Ignored.

    Returns:
        The copies concatenated along axis 2.
    """
    max_frames = model_input.get_shape().as_list()[1]
    # `range` instead of `xrange` so the method also runs on Python 3.
    shift_inputs = [
        model_input if i == 0
        else tf.pad(model_input, paddings=[[0, 0], [i, 0], [0, 0]])[:, :max_frames, :]
        for i in range(shift_width)
    ]
    shift_output = tf.concat(shift_inputs, axis=2)
    return shift_output
def augment(self, model_input_raw, num_frames, labels_batch, **unused_params):
    """Double the batch with a time-shifted, randomly shortened copy.

    The copy drops the first `offset` frames of every example, pads the tail
    so the frame axis keeps its length, and draws a new frame count between
    75% and 100% of the remaining frames. The original and the copy are
    concatenated along the batch axis; labels are duplicated.

    Args:
        model_input_raw: 3-D tensor indexed as [batch, frame, feature].
        num_frames: Per-example frame counts.
        labels_batch: Labels aligned with the batch axis.
        **unused_params: Ignored.

    Returns:
        Tuple `(model_input, labels_batch, num_frames)` for the doubled batch.
    """
    # The original wrote `assert(cond, msg)`, which asserts a non-empty tuple
    # and therefore never fires.
    assert FLAGS.frame_feature, \
        "AugmentationTransformer only works with frame feature"

    # Shift by less than a quarter of the shortest example. Integer
    # random_uniform needs an explicit maxval; clamp it to 1 so that
    # maxval > minval also holds for very short examples (offset is then 0).
    limit = tf.maximum(
        tf.cast(tf.reduce_min(num_frames) / 4.0, tf.int32), 1)
    offset = tf.random_uniform(shape=[], minval=0, maxval=limit, dtype=tf.int32)

    # paddings must be an [n, 2] list of (before, after) pairs.
    input_trans1 = tf.pad(model_input_raw[:, offset:, :],
                          paddings=[[0, 0], [0, offset], [0, 0]])
    # Slice + pad restores the frame count, but only dynamically; put the
    # static shape back for downstream layers.
    input_trans1.set_shape(model_input_raw.get_shape())

    remaining_frames = tf.cast(num_frames, tf.int32) - offset
    keep_ratio = tf.random_uniform(shape=tf.shape(num_frames),
                                   minval=0.75, maxval=1.0, dtype=tf.float32)
    # Multiply in float32 (TF does not promote int * float implicitly) and
    # return to the dtype of `num_frames` so the concat below type-checks.
    num_frames_trans1 = tf.cast(
        keep_ratio * tf.cast(remaining_frames, tf.float32), num_frames.dtype)

    model_input = tf.concat([model_input_raw, input_trans1], axis=0)
    labels_batch = tf.concat([labels_batch, labels_batch], axis=0)
    num_frames = tf.concat([num_frames, num_frames_trans1], axis=0)
    # The original returned the undefined name `num_frames_new`.
    return model_input, labels_batch, num_frames
def get_default_config(self):
    """Return the `BasicModel` default config updated with this model's values.

    The checkpoint path is resolved relative to `script_dir`
    (`<script_dir>/../data/inception_v4.ckpt`).
    """
    config = BasicModel.get_default_config(self)
    checkpoint_path = os.path.join(script_dir, '..', 'data', 'inception_v4.ckpt')
    overrides = dict([
        ('stride', 1),
        ('inception_v4_checkpoint_file', checkpoint_path),
        ('batch_norm_decay', 0.99),
        ('batch_norm_epsilon', 0.001),
        ('output_size', 29),
        ('pad', 32),
        ('receptive_field_size', 66),
        ('projective_field_size', 7),
        ('contextual_pad', 32),
        ('normalize_inputs', False),
        ('batch_size', 64),
    ])
    config.update(overrides)
    return config
def apply_shortcut(self, prev_inp, ch_in, ch_out, phase_train=None, w=None, bn=None, stride=None):
    """Build the shortcut branch of a residual unit.

    With `self.shortcut == 'projection'` the input goes through a (dilated
    when `self.dilation` is set) convolution using weights `w`, then through
    the callable `bn`. With `self.shortcut == 'identity'` the last axis is
    padded from `ch_in` to `ch_out` entries.

    Args:
        prev_inp: Input tensor; the identity path pads its fourth axis.
        ch_in: Input channel count.
        ch_out: Output channel count.
        phase_train: Passed to `bn` under the key 'phase_train'.
        w: Convolution weights for the projection path.
        bn: Callable applied to {'input': ..., 'phase_train': ...}.
        stride: Convolution stride / dilation rate / pooling size.

    Returns:
        The shortcut tensor.

    Raises:
        Exception: when `ch_in > ch_out` on the identity path, and from the
            debug guard noted below.
    """
    if self.shortcut == 'projection':
        if self.dilation:
            prev_inp = DilatedConv2D(w, rate=stride)(prev_inp)
        else:
            prev_inp = Conv2D(w, stride=stride)(prev_inp)
        prev_inp = bn({'input': prev_inp, 'phase_train': phase_train})
    elif self.shortcut == 'identity':
        pad_ch = ch_out - ch_in
        if pad_ch < 0:
            raise Exception('Must use projection when ch_in > ch_out.')
        # Pad only the trailing side of the last axis up to ch_out entries.
        prev_inp = tf.pad(prev_inp, [[0, 0], [0, 0], [0, 0], [0, pad_ch]])
        if stride > 1:
            prev_inp = AvgPool(stride)(prev_inp)
            # NOTE(review): the source indentation was lost; this debug raise
            # is assumed to belong to the strided identity branch -- confirm.
            raise Exception('DEBUG Unknown')
    self.log.info('After proj shape: {}'.format(
        prev_inp.get_shape()))
    return prev_inp
def apply_shortcut(self, prev_inp, ch_in, ch_out, phase_train=None, w=None, stride=None):
    """Build the shortcut branch of a residual unit.

    With `self.shortcut == 'projection'` the input goes through a (dilated
    when `self.dilation` is set) convolution using weights `w` and a fresh
    `BatchNorm(ch_out)`. With `self.shortcut == 'identity'` the last axis is
    padded from `ch_in` to `ch_out` entries and, for `stride > 1`,
    average-pooled. Any other `self.shortcut` value leaves the input as is.

    Args:
        prev_inp: Input tensor; the identity path pads its fourth axis.
        ch_in: Input channel count.
        ch_out: Output channel count.
        phase_train: Passed to the batch norm under the key 'phase_train'.
        w: Convolution weights for the projection path.
        stride: Convolution stride / dilation rate / pooling size.

    Returns:
        Tuple `(tensor, bn)`; `bn` is the batch-norm object created on the
        projection path and None otherwise.

    Raises:
        Exception: when `ch_in > ch_out` on the identity path.
    """
    # Bind up front so the return below never hits an unbound local when
    # self.shortcut is neither 'projection' nor 'identity'.
    bn = None
    if self.shortcut == 'projection':
        if self.dilation:
            prev_inp = DilatedConv2D(w, rate=stride)(prev_inp)
        else:
            prev_inp = Conv2D(w, stride=stride)(prev_inp)
        bn = BatchNorm(ch_out)
        prev_inp = bn({'input': prev_inp, 'phase_train': phase_train})
    elif self.shortcut == 'identity':
        pad_ch = ch_out - ch_in
        if pad_ch < 0:
            raise Exception('Must use projection when ch_in > ch_out.')
        # Pad only the trailing side of the last axis up to ch_out entries.
        prev_inp = tf.pad(prev_inp, [[0, 0], [0, 0], [0, 0], [0, pad_ch]])
        # The default stride is None; comparing None with an int raises
        # TypeError on Python 3, so treat None as "no pooling".
        if stride is not None and stride > 1:
            prev_inp = AvgPool(stride)(prev_inp)
    self.log.info('After proj shape: {}'.format(
        prev_inp.get_shape()))
    return prev_inp, bn
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
    '''Pad two axes of a 4D tensor on both sides.

    `padding[0]` is added before and after the first padded axis and
    `padding[1]` before and after the second. With ordering 'th' the padded
    axes are 2 and 3; with 'tf' they are 1 and 2. 'default' resolves the
    ordering through `image_dim_ordering()`.

    Raises ValueError for any other ordering.
    '''
    ordering = image_dim_ordering() if dim_ordering == 'default' else dim_ordering
    if ordering not in {'th', 'tf'}:
        raise ValueError('Unknown dim_ordering ' + str(ordering))

    first_axis = [padding[0]] * 2
    second_axis = [padding[1]] * 2
    if ordering == 'th':
        pattern = [[0, 0], [0, 0], first_axis, second_axis]
    else:
        pattern = [[0, 0], first_axis, second_axis, [0, 0]]
    return tf.pad(x, pattern)
def asymmetric_spatial_2d_padding(x, top_pad=1, bottom_pad=1, left_pad=1, right_pad=1, dim_ordering='default'):
    '''Pad the rows and columns of a 4D tensor by independent amounts.

    Rows receive `top_pad` before and `bottom_pad` after; columns receive
    `left_pad` before and `right_pad` after. With ordering 'th' rows and
    columns are axes 2 and 3; with 'tf' they are axes 1 and 2. 'default'
    resolves the ordering through `image_dim_ordering()`.

    Raises ValueError for any other ordering.
    '''
    ordering = image_dim_ordering() if dim_ordering == 'default' else dim_ordering
    if ordering not in {'th', 'tf'}:
        raise ValueError('Unknown dim_ordering ' + str(ordering))

    rows = [top_pad, bottom_pad]
    cols = [left_pad, right_pad]
    if ordering == 'th':
        pattern = [[0, 0], [0, 0], rows, cols]
    else:
        pattern = [[0, 0], rows, cols, [0, 0]]
    return tf.pad(x, pattern)
def apply_time_pooling(inputs, sequence_length, stride, pooling_avg=False):
    """Subsample axis 1 of a 3-D tensor by `stride`.

    With `pooling_avg` the `stride` interleaved sub-sequences are padded at
    the end to the length of the first one, summed and divided by `stride`;
    otherwise every `stride`-th step is kept.

    Returns:
        `(inputs, sequence_length)`, where the lengths are divided by
        `stride` rounding up.
    """
    shape = [tf.shape(inputs)[0], tf.shape(inputs)[1], inputs.get_shape()[2].value]

    if not pooling_avg:
        inputs = inputs[:, ::stride, :]
    else:
        phases = [inputs[:, start::stride, :] for start in range(stride)]
        longest = tf.shape(phases[0])[1]
        padded = phases[:1]
        # Later phases can be shorter than phase 0; pad their tail so all
        # phases can be stacked.
        for phase in phases[1:]:
            shortfall = longest - tf.shape(phase)[1]
            paddings = tf.stack([[0, 0], [0, shortfall], [0, 0]])
            padded.append(tf.pad(phase, paddings=paddings))
        inputs = tf.reduce_sum(padded, axis=0) / len(padded)

    # Reshape with the static feature size recorded before pooling.
    new_shape = tf.stack([shape[0], tf.shape(inputs)[1], shape[2]])
    inputs = tf.reshape(inputs, new_shape)
    sequence_length = (sequence_length + stride - 1) // stride  # rounding up
    return inputs, sequence_length
def zoomout(image, gt_bboxes, params):
    """Place the image at a random position inside a larger padded canvas.

    The canvas is `X_out` times the image size, with `X_out` drawn uniformly
    from [1.05, params['X_out']). The padded area takes the colour
    `params['zoomout_color'] + [0]`.

    Args:
        image: 3-D tensor indexed as [row, column, channel].
        gt_bboxes: 2-D tensor whose columns are unstacked as (x, y, w, h).
            The shifts are divided by the image width/height before being
            added, so coordinates are presumably normalized -- confirm.
        params: Dict providing 'X_out' and 'zoomout_color' (a list).

    Returns:
        `(image, gt_bboxes)` after zooming out.
    """
    X_out = tf.random_uniform([], 1.05, params['X_out'])
    h, w, _ = tf.unstack(tf.to_float(tf.shape(image)))
    fill = tf.constant(params['zoomout_color'] + [0], dtype=tf.float32)

    extra_w = (X_out - 1) * w
    x_shift = tf.random_uniform([], 0, extra_w)
    extra_h = (X_out - 1) * h
    y_shift = tf.random_uniform([], 0, extra_h)
    x2_shift = (X_out - 1) * w - x_shift
    y2_shift = (X_out - 1) * h - y_shift

    # Subtract the fill colour, pad, then add it back, so that the padded
    # area ends up holding the fill colour.
    image -= fill
    canvas_pad = tf.to_int32([[y_shift, y2_shift], [x_shift, x2_shift], [0, 0]])
    image = tf.pad(image, canvas_pad)
    image += fill

    gt_x, gt_y, gt_w, gt_h = tf.unstack(gt_bboxes, axis=1)
    moved = tf.stack([gt_x + x_shift/w, gt_y + y_shift/h, gt_w, gt_h], axis=1)
    gt_bboxes = moved/X_out
    return image, gt_bboxes
def forward(self):
    """Apply a separate kernel to each output position and store the result.

    The input `self.inp.out` is padded by `self.lay.pad` on both sides of
    axes 1 and 2. For output position (i, j) a `ksize` x `ksize` window is
    cut from the padded input and convolved (VALID, stride 1) with kernel
    number `i * w_out + j`. Results are concatenated along axis 2 within a
    row and along axis 1 across rows, then written to `self.out`.
    """
    spatial = [[self.lay.pad, self.lay.pad]] * 2
    padded = tf.pad(self.inp.out, [[0, 0]] + spatial + [[0, 0]])

    kernels = self.lay.w['kernels']
    ksz = self.lay.ksize
    half = int(ksz / 2)

    rows = []
    for i in range(self.lay.h_out):
        top = i + 1 - half
        patches = []
        for j in range(self.lay.w_out):
            kernel_ij = kernels[i * self.lay.w_out + j]
            left = j + 1 - half
            window = padded[:, top : top + ksz, left : left + ksz, :]
            patches.append(
                tf.nn.conv2d(window, kernel_ij,
                             padding='VALID', strides=[1] * 4))
        rows.append(tf.concat(patches, 2))
    self.out = tf.concat(rows, 1)
def constrained_conv2d(input_, output_dim, k_h=6, k_w=6, d_h=2, d_w=2, stddev=0.02, name="conv2d"):
    """SAME-padded 2-D convolution whose stride must divide the kernel size.

    Args:
        input_: 4-D input tensor; its last axis gives the input channels.
        output_dim: Number of output channels.
        k_h, k_w: Kernel height and width.
        d_h, d_w: Strides along height and width.
        stddev: Standard deviation of the truncated-normal weight init.
        name: Variable scope holding 'w' and 'biases'.

    Returns:
        The convolution output with the bias added.

    Raises:
        AssertionError: if a stride does not divide its kernel dimension.
    """
    # constrained to have stride be a factor of kernel width
    # this is intended to reduce convolution artifacts
    assert k_h % d_h == 0
    assert k_w % d_w == 0
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        # NOTE(review): the original also built
        # tf.pad(input_, [[0, 0], [k_h-1, 0], [k_w-1, 0], [0, 0]]) ("meant to
        # reduce boundary artifacts") but never used it; the convolution has
        # always read `input_`. The dead op is removed and the output is
        # unchanged. Feeding the padded tensor would alter the output shape,
        # so decide that deliberately.
        conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')
        biases = tf.get_variable('biases', [output_dim],
                                 initializer=tf.constant_initializer(0.0))
        conv = tf.nn.bias_add(conv, biases)
        return conv
def conv2d(batch_input, out_channels, filter_shape, strides, name="conv"):
    """NCHW convolution with explicit REFLECT padding and no bias.

    Args:
        batch_input: 4-D tensor with static shape; axes 1, 2, 3 are read as
            channels, height and width.
        out_channels: Number of output channels.
        filter_shape: `(kernel_height, kernel_width)`.
        strides: 4-sequence; the last two entries are the height and width
            strides and the whole sequence is passed to `tf.nn.conv2d`.
        name: Variable scope holding the weight 'w'.

    Returns:
        Output of a VALID convolution over the reflect-padded input.
    """
    with tf.variable_scope(name):
        static_shape = batch_input.get_shape()
        in_channels = static_shape[1]
        in_height = static_shape[2]
        in_width = static_shape[3]
        kh, kw = filter_shape
        _, _, sh, sw = strides

        w = tf.get_variable(
            name="w",
            shape=[kh, kw, in_channels, out_channels],
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(0, 0.02))

        # pad_numbers supplies the (before, after) pair for each spatial axis.
        ph = pad_numbers(int(in_height), kh, sh)
        pw = pad_numbers(int(in_width), kw, sw)
        padded_input = tf.pad(batch_input, [[0, 0], [0, 0], ph, pw], mode="REFLECT")

        # A bias variable was present but disabled in the original code.
        conv = tf.nn.conv2d(padded_input, w, strides,
                            padding="VALID", data_format="NCHW")
        return conv
def encoder(self, x):
    """Encode a batch and tile the second half of the features per class.

    A stride-2 7x7 convolution, one element of padding on each spatial side
    and a 3x3 stride-2 max-pool precede the ResNet blocks in
    `self._blocks_encoder`. The block output is averaged over axes 1 and 2,
    split into two equal parts along the batch axis, and the second part is
    tiled `self._num_classes` times before being appended to the first.

    Returns:
        The concatenated feature matrix.
    """
    with tf.variable_scope('encoder'):
        stem = resnet_utils.conv2d_same(x, 64, 7, stride=2, scope='conv1')
        stem = tf.pad(stem, [[0, 0], [1, 1], [1, 1], [0, 0]])
        pooled = slim.max_pool2d(stem, [3, 3], stride=2,
                                 padding='VALID', scope='pool1')

        x_features_all, _ = resnet_v1.resnet_v1(
            pooled,
            self._blocks_encoder,
            global_pool=False,
            include_root_block=False,
            scope=self._resnet_scope)
        x_features_all = tf.reduce_mean(x_features_all, axis=[1, 2])

        x_features_labeled, x_features_unlabeled = tf.split(x_features_all, 2)
        # Shape example from the original author: (100, 256) --> (2100, 256)
        x_features_tiled = tf.tile(x_features_unlabeled, [self._num_classes, 1])
        # Shape example from the original author: (2100, 256) --> (2200, 256)
        x_features = tf.concat([x_features_labeled, x_features_tiled], 0)
        return x_features
def conv2d(input, num_filters, filter_size, stride, reuse=False, pad='SAME', dtype=tf.float32, bias=False):
    """Square-kernel NHWC convolution with an optional REFLECT padding mode.

    Args:
        input: 4-D tensor; axis 3 gives the input channels.
        num_filters: Number of output channels.
        filter_size: Kernel height and width.
        stride: Stride along both spatial axes.
        reuse: Not used by this function.
        pad: 'SAME', 'VALID', or 'REFLECT' (reflect-pad by
            `(filter_size - 1) // 2`, then convolve with VALID).
        dtype: dtype of the weight variable 'w'.
        bias: When true, add a zero-initialised variable 'b'.

    Returns:
        The convolution output.

    Raises:
        AssertionError: if `pad` is not one of the three supported values.
    """
    strides = [1, stride, stride, 1]
    kernel_shape = [filter_size, filter_size, input.get_shape()[3], num_filters]
    w = tf.get_variable('w', kernel_shape, dtype, tf.random_normal_initializer(0.0, 0.02))

    if pad != 'REFLECT':
        assert pad in ['SAME', 'VALID']
        conv = tf.nn.conv2d(input, w, strides, padding=pad)
    else:
        margin = (filter_size - 1) // 2
        reflected = tf.pad(
            input, [[0, 0], [margin, margin], [margin, margin], [0, 0]], 'REFLECT')
        conv = tf.nn.conv2d(reflected, w, strides, padding='VALID')

    if bias:
        b = tf.get_variable('b', [1, 1, 1, num_filters],
                            initializer=tf.constant_initializer(0.0))
        conv = conv + b
    return conv
def pad2d(inputs, pad=(0, 0), mode='CONSTANT', data_format='NHWC', trainable=True, scope=None):
    """2D Padding layer, adding a symmetric padding to H and W dimensions.

    Aims to mimic padding in Caffe and MXNet, helping the port of models to
    TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`.

    Args:
        inputs: 4D input Tensor;
        pad: 2-Tuple with padding values for H and W dimensions;
        mode: Padding mode. C.f. `tf.pad`
        data_format: NHWC or NCHW data format.
        trainable: Not used by this function.
        scope: Optional name scope (defaults to 'pad2d').

    Returns:
        The padded tensor.

    Raises:
        ValueError: if `data_format` is neither 'NHWC' nor 'NCHW'.
    """
    # Validate before touching the graph; previously an unknown data_format
    # left `paddings` unbound and failed with an UnboundLocalError.
    if data_format == 'NHWC':
        paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]
    elif data_format == 'NCHW':
        paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]]
    else:
        raise ValueError(
            "Unknown data_format %r; expected 'NHWC' or 'NCHW'." % (data_format,))

    with tf.name_scope(scope, 'pad2d', [inputs]):
        net = tf.pad(inputs, paddings, mode=mode)
        return net
def _conv(self, inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = 'conv'):
    """Square-kernel NHWC convolution without bias.

    Args:
        inputs : Input tensor (NHWC); axis 3 gives the input channels
        filters : Number of output channels
        kernel_size : Kernel height and width
        strides : Stride along both spatial axes
        pad : Padding type forwarded to `tf.nn.conv2d`. The original author
            warns: do not use 'SAME', the network is built for 'VALID'.
        name : Name scope of the block
    Returns:
        conv : Convolved input
    """
    with tf.name_scope(name):
        in_channels = inputs.get_shape().as_list()[3]
        kernel_shape = [kernel_size, kernel_size, in_channels, filters]
        # Xavier (normal) initial value for the kernel.
        initial = tf.contrib.layers.xavier_initializer(uniform=False)(kernel_shape)
        kernel = tf.Variable(initial, name= 'weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, strides, strides, 1],
                            padding=pad, data_format='NHWC')
        if self.w_summary:
            with tf.device('/cpu:0'):
                tf.summary.histogram('weights_summary', kernel, collections = ['weight'])
        return conv
def _conv_bn_relu(self, inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = 'conv_bn_relu'):
    """VALID convolution followed by batch normalisation and ReLU.

    Args:
        inputs : Input tensor (NHWC); axis 3 gives the input channels
        filters : Number of output channels
        kernel_size : Kernel height and width
        strides : Stride along both spatial axes
        pad : Accepted but not used; the convolution always runs with
            'VALID' padding
        name : Name scope of the block
    Returns:
        norm : Batch-normalised, ReLU-activated output
    """
    with tf.name_scope(name):
        in_channels = inputs.get_shape().as_list()[3]
        kernel_shape = [kernel_size, kernel_size, in_channels, filters]
        initial = tf.contrib.layers.xavier_initializer(uniform=False)(kernel_shape)
        kernel = tf.Variable(initial, name= 'weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, strides, strides, 1],
                            padding='VALID', data_format='NHWC')
        norm = tf.contrib.layers.batch_norm(
            conv, 0.9, epsilon=1e-5,
            activation_fn = tf.nn.relu, is_training = self.training)
        if self.w_summary:
            with tf.device('/cpu:0'):
                tf.summary.histogram('weights_summary', kernel, collections = ['weight'])
        return norm
def _attention_iter(self, inputs, lrnSize, itersize, name = 'attention_iter'):
    """Compute an iteratively refined single-channel gate and apply it.

    A 3x3 convolution reduces `inputs` to one channel (`U`). A shared
    `lrnSize` x `lrnSize` kernel is then applied `itersize` times: first to
    the padded `U`, afterwards to the previous sigmoid output; each result
    is added to `U` and squashed with a sigmoid. The last map is repeated
    across all input channels and multiplied with `inputs`.

    Args:
        inputs : Input tensor (NHWC); axis 3 gives the channel count
        lrnSize : Size of the shared square kernel
        itersize : Number of refinement iterations (at least 1)
        name : Name scope of the block
    Returns:
        pfeat : `inputs` scaled element-wise by the final map
    """
    with tf.name_scope(name):
        numIn = inputs.get_shape().as_list()[3]
        # tf.pad needs integer paddings; np.floor(lrnSize/2) produced a float.
        padding = int(lrnSize) // 2
        pad = tf.pad(inputs, np.array([[0,0],[1,1],[1,1],[0,0]]))
        U = self._conv(pad, filters=1, kernel_size=3, strides=1)
        pad_2 = tf.pad(U, np.array([[0,0],[padding,padding],[padding,padding],[0,0]]))
        sharedK = tf.Variable(
            tf.contrib.layers.xavier_initializer(uniform=False)([lrnSize,lrnSize, 1, 1]),
            name= 'shared_weights')
        Q = []
        C = []
        for i in range(itersize):
            if i == 0:
                # First pass reads the explicitly padded map (VALID).
                conv = tf.nn.conv2d(pad_2, sharedK, [1,1,1,1], padding='VALID', data_format='NHWC')
            else:
                conv = tf.nn.conv2d(Q[i-1], sharedK, [1,1,1,1], padding='SAME', data_format='NHWC')
            C.append(conv)
            Q.append(tf.nn.sigmoid(tf.add_n([C[i], U])))
        # Repeat the single-channel map once per input channel.
        stacks = [Q[-1] for _ in range(numIn)]
        pfeat = tf.multiply(inputs, tf.concat(stacks, axis = 3))
        return pfeat
def conv2d(inputs, filters, kernel_size = 1, strides = 1, pad = 'VALID', name = None):
    """Square-kernel NHWC convolution without bias, with a weight histogram.

    args :
        inputs : (tensor) input tensor; axis 3 gives the input channels
        filters : (int) number of output channels
        kernel_size : (int) kernel height and width
        strides : (int) stride along both spatial axes
        pad : ('VALID'/'SAME') padding forwarded to `tf.nn.conv2d`
        name : name scope of the layer
    return :
        tf.Tensor
    """
    with tf.name_scope(name):
        in_channels = inputs.get_shape().as_list()[3]
        kernel_shape = [kernel_size, kernel_size, in_channels, filters]
        initial = tf.contrib.layers.xavier_initializer(uniform=False)(kernel_shape)
        kernel = tf.Variable(initial, name= 'weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, strides, strides, 1],
                            padding=pad, data_format='NHWC')
        with tf.device('/cpu:0'):
            tf.summary.histogram('weights_summary', kernel, collections = ['train'])
        return conv
def convBnrelu(inputs, filters, kernel_size = 1, strides = 1, name = None):
    """VALID convolution followed by batch normalisation and ReLU.

    args :
        inputs : (tf.Tensor) input tensor; axis 3 gives the input channels
        filters : (int) number of output channels
        kernel_size : (int) kernel height and width
        strides : (int) stride along both spatial axes
        name : name scope of the layer
    return :
        tf.Tensor
    """
    with tf.name_scope(name):
        in_channels = inputs.get_shape().as_list()[3]
        kernel_shape = [kernel_size, kernel_size, in_channels, filters]
        initial = tf.contrib.layers.xavier_initializer(uniform=False)(kernel_shape)
        kernel = tf.Variable(initial, name= 'weights')
        conv = tf.nn.conv2d(inputs, kernel, [1, strides, strides, 1],
                            padding='VALID', data_format='NHWC')
        norm = tf.contrib.layers.batch_norm(
            conv, 0.9, epsilon=1e-5,
            activation_fn = tf.nn.relu, scope = '_bn_relu')
        with tf.device('/cpu:0'):
            tf.summary.histogram('weights_summary', kernel, collections = ['train'])
        return norm
def convBlock(inputs, numOut, name = 'convBlock'):
    """Bottleneck stack of three (batch norm + ReLU, convolution) stages.

    The stages are 1x1 -> 3x3 -> 1x1 with numOut/2, numOut/2 and numOut
    output channels. The 3x3 stage reads an input padded by one element on
    each spatial side, so it keeps the spatial size.

    args:
        inputs : (tf.Tensor)
        numOut : (int) number of output channels
        name : name scope of the block
    return :
        tf.Tensor
    """
    half = int(numOut/2)
    with tf.name_scope(name):
        # Stage 1: 1x1 reduction to numOut/2 channels.
        norm_1 = tf.contrib.layers.batch_norm(inputs, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu)
        conv_1 = conv2d(norm_1, half, kernel_size=1, strides=1, pad = 'VALID')
        # Stage 2: 3x3 on an explicitly padded input.
        norm_2 = tf.contrib.layers.batch_norm(conv_1, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu)
        padded = tf.pad(norm_2, np.array([[0,0],[1,1],[1,1],[0,0]]))
        conv_2 = conv2d(padded, half, kernel_size=3, strides=1, pad = 'VALID')
        # Stage 3: 1x1 expansion to numOut channels.
        norm_3 = tf.contrib.layers.batch_norm(conv_2, 0.9, epsilon=1e-5, activation_fn = tf.nn.relu)
        conv_3 = conv2d(norm_3, int(numOut), kernel_size=1, strides=1, pad = 'VALID')
        return conv_3
def _build_residual_layer(self, name, inputs, k, rfsize, blocksize=2, stride=1):
    """Build a two-convolution residual block with REFLECT padding.

    Each convolution reads a reflect-padded input and runs with VALID
    padding, followed by `inst_norm`; the first is also followed by ReLU.
    The block output is the second normalised map plus `inputs`.

    Args:
        name: Variable scope of the block.
        inputs: Input tensor; its last axis gives the filter input depth.
        k: Number of filters of both convolutions.
        rfsize: Receptive field (kernel) size of both convolutions.
        blocksize: Not used by this function.
        stride: Stride of both convolutions.

    Returns:
        Dict with the filters and every intermediate tensor of the block
        ('filters1', 'conv1', 'bn1', 'fmap1', 'filters2', 'conv2', 'bn2',
        'fmap2').
    """
    # Derive the padding from the kernel size instead of hard-coding 1, so a
    # stride-1 convolution keeps the spatial size for any rfsize; rfsize = 3
    # still gives [1, 1].
    pad_total = rfsize - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    paddings = [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]
    strides = [1, stride, stride, 1]

    layer = dict()
    with tf.variable_scope(name):
        with tf.variable_scope('layer1'):
            layer['filters1'] = tf.get_variable(
                'filters1', [rfsize, rfsize, get_shape(inputs)[-1], k])
            layer['conv1'] = tf.nn.conv2d(
                tf.pad(inputs, paddings, 'REFLECT'),
                layer['filters1'], strides=strides, padding='VALID')
            layer['bn1'] = inst_norm(layer['conv1'])
            layer['fmap1'] = tf.nn.relu(layer['bn1'])
        with tf.variable_scope('layer2'):
            layer['filters2'] = tf.get_variable(
                'filters2', [rfsize, rfsize, get_shape(inputs)[-1], k])
            layer['conv2'] = tf.nn.conv2d(
                tf.pad(layer['fmap1'], paddings, 'REFLECT'),
                layer['filters2'], strides=strides, padding='VALID')
            layer['bn2'] = inst_norm(layer['conv2'])
            # No ReLu here (following http://torch.ch/blog/2016/02/04/resnets.html, as indicated by the authors)
            layer['fmap2'] = layer['bn2'] + inputs
    return layer
def get_parameters():
    """Return the default hyper-parameters as a `tf.contrib.training.HParams`."""
    vocabulary = dict(
        pad="</s>",
        unk="UNK",
        eos="</s>",
        bos="</s>",
        append_eos=False,
    )
    model = dict(
        rnn_cell="LegacyGRUCell",
        embedding_size=620,
        hidden_size=1000,
        maxnum=2,
    )
    regularization = dict(
        dropout=0.2,
        use_variational_dropout=False,
        label_smoothing=0.1,
        constant_batch_size=True,
        batch_size=128,
        max_length=60,
        clip_grad_norm=5.0,
    )

    settings = {}
    for group in (vocabulary, model, regularization):
        settings.update(group)
    return tf.contrib.training.HParams(**settings)
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
    """Strided 2-D convolution whose padding depends only on the kernel size.

    For `stride == 1` this is `slim.conv2d` with 'SAME' padding. Otherwise
    the two middle axes are padded by `kernel_size - 1` in total (floor half
    in front, the rest behind) and a 'VALID' convolution is applied.

    Args:
        inputs: 4-D input tensor; axes 1 and 2 are padded.
        num_outputs: Number of output channels.
        kernel_size: Kernel height and width.
        stride: Convolution stride.
        scope: Optional scope forwarded to `slim.conv2d`.

    Returns:
        The convolution output.
    """
    if stride != 1:
        # Total padding equals kernel_size - 1. The original kept a disabled
        # line for a dilated kernel:
        # kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        explicit = [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]
        inputs = tf.pad(inputs, explicit)
        return slim.conv2d(inputs, num_outputs, kernel_size,
                           stride=stride, padding='VALID', scope=scope)
    return slim.conv2d(inputs, num_outputs, kernel_size,
                       stride=1, padding='SAME', scope=scope)


# NOTE(review): a section-divider comment mentioning "Blocks" followed here in
# the source; its text is mis-encoded and could not be recovered.
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='default'):
    """Pad two axes of a 4D tensor on both sides.

    `padding[0]` is added before and after the first padded axis and
    `padding[1]` before and after the second. With ordering 'th' the padded
    axes are 2 and 3; with 'tf' they are 1 and 2. 'default' resolves the
    ordering through `image_dim_ordering()`.

    # Returns
        A padded 4D tensor.

    # Raises
        ValueError: if `dim_ordering` is neither `tf` or `th`.
    """
    if dim_ordering == 'default':
        dim_ordering = image_dim_ordering()
    if dim_ordering not in {'th', 'tf'}:
        raise ValueError('Unknown dim_ordering ' + str(dim_ordering))

    untouched = [[0, 0], [0, 0]]
    padded_axes = [[amount, amount] for amount in (padding[0], padding[1])]
    if dim_ordering == 'th':
        pattern = untouched + padded_axes
    else:
        pattern = untouched[:1] + padded_axes + untouched[1:]
    return tf.pad(x, pattern)
def _bbox_to_mask(yy, region_size, dtype):
    """Rasterise one bounding box into a mask of ones over a zero region.

    Args:
        yy: 1-D tensor read as (y, x, height, width).
        region_size: 2-element tensor (region height, region width).
        dtype: dtype of the returned mask.

    Returns:
        2-D mask, cropped to the rounded `region_size`.
    """
    # Trim the part of the box that sticks out over the top and left edge.
    overshoot = tf.nn.relu(-yy[:2])
    box_shape = tf.to_int32(tf.round(yy[2:] - overshoot))
    core = tf.ones(box_shape, dtype=dtype)

    top = tf.maximum(yy[0], 0.)
    left = tf.maximum(yy[1], 0.)
    bottom = tf.minimum(region_size[0], yy[0] + yy[2])
    right = tf.minimum(region_size[1], yy[1] + yy[3])

    # (before, after) padding for rows, then for columns.
    margins = tf.stack((top, region_size[0] - bottom, left, region_size[1] - right))
    margins = tf.to_int32(tf.round(tf.reshape(margins, (-1, 2))))
    mask = tf.pad(core, margins)

    # Trim the part of the box that sticks out over the bottom and right edge.
    rs = tf.to_int32(tf.round(region_size))
    mask = mask[:rs[0], :rs[1]]
    mask.set_shape((None, None))
    return mask
def image_series_summary(tag, imgs, max_timesteps=10):
    """Log a time series of images as one image summary, frames side by side.

    The first three batch entries are kept. The time axis is cut or padded
    to exactly `max_timesteps`, and the frames are concatenated along axis 2
    of the per-frame tensors before being passed to `tf.summary.image`.

    Args:
        tag: Summary tag.
        imgs: Tensor with time on axis 0 and batch on axis 1. For a rank-6
            input, assumed to be (T, batch_size, n_obj, H, W, C), only the
            first object is logged.
        max_timesteps: Number of time steps shown.
    """
    # take only 3 items from the minibatch
    imgs = imgs[:, :3]

    # assume img.shape == (T, batch_size, n_obj, H, W, C)
    # let's log only for 1st obj
    # The original built a tf.cond for this and dropped its result, so the
    # selection never took effect. The rank is static, so branch in Python.
    if imgs.get_shape().ndims == 6:
        imgs = imgs[:, :, 0]

    shape = (max_timesteps,) + tuple(imgs.get_shape()[1:])
    nt = tf.shape(imgs)[0]

    def pad():
        # Pad only the end of the time axis; all other axes get (0, 0).
        paddings = tf.concat(axis=0, values=([[0, max_timesteps - nt]], tf.zeros((len(shape) - 1, 2), tf.int32)))
        return tf.pad(imgs, paddings)

    imgs = tf.cond(tf.greater(nt, max_timesteps), lambda: imgs[:max_timesteps], pad)
    imgs.set_shape(shape)
    imgs = tf.squeeze(imgs)
    imgs = tf.unstack(imgs)
    # concatenate along the columns
    imgs = tf.concat(axis=2, values=imgs)
    tf.summary.image(tag, imgs)
def format_input_left_padding(inputs, **kwargs):
    """Pad a rank-4 input at the front of axes 1 and 2 for a VALID convolution.

    Axis 1 is padded in front by `2 * (filter_size[0] // 2) * dilation[0]`.
    Axis 2 is padded the same way from the second entries, unless its size
    is 1, in which case it is not padded.

    Args:
        inputs: Tensor with statically known rank 4.
        **kwargs: Convolution arguments. 'filter_size' (int or pair, odd) is
            required; 'dilation' (int or pair) is optional and defaults to
            (1, 1).

    Returns:
        `(padded_inputs, kwargs)` with `kwargs["padding"]` set to "VALID".

    Raises:
        ValueError: if the static rank of `inputs` is not 4.
        AssertionError: if 'filter_size' is None or has an even entry.
    """
    static_shape = inputs.get_shape()
    if not static_shape or len(static_shape) != 4:
        raise ValueError(
            "Inputs to conv must have statically known rank 4. Shape: " + str(static_shape))

    assert kwargs['filter_size'] is not None
    filter_size = kwargs['filter_size']
    if isinstance(filter_size, int):
        filter_size = [filter_size, filter_size]

    # The original stored kwargs["dilation"] in an unused variable, so the
    # padding was always computed for dilation (1, 1).
    dilation = (1, 1)
    if kwargs.get("dilation") is not None:
        dilation = kwargs["dilation"]
        if isinstance(dilation, int):
            dilation = (dilation, dilation)

    assert filter_size[0] % 2 == 1 and filter_size[1] % 2 == 1
    height_padding = 2 * (filter_size[0] // 2) * dilation[0]
    # Axis 2 may only be known at run time; decide there whether it is 1.
    cond_padding = tf.cond(
        tf.equal(tf.shape(inputs)[2], 1),
        lambda: tf.constant(0),
        lambda: tf.constant(2 * (filter_size[1] // 2) * dilation[1]))
    width_padding = 0 if static_shape[2] == 1 else cond_padding
    padding = [[0, 0], [height_padding, 0], [width_padding, 0], [0, 0]]
    inputs = tf.pad(inputs, padding)
    # Set middle two dimensions to None to prevent convolution from complaining
    inputs.set_shape([static_shape[0], None, None, static_shape[3]])
    kwargs["padding"] = "VALID"
    return inputs, kwargs
def pad_up_to(vector, size, rank):
    """Pad the end of axis 1 of `vector` so that the axis has length `size`.

    Args:
        vector: Tensor of rank `rank + 1`.
        size: Target length of axis 1; must not be smaller than the current
            length (checked with a graph assertion).
        rank: Rank of `vector` minus one.

    Returns:
        The padded tensor.
    """
    current_shape = tf.shape(vector)
    length_diff = tf.reshape(size - current_shape[1], shape=(1,))
    non_negative = tf.assert_non_negative(
        length_diff, data=(vector, size, tf.shape(vector)))
    with tf.control_dependencies([non_negative]):
        # Flat layout: (0, 0) for axis 0, (0, diff) for axis 1, then (0, 0)
        # for each remaining axis.
        flat = tf.concat([[0, 0, 0], length_diff, [0, 0] * (rank - 1)], axis=0)
        padding = tf.reshape(flat, shape=((rank + 1), 2))
    return tf.pad(vector, padding, mode='constant')
def pad_up_to(vector, size):
    """Pad the end of axis 1 of `vector` so that the axis has length `size`.

    Args:
        vector: Tensor with statically known rank.
        size: Target length of axis 1; must not be smaller than the current
            length (checked with a graph assertion).

    Returns:
        The padded tensor.
    """
    num_axes = vector.get_shape().ndims
    rank = num_axes - 1
    length_diff = tf.reshape(size - tf.shape(vector)[1], shape=(1,))
    non_negative = tf.assert_non_negative(
        length_diff, data=(vector, size, tf.shape(vector)))
    with tf.control_dependencies([non_negative]):
        # Flat layout: (0, 0) for axis 0, (0, diff) for axis 1, then (0, 0)
        # for each remaining axis.
        flat = tf.concat([[0, 0, 0], length_diff, [0, 0] * (rank - 1)], axis=0)
        padding = tf.reshape(flat, shape=((rank + 1), 2))
    return tf.pad(vector, padding, mode='constant')
def add_loss_op(self, result):
    """Build the masked sequence loss for the decoder output.

    The logits in `result.rnn_output` are padded at the end of axis 1 up to
    `self.config.max_length`, shifted by 1e-5, and passed to
    `tf.contrib.seq2seq.sequence_loss` with a mask derived from
    `self.output_length_placeholder`.

    Returns:
        The loss tensor, guarded by a graph assertion that it is
        non-negative.
    """
    logits = result.rnn_output
    max_length = self.config.max_length

    has_steps = tf.assert_positive(tf.shape(logits)[1], data=[tf.shape(logits)])
    with tf.control_dependencies([has_steps]):
        length_diff = tf.reshape(max_length - tf.shape(logits)[1], shape=(1,))
    # Flat layout: (0, 0), (0, diff), (0, 0) for the three axes.
    padding = tf.reshape(
        tf.concat([[0, 0, 0], length_diff, [0, 0]], axis=0), shape=(3, 2))
    preds = tf.pad(logits, padding, mode='constant')

    # add epsilon to avoid division by 0
    preds = preds + 1e-5

    mask = tf.sequence_mask(self.output_length_placeholder, max_length, dtype=tf.float32)
    loss = tf.contrib.seq2seq.sequence_loss(preds, self.output_placeholder, mask)

    loss_ok = tf.assert_non_negative(loss, data=[preds, mask], summarize=256*60*300)
    with tf.control_dependencies([loss_ok]):
        return tf.identity(loss)
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [1024, 1024, 1024], filter_sizes = [1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank over delayed copies of the input, with batch norm.

    Copy `i` of the input is delayed by `i` frames (padded in front of
    axis 1 and cut back to the original length). A filter of size `fs`
    sees the first `fs` copies concatenated along the feature axis and is
    applied as a matrix product; all filter outputs are concatenated along
    axis 2 and batch-normalised.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for variable and batch-norm scope names.
        **unused_params: Ignored.

    Returns:
        `(cnn_output, max_frames)`.
    """
    static_dims = model_input.get_shape().as_list()
    max_frames = static_dims[1]
    num_features = static_dims[2]

    shift_inputs = [
        model_input if delay == 0
        else tf.pad(model_input, paddings=[[0,0],[delay,0],[0,0]])[:,:max_frames,:]
        for delay in range(max(filter_sizes))
    ]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        window = tf.concat(shift_inputs[:fs], axis=2)
        weights = tf.get_variable(
            sub_scope+"cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", window, weights))

    cnn_output = tf.concat(cnn_outputs, axis=2)
    cnn_output = slim.batch_norm(
        cnn_output,
        center=True,
        scale=True,
        is_training=FLAGS.train,
        scope=sub_scope+"cluster_bn")
    return cnn_output, max_frames
def cnn(self, model_input, l2_penalty=1e-8, num_filters=[1024,1024,1024], filter_sizes=[1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank over delayed copies of the input, with batch norm.

    Each filter of size `fs` is applied as a matrix product to the first
    `fs` delayed copies of the input (delay `i` = `i` frames padded in
    front of axis 1, cut back to the original length) concatenated along
    the feature axis. Filter outputs are concatenated along axis 2 and
    batch-normalised.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for variable and batch-norm scope names.
        **unused_params: Ignored.

    Returns:
        `(cnn_output, max_frames)`.
    """
    input_shape = model_input.get_shape().as_list()
    max_frames, num_features = input_shape[1], input_shape[2]

    def delayed(steps):
        # `steps` padded frames in front, then back to the original length.
        if steps == 0:
            return model_input
        return tf.pad(model_input, paddings=[[0,0],[steps,0],[0,0]])[:,:max_frames,:]

    shift_inputs = [delayed(steps) for steps in range(max(filter_sizes))]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(
            sub_scope+"cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    merged = tf.concat(cnn_outputs, axis=2)
    cnn_output = slim.batch_norm(
        merged,
        center=True,
        scale=True,
        is_training=FLAGS.train,
        scope=sub_scope+"cluster_bn")
    return cnn_output, max_frames
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [1024, 1024, 1024], filter_sizes = [1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank with bias over delayed copies, with batch norm.

    Each filter of size `fs` is applied as a matrix product plus a bias to
    the first `fs` delayed copies of the input (delay `i` = `i` frames
    padded in front of axis 1, cut back to the original length)
    concatenated along the feature axis. Filter outputs are concatenated
    along axis 2 and batch-normalised.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of filters and biases.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for variable and batch-norm scope names.
        **unused_params: Ignored.

    Returns:
        `(cnn_output, max_frames)`.
    """
    static_dims = model_input.get_shape().as_list()
    max_frames = static_dims[1]
    num_features = static_dims[2]

    shift_inputs = [
        model_input if delay == 0
        else tf.pad(model_input, paddings=[[0,0],[delay,0],[0,0]])[:,:max_frames,:]
        for delay in range(max(filter_sizes))
    ]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        window = tf.concat(shift_inputs[:fs], axis=2)
        weights = tf.get_variable(
            sub_scope+"cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        offsets = tf.get_variable(
            sub_scope+"cnn-bias-len%d"%fs,
            shape=[nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", window, weights) + offsets)

    cnn_output = tf.concat(cnn_outputs, axis=2)
    cnn_output = slim.batch_norm(
        cnn_output,
        center=True,
        scale=True,
        is_training=FLAGS.train,
        scope=sub_scope+"cluster_bn")
    return cnn_output, max_frames
def cnn(self, model_input, l2_penalty=1e-8, num_filters=[1024, 1024, 1024], filter_sizes=[1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank over delayed copies of the input.

    Each filter of size `fs` is applied as a matrix product to the first
    `fs` delayed copies of the input (delay `i` = `i` frames padded in
    front of axis 1, cut back to the original length) concatenated along
    the feature axis. Filter outputs are concatenated along axis 2.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for the filter variable names.
        **unused_params: Ignored.

    Returns:
        `(cnn_output, max_frames)`.
    """
    input_shape = model_input.get_shape().as_list()
    max_frames, num_features = input_shape[1], input_shape[2]

    shift_inputs = []
    for delay in range(max(filter_sizes)):
        if delay:
            padded = tf.pad(model_input, paddings=[[0,0],[delay,0],[0,0]])
            shift_inputs.append(padded[:,:max_frames,:])
        else:
            shift_inputs.append(model_input)

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(
            sub_scope+"cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    return tf.concat(cnn_outputs, axis=2), max_frames
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [1024, 1024, 1024], filter_sizes = [1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank over delayed copies of the input.

    Each filter of size `fs` is applied as a matrix product to the first
    `fs` delayed copies of the input (delay `i` = `i` frames padded in
    front of axis 1, cut back to the original length) concatenated along
    the feature axis.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for the filter variable names.
        **unused_params: Ignored.

    Returns:
        The filter outputs concatenated along axis 2.
    """
    static_dims = model_input.get_shape().as_list()
    max_frames = static_dims[1]
    num_features = static_dims[2]

    shift_inputs = [
        model_input if delay == 0
        else tf.pad(model_input, paddings=[[0,0],[delay,0],[0,0]])[:,:max_frames,:]
        for delay in range(max(filter_sizes))
    ]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        window = tf.concat(shift_inputs[:fs], axis=2)
        weights = tf.get_variable(
            sub_scope+"cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", window, weights))

    return tf.concat(cnn_outputs, axis=2)
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [128,128,256], filter_sizes = [1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank over delayed copies of the input.

    Each filter of size `fs` is applied as a matrix product to the first
    `fs` delayed copies of the input (delay `i` = `i` frames padded in
    front of axis 1, cut back to the original length) concatenated along
    the feature axis.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for the filter variable names.
        **unused_params: Ignored.

    Returns:
        The filter outputs concatenated along axis 2.
    """
    max_frames = model_input.get_shape().as_list()[1]
    num_features = model_input.get_shape().as_list()[2]

    # `range` instead of `xrange` so the method also runs on Python 3.
    shift_inputs = [
        model_input if i == 0
        else tf.pad(model_input, paddings=[[0,0],[i,0],[0,0]])[:,:max_frames,:]
        for i in range(max(filter_sizes))
    ]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(
            sub_scope+"filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    cnn_output = tf.concat(cnn_outputs, axis=2)
    return cnn_output
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [1024, 1024, 1024], filter_sizes = [1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank over delayed copies of the input.

    Each filter of size `fs` is applied as a matrix product to the first
    `fs` delayed copies of the input (delay `i` = `i` frames padded in
    front of axis 1, cut back to the original length) concatenated along
    the feature axis.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for the filter variable names.
        **unused_params: Ignored.

    Returns:
        The filter outputs concatenated along axis 2.
    """
    max_frames = model_input.get_shape().as_list()[1]
    num_features = model_input.get_shape().as_list()[2]

    # `range` instead of `xrange` so the method also runs on Python 3.
    shift_inputs = [
        model_input if i == 0
        else tf.pad(model_input, paddings=[[0,0],[i,0],[0,0]])[:,:max_frames,:]
        for i in range(max(filter_sizes))
    ]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(
            sub_scope+"cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    cnn_output = tf.concat(cnn_outputs, axis=2)
    return cnn_output
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [1024, 1024, 1024], filter_sizes = [1,2,3], sub_scope="", **unused_params):
    """Temporal filter bank over delayed copies of the input, with batch norm.

    Each filter of size `fs` is applied as a matrix product to the first
    `fs` delayed copies of the input (delay `i` = `i` frames padded in
    front of axis 1, cut back to the original length) concatenated along
    the feature axis. Filter outputs are concatenated along axis 2 and
    batch-normalised.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        sub_scope: Prefix for variable and batch-norm scope names.
        **unused_params: Ignored.

    Returns:
        The batch-normalised filter outputs.
    """
    max_frames = model_input.get_shape().as_list()[1]
    num_features = model_input.get_shape().as_list()[2]

    # `range` instead of `xrange` so the method also runs on Python 3.
    shift_inputs = [
        model_input if i == 0
        else tf.pad(model_input, paddings=[[0,0],[i,0],[0,0]])[:,:max_frames,:]
        for i in range(max(filter_sizes))
    ]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(
            sub_scope+"cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    cnn_output = tf.concat(cnn_outputs, axis=2)
    cnn_output = slim.batch_norm(
        cnn_output,
        center=True,
        scale=True,
        is_training=FLAGS.is_training,
        scope=sub_scope+"cluster_bn")
    return cnn_output
def cnn(self, model_input, l2_penalty=1e-8, num_filters = [1024, 1024, 1024], filter_sizes = [1,2,3], **unused_params):
    """Temporal filter bank over delayed copies of the input.

    Each filter of size `fs` is applied as a matrix product to the first
    `fs` delayed copies of the input (delay `i` = `i` frames padded in
    front of axis 1, cut back to the original length) concatenated along
    the feature axis.

    Args:
        model_input: 3-D tensor with static sizes on axes 1 and 2.
        l2_penalty: L2 regularisation weight of the filters.
        num_filters: Output size of each filter.
        filter_sizes: Number of frames each filter spans.
        **unused_params: Ignored.

    Returns:
        The filter outputs concatenated along axis 2.
    """
    max_frames = model_input.get_shape().as_list()[1]
    num_features = model_input.get_shape().as_list()[2]

    # `range` instead of `xrange` so the method also runs on Python 3.
    shift_inputs = [
        model_input if i == 0
        else tf.pad(model_input, paddings=[[0,0],[i,0],[0,0]])[:,:max_frames,:]
        for i in range(max(filter_sizes))
    ]

    cnn_outputs = []
    for nf, fs in zip(num_filters, filter_sizes):
        sub_input = tf.concat(shift_inputs[:fs], axis=2)
        sub_filter = tf.get_variable(
            "cnn-filter-len%d"%fs,
            shape=[num_features*fs, nf],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1),
            regularizer=tf.contrib.layers.l2_regularizer(l2_penalty))
        cnn_outputs.append(tf.einsum("ijk,kl->ijl", sub_input, sub_filter))

    cnn_output = tf.concat(cnn_outputs, axis=2)
    return cnn_output