I am trying to use tf.nn.sparse_softmax_cross_entropy_with_logits, following user Olivier Moindrot's answer [here][1], but I am getting a dimension error.
I am building a segmentation network, so the input image is 200x200 and the output image is also 200x200. The classification is binary: foreground and background.
After building the CNN:
pred = conv_net(x, weights, biases, keep_prob)
pred looks like this:
<tf.Tensor 'Add_1:0' shape=(?, 40000) dtype=float32>
The CNN has several convolution layers followed by a fully connected layer. The fully connected layer has 40000 units because it flattens the 200x200 output.
Following the answer linked above, I reshape pred like this…
(Side note: I also tried tf.pack()-ing two preds together, as in that answer, but I think that was wrong.)
pred = tf.reshape(pred, [-1, 200, 200, 2])
…so that there are 2 classes. Continuing from the linked answer…
temp_pred = tf.reshape(pred, [-1, 2])
temp_y = tf.reshape(y, [-1])
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
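For reference, tf.nn.sparse_softmax_cross_entropy_with_logits expects logits of shape [N, num_classes] and integer labels of shape [N], so after these two reshapes both tensors should describe the same N = batch_size * 200 * 200 pixels. A minimal standalone sketch of the intended shapes (a batch size of 10 is assumed for concreteness):

import tensorflow as tf

pred = tf.zeros([10, 200, 200, 2])            # per-pixel logits for 2 classes
y = tf.zeros([10, 200, 200], dtype=tf.int64)  # per-pixel integer labels
temp_pred = tf.reshape(pred, [-1, 2])         # -> [batch * 200 * 200, 2]
temp_y = tf.reshape(y, [-1])                  # -> [batch * 200 * 200]
print temp_pred.get_shape()                   # (400000, 2)
print temp_y.get_shape()                      # (400000,)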
I have the following placeholders and batch data…
x = tf.placeholder(tf.float32, [None, 200, 200])
y = tf.placeholder(tf.int64, [None, 200, 200])

(Pdb) batch_x.shape
(10, 200, 200)
(Pdb) batch_y.shape
(10, 200, 200)
When I run the training session, I get the following dimension error:
tensorflow.python.framework.errors.InvalidArgumentError: logits first dimension must match labels size. logits shape=[3200000,2] labels shape=[400000]
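It may be worth noting that 3200000 is exactly 8 × 400000: the flattened logits contain eight times as many rows as there are labels, even though both reshapes should produce batch_size * 200 * 200 entries. A trivial sanity check of the expected sizes (assuming batch_size = 10):

batch_size = 10
expected_rows = batch_size * 200 * 200  # 400000, matches the labels shape
print expected_rows                     # 400000
print 3200000 / expected_rows           # 8 -> pred carries 8x too many values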
My full code is below:
import tensorflow as tf
import pdb
import numpy as np

# Import MINST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 10
display_step = 1

# Network Parameters
n_input = 200  # MNIST data input (img shape: 28*28)
n_classes = 2  # MNIST total classes (0-9 digits)
n_output = 40000
# n_input = 200
dropout = 0.75  # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.int64, [None, n_input, n_input])
keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')

# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, 200, 200, 1])

    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling)
    # conv1 = tf.nn.local_response_normalization(conv1)
    # conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    # conv2 = tf.nn.local_response_normalization(conv2)
    # conv2 = maxpool2d(conv2, k=2)

    # Convolution Layer
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # Max Pooling (down-sampling)
    # conv3 = tf.nn.local_response_normalization(conv3)
    # conv3 = maxpool2d(conv3, k=2)
    # return conv3

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    return tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    # output = []
    # for i in xrange(2):
    #     # output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    #     output.append((tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    # return output

# Store layers weight & bias
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
    # fully connected, 7*7*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([50*50*64, 1024])),
    # 1024 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_output]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bc3': tf.Variable(tf.random_normal([128])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_output]))
}

# Construct model
pred = conv_net(x, weights, biases, keep_prob)
pdb.set_trace()
# pred = tf.pack(tf.transpose(pred, [1, 2, 0]))
pred = tf.reshape(pred, [-1, n_input, n_input, 2])
temp_pred = tf.reshape(pred, [-1, 2])
temp_y = tf.reshape(y, [-1])

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
# correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
temp_pred2 = tf.reshape(pred, [-1, n_input, n_input])
correct_pred = tf.equal(tf.cast(y, tf.float32), tf.sub(temp_pred2, tf.cast(y, tf.float32)))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    summ = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
    step = 1
    from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
    data = scroll_data.read_data('/home/kendall/Desktop/')
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = data.train.next_batch(batch_size)
        # Run optimization op (backprop)
        batch_x = batch_x.reshape((batch_size, n_input, n_input))
        batch_y = batch_y.reshape((batch_size, n_input, n_input))
        batch_y = np.int64(batch_y)
        # y = tf.reshape(y, [-1, n_input, n_input])
        pdb.set_trace()
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            pdb.set_trace()
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                              y: batch_y,
                                                              keep_prob: 1.})
            print "Iter " + str(step * batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc)
        step += 1
    print "Optimization Finished!"

    # Calculate accuracy for 256 mnist test images
    print "Testing Accuracy:", \
        sess.run(accuracy, feed_dict={x: data.test.images[:256],
                                      y: data.test.labels[:256],
                                      keep_prob: 1.})
Let's forget about softmax and use the simpler tf.nn.sigmoid_cross_entropy_with_logits here:
The predictions and the targets should both have shape [batch_size, 40000]:
pred = conv_net(x, weights, biases, keep_prob)  # shape [batch_size, 40000]
flattened_y = tf.reshape(y, [-1, 40000])        # shape [batch_size, 40000]
loss = tf.nn.sigmoid_cross_entropy_with_logits(pred, flattened_y)
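A slightly fuller sketch of this approach, assuming the placeholders from the question and the positional signature sigmoid_cross_entropy_with_logits(logits, targets) of that TensorFlow era: the int64 labels have to be cast to float32 to serve as sigmoid targets, and the elementwise loss still needs a tf.reduce_mean to become a scalar cost:

pred = conv_net(x, weights, biases, keep_prob)                 # [batch_size, 40000] logits
flattened_y = tf.cast(tf.reshape(y, [-1, 40000]), tf.float32)  # [batch_size, 40000] 0/1 targets
# the raw loss is elementwise, shape [batch_size, 40000]; average it to a scalar
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, flattened_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# per-pixel prediction: threshold the sigmoid probability at 0.5
binary_pred = tf.cast(tf.sigmoid(pred) > 0.5, tf.float32)
correct_pred = tf.equal(binary_pred, flattened_y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

With a sigmoid, each of the 40000 outputs is an independent foreground/background probability, so no reshape to [..., 2] is needed at all.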