卷积层的偏置（bias）真的对测试精度有影响吗？

# Experiment: does the bias term in the convolutional layers affect test accuracy?
#
# Two networks with the same architecture are trained on the same mini-batches:
# one adds a bias after each convolution, the other does not. Each network owns
# its own variables, so training one never modifies the other and the two test
# accuracies can be compared directly.
#
# Expects these names to be defined earlier in the file: tf, image_size,
# num_channels, num_labels, train_dataset, train_labels, valid_dataset,
# test_dataset, test_labels and accuracy().

batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Two stride-2 'SAME' convolutions shrink each spatial side to
    # ceil(image_size / 4); written with integer arithmetic.
    reduced_size = (image_size + 3) // 4

    # Initial weight values. The same tensors are used to initialise both
    # models; they are evaluated once when the initializer op runs, so both
    # models start from identical weights and differ only in the conv biases.
    initial_weights = [
        tf.truncated_normal(
            [patch_size, patch_size, num_channels, depth], stddev=0.1),
        tf.truncated_normal(
            [patch_size, patch_size, depth, depth], stddev=0.1),
        tf.truncated_normal(
            [reduced_size * reduced_size * depth, num_hidden], stddev=0.1),
        tf.truncated_normal(
            [num_hidden, num_labels], stddev=0.1),
    ]

    def new_weights():
        """Return a fresh list of the four weight variables (conv1, conv2, fc1, fc2)."""
        return [tf.Variable(init) for init in initial_weights]

    def new_fc_biases():
        """Return fresh bias variables for the two fully connected layers."""
        return [
            tf.Variable(tf.constant(1.0, shape=[num_hidden])),
            tf.Variable(tf.constant(1.0, shape=[num_labels])),
        ]

    # Variables of the model WITH convolutional biases.
    weights_with_bias = new_weights()
    conv_biases = [
        tf.Variable(tf.zeros([depth])),
        tf.Variable(tf.constant(1.0, shape=[depth])),
    ]
    fc_biases_with_bias = new_fc_biases()

    # Variables of the model WITHOUT convolutional biases.
    weights_without_bias = new_weights()
    fc_biases_without_bias = new_fc_biases()

    def forward(data, weights, fc_biases, conv_layer_biases=None):
        """Build the logits of the two-conv / two-dense network for `data`.

        Args:
            data: input image batch tensor.
            weights: list of four weight variables (conv1, conv2, fc1, fc2).
            fc_biases: list of two bias variables for the dense layers.
            conv_layer_biases: list of two bias variables added after each
                convolution, or None to build the network without them.

        Returns:
            The logits tensor produced by the last dense layer.
        """
        hidden = data
        for layer in range(2):
            conv = tf.nn.conv2d(
                hidden, weights[layer], [1, 2, 2, 1], padding='SAME')
            if conv_layer_biases is not None:
                conv = conv + conv_layer_biases[layer]
            hidden = tf.nn.relu(conv)
        # Flatten the feature maps so they can feed the dense layers.
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, weights[2]) + fc_biases[0])
        return tf.matmul(hidden, weights[3]) + fc_biases[1]

    def model_with_bias(data):
        """Logits of the model that adds a bias after each convolution."""
        return forward(data, weights_with_bias, fc_biases_with_bias, conv_biases)

    def model_without_bias(data):
        """Logits of the model with biases only in the fully connected layers."""
        return forward(data, weights_without_bias, fc_biases_without_bias)

    # Training computation.
    logits_with_bias = model_with_bias(tf_train_dataset)
    loss_with_bias = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf_train_labels, logits=logits_with_bias))

    logits_without_bias = model_without_bias(tf_train_dataset)
    loss_without_bias = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf_train_labels, logits=logits_without_bias))

    # Optimizer. Each optimizer is restricted to its own model's variables.
    optimizer_with_bias = tf.train.GradientDescentOptimizer(0.05).minimize(
        loss_with_bias,
        var_list=weights_with_bias + conv_biases + fc_biases_with_bias)
    optimizer_without_bias = tf.train.GradientDescentOptimizer(0.05).minimize(
        loss_without_bias,
        var_list=weights_without_bias + fc_biases_without_bias)

    # Predictions for the training, validation, and test data (with bias).
    train_prediction_with_bias = tf.nn.softmax(logits_with_bias)
    valid_prediction_with_bias = tf.nn.softmax(model_with_bias(tf_valid_dataset))
    test_prediction_with_bias = tf.nn.softmax(model_with_bias(tf_test_dataset))

    # Predictions for the training, validation, and test data (without bias).
    train_prediction_without_bias = tf.nn.softmax(logits_without_bias)
    valid_prediction_without_bias = tf.nn.softmax(
        model_without_bias(tf_valid_dataset))
    test_prediction_without_bias = tf.nn.softmax(
        model_without_bias(tf_test_dataset))

num_steps = 1001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        # Walk through the training set in consecutive mini-batches, wrapping around.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        # Both models take one gradient step on the same mini-batch.
        session.run(
            [optimizer_with_bias, optimizer_without_bias], feed_dict=feed_dict)
    print('Test accuracy(with bias): %.1f%%' % accuracy(
        test_prediction_with_bias.eval(), test_labels))
    print('Test accuracy(without bias): %.1f%%' % accuracy(
        test_prediction_without_bias.eval(), test_labels))

1条回答

网友

1楼 · 发布于 2024-05-16 09:42:22

Biases are tuned alongside weights by learning algorithms such as gradient descent. Biases differ from weights in that they are independent of the output from previous layers. Conceptually, a bias is the weight on an input from a neuron with a fixed activation of 1, and so it is updated by subtracting just the product of the delta value and the learning rate.

在一个大的模型中，去掉偏置输入几乎没有什么区别，因为每个节点都可以利用其所有输入的平均激活值构造出一个偏置节点；根据大数定律，这个平均值的分布近似正态。在第一层，能否做到这一点取决于您的输入分布。例如，对于MNIST，输入的平均激活值大致是恒定的。在小型网络上，当然需要偏置输入，但在大型网络上，删除偏置几乎没有区别。

另见：

Reference

相关问题更多 >

编程相关推荐

热门问题

热门文章