返回图像的Tensorflow卷积网络(无logits)

2024-04-19 18:28:10 发布

您现在位置:Python中文网/ 问答频道 /正文

我承担了一个项目,其中我必须使用卷积网络,将输出图像,而不是logit类预测。为此,我从https://github.com/aymericdamien/TensorFlow-Examples下载了CNN代码

我的输入数据是从二进制文件中读取的64x64图像。二进制文件由两个64x64图像的顺序记录组成。我需要最小化一个代价函数,它是第二个图像和网络的64x64输出的差。你知道吗

这是我编写的用于读取输入数据的模块:

 import tensorflow as tf

 # various initialization variables
 BATCH_SIZE = 128
 N_FEATURES = 9

 # This function accepts a tensor of size [batch_size, 2 ,record_size]
 # and segments in into two tensors of size [batch_size, record] along  the second dimension
 # IMPORTANT: to be executed within an active session
 def segment_batch(batch_p, batch_size, n_input):
   batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input])    #   optical data tensor
   batch_ys = tf.slice(batch_p, [0,1,0],  [batch_size,1,n_input])           # GT data tensor
   optical = tf.reshape([batch_xs], [batch_size, n_input])
   gt = tf.reshape([batch_ys], [batch_size, n_input])

   return [optical, gt]



def batch_generator(filenames, record_size, batch_size):
  """ filenames is the list of files you want to read from. 
  record_bytes: The size of a record in bytes
  batch_size: The size a data batch (examples/batch)
  """

  filename_queue = tf.train.string_input_producer(filenames)
  reader = tf.FixedLengthRecordReader(record_bytes=2*record_size) #     record size is double the value given (optical + ground truth images)
  _, value = reader.read(filename_queue)


  # read in the data (UINT8)
  content = tf.decode_raw(value, out_type=tf.uint8) 



  # The bytes read  represent the image, which we reshape
  # from [depth * height * width] to [depth, height, width].
  # read optical data slice
  depth_major = tf.reshape(
    tf.strided_slice(content, [0],
                   [record_size]),
    [1, 64, 64])

  # read GT (ground truth) data slice
  depth_major1 = tf.reshape(
    tf.strided_slice(content, [record_size],
                   [2*record_size]),
    [1, 64, 64])

  # Optical data
  # Convert from [depth, height, width] to [height, width, depth].
  uint8image = tf.transpose(depth_major, [1, 2, 0])
  uint8image = tf.reshape(uint8image, [record_size])    # reshape into a    single-dimensional vector
  uint8image = tf.cast(uint8image, tf.float32)  # cast into a float32
  uint8image = uint8image/255   # normalize

  # Ground Truth data
  # Convert from [depth, height, width] to [height, width, depth].
  gt_image = tf.transpose(depth_major1, [1, 2, 0])
  gt_image = tf.reshape(gt_image, [record_size])    # reshape into a single-dimensional vector
  gt_image = tf.cast(gt_image, tf.float32)  # cast into a float32
  gt_image = gt_image/255 # normalize

  # stack them into a single features tensor
  features = tf.stack([uint8image, gt_image])

  # minimum number elements in the queue after a dequeue, used to ensure 
  # that the samples are sufficiently mixed
  # I think 10 times the BATCH_SIZE is sufficient
  min_after_dequeue = 10 * batch_size

  # the maximum number of elements in the queue
  capacity = 20 * batch_size

  # shuffle the data to generate BATCH_SIZE sample pairs
  data_batch = tf.train.shuffle_batch([features], batch_size=batch_size, 
                                    capacity=capacity,    min_after_dequeue=min_after_dequeue)

  return data_batch

这是我实现的主要代码:

 from __future__ import print_function

 # Various initialization variables
 DATA_PATH_OPTICAL_TRAIN = 'data/building_ground_truth_for_training.bin'
 DATA_PATH_EVAL = 'data/building_ground_truth_for_eval.bin'

 import tensorflow as tf
 import numpy as np
 import matplotlib.pyplot as plt
 import time

 # custom imports
 import data_reader2


 # Parameters
 learning_rate = 0.001
 training_iters = 200000
 batch_size = 128
 epochs = 10
 display_step = 10
 rows = 64
 cols = 64

 # Network Parameters
 n_input = 4096 # optical image data (img shape: 64*64)
 n_classes = 4096 # output is an image of same resolution as initial image
 dropout = 0.75 # Dropout, probability to keep units

 # input data parameters
 record_size = 64**2
 total_bytes_of_optical_binary_file = 893329408 # total size of binary file containing training data ([64z64 optical] [64x64 GT])

 # create the data batches (queue)
 # Accepts two parameters. The tensor containing the binary files and the size of a record
 data_batch = data_reader2.batch_generator([DATA_PATH_OPTICAL_TRAIN],record_size, batch_size)   # train set
 data_batch_eval = data_reader2.batch_generator([DATA_PATH_EVAL],record_size, batch_size)   # train set

 ##############################################################
 ######################### FUNCTIONS ##########################
 ##############################################################

 # extract optical array from list
 # A helper function. Data returned from segment_batch is a list which contains two arrays.
 # The first array contains the optical data while the second contains the ground truth data
 def extract_optical_from_list(full_batch):
   optical = full_batch[0]  # extract array from list
   return optical

 # extract ground truth array from list
 # A helper function. Data returned from segment_batch is a list which contains two arrays.
 # The first array contains the optical data while the second contains the ground truth data
 def extract_gt_from_list(full_batch):
   gt = full_batch[1]   # extract array from list
   return gt

 # This function accepts a tensor of size [batch_size, 2 ,record_size]
 # and segments in into two tensors of size [batch_size, record] along the second dimension
 # IMPORTANT: to be executed within an active session
 def segment_batch(batch_p):
    batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input])   # optical data tensor
    batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input])           # GT data tensor
    optical = tf.reshape([batch_xs], [batch_size, n_input])
    gt = tf.reshape([batch_ys], [batch_size, n_input])

    return [optical, gt]

 # Create some wrappers for simplicity
 def conv2d(x, W, b, strides=1):
 # Conv2D wrapper, with bias and relu activation
   x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
   x = tf.nn.bias_add(x, b)
   return tf.nn.relu(x)


 def maxpool2d(x, k=2):
   # MaxPool2D wrapper
   return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                      padding='SAME')


 # Create model
 def conv_net(x, weights, biases, dropout):
   # Reshape input picture into 64x64 subimages [rows, rows, cols, channels]
   x1 = tf.reshape(x, shape=[-1, rows, cols, 1])    # this is the 4-dimensional that tf.conv2D expects as Input

   # Convolution Layer
   conv1 = conv2d(x1, weights['wc1'], biases['bc1'])
   # Max Pooling (down-sampling)
   conv1 = maxpool2d(conv1, k=2)

   # Convolution Layer
   conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
   # Max Pooling (down-sampling)
   conv2 = maxpool2d(conv2, k=2)

   # Fully connected layer
   # Reshape conv2 output to fit fully connected layer input
   fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
   fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
   fc1 = tf.nn.relu(fc1)
   # Apply Dropout
   #fc1 = tf.nn.dropout(fc1, dropout)

   # Output image (edge), prediction
   out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])

   # Add print operation
   out = tf.Print(out, [out], message="This is out: ")

   return [out, x]

 # Store layers weight & bias
 weights = {
   # 5x5 conv, 1 input, 32 outputs
   'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
   # 5x5 conv, 32 inputs, 64 outputs
   'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
   # fully connected, 7*7*64 inputs, 1024 outputs
   'wd1': tf.Variable(tf.random_normal([16*16*64, 1024])),
   # 1024 inputs, 10 outputs (class prediction)
   'out': tf.Variable(tf.random_normal([1024, n_classes]))
 }

 biases = {
   'bc1': tf.Variable(tf.random_normal([32])),
   'bc2': tf.Variable(tf.random_normal([64])),
   'bd1': tf.Variable(tf.random_normal([1024])),
   'out': tf.Variable(tf.random_normal([n_classes]))
 }


 ####################################################################
 ##################### PLACEHOLDERS #################################
 ####################################################################
 # tf Graph input (only pictures)
 X =   tf.placeholder_with_default(extract_optical_from_list(segment_batch(data_batch)), [batch_size, n_input])
 ####################################################################
 ##################### END OF PLACEHOLDERS ##########################
 ####################################################################

 # tf Graph input
 keep_prob = tf.Variable(dropout) #dropout (keep probability)

 # Construct model
 pred = conv_net(extract_optical_from_list(X), weights, biases, keep_prob)  # x[0] is the optical data
 y_true = extract_gt_from_list(extract_gt_from_list(X)) # y_true is the ground truth data

 # Define loss and optimizer
 cost = tf.reduce_mean(tf.pow(y_true - pred[0], 2))
 optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)


 # Initializing the variables
 init = tf.global_variables_initializer()

 # Launch the graph
 with tf.Session() as sess:
   sess.run(init)
   step = 1
   # Keep  training until reach max iterations
   while step * batch_size < training_iters:
     print("Optimizing")
     sess.run(optimizer)
     print("Iter " + str(step*batch_size))
     step += 1
 print("Optimization Finished!")

在对张量的形状做了很多调整之后,我设法纠正了语法错误。不幸的是,它只是在开始执行图的优化部分时挂起。因为我没有办法调试这个(发现关于使用Tensorflow调试器的信息非常少),我真的不知道哪里出了问题!如果有人对Tensorflow有更多的经验,可以指出这段代码有什么问题,这将对我有很大帮助。你知道吗

提前谢谢


Tags: ofthefromimagegtinputdatasize
1条回答
网友
1楼 · 发布于 2024-04-19 18:28:10

您需要启动队列运行程序来从队列中获取优化所需的数据。你知道吗

....
coord = tf.train.Coordinator()
with tf.Session() as sess:
    sess.run(init)
    tf.train.start_queue_runners(sess=sess, coord=coord)
    ....
# also use tf.nn.sparse_softmax_cross_entropy_with_logits for cost

相关问题 更多 >