我正在尝试使用 ENet 深度学习模型分割图像中的道路和非道路区域。我使用的是这个 GitHub 仓库:https://github.com/kwotsin/TensorFlow-ENet,它的原始图像大小为 340X480。对于 340X480 或更大的图像,模型工作正常;但一旦我调整图像大小,它就不能正常工作了,输出的是由随机黑白像素组成的损坏图像。即使我在保持相同纵横比的前提下缩小图像,也仍然不起作用。
这是我的ENet模型结构代码:
#Now actually start building the network
def ENet(inputs,
         num_classes,
         batch_size,
         num_initial_blocks=1,
         stage_two_repeat=2,
         skip_connections=True,
         reuse=None,
         is_training=True,
         scope='ENet'):
    '''
    Build the ENet encoder/decoder graph for semantic segmentation.

    INPUTS:
    - inputs(Tensor): a 4D Tensor of shape [batch_size, image_height, image_width, num_channels] that represents
        one batch of preprocessed images. Its static batch dimension is overwritten with batch_size.
    - num_classes(int): the number of classes to predict; this is the channel depth of the final transposed convolution.
    - batch_size(int): the batch size used to explicitly set the static shape of the inputs.
    - num_initial_blocks(int): how many initial blocks to apply; values below 1 are treated as 1.
    - stage_two_repeat(int): how many times the dilated/asymmetric bottleneck group is repeated; values below 2
        are treated as 2 (giving stage 2 and stage 3).
    - skip_connections(bool): if True, add the encoder feature maps saved after the initial block(s) and after
        stage one to the corresponding decoder outputs.
    - reuse(bool): forwarded to tf.variable_scope to control variable reuse.
    - is_training(bool): forwarded to every initial_block and bottleneck call through slim.arg_scope.
    - scope(str): the name of the enclosing variable scope.

    OUTPUTS:
    - logits(Tensor): the un-normalised per-pixel class scores produced by the final transposed convolution
        (num_classes channels).
    - probabilities(Tensor): the softmax of logits over the class axis.
    '''
    #Set the shape of the inputs first to get the batch_size information
    inputs_shape = inputs.get_shape().as_list()
    inputs.set_shape(shape=(batch_size, inputs_shape[1], inputs_shape[2], inputs_shape[3]))

    with tf.variable_scope(scope, reuse=reuse):
        #Set the primary arg scopes. Fused batch_norm is faster than normal batch norm.
        with slim.arg_scope([initial_block, bottleneck], is_training=is_training),\
             slim.arg_scope([slim.batch_norm], fused=True), \
             slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=None):
            #=================INITIAL BLOCK=================
            net = initial_block(inputs, scope='initial_block_1')
            for i in range(2, max(num_initial_blocks, 1) + 1):
                net = initial_block(net, scope='initial_block_' + str(i))

            #Save for skip connection later
            if skip_connections:
                net_one = net

            #===================STAGE ONE=======================
            net, pooling_indices_1, inputs_shape_1 = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, downsampling=True, scope='bottleneck1_0')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_1')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_2')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_3')
            net = bottleneck(net, output_depth=64, filter_size=3, regularizer_prob=0.01, scope='bottleneck1_4')

            #Save for skip connection later
            if skip_connections:
                net_two = net

            #regularization prob is 0.1 from bottleneck 2.0 onwards
            with slim.arg_scope([bottleneck], regularizer_prob=0.1):
                net, pooling_indices_2, inputs_shape_2 = bottleneck(net, output_depth=128, filter_size=3, downsampling=True, scope='bottleneck2_0')

                #Repeat the stage two at least twice to get stage 2 and 3:
                for i in range(2, max(stage_two_repeat, 2) + 2):
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_1')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=2, scope='bottleneck'+str(i)+'_2')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_3')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=4, scope='bottleneck'+str(i)+'_4')
                    net = bottleneck(net, output_depth=128, filter_size=3, scope='bottleneck'+str(i)+'_5')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=8, scope='bottleneck'+str(i)+'_6')
                    net = bottleneck(net, output_depth=128, filter_size=5, asymmetric=True, scope='bottleneck'+str(i)+'_7')
                    net = bottleneck(net, output_depth=128, filter_size=3, dilated=True, dilation_rate=16, scope='bottleneck'+str(i)+'_8')

            #The loop above always runs at least twice, so `i` now holds the index of the last
            #encoder stage; the decoder scopes continue the numbering from it. The scope strings
            #name the variables, so they are kept exactly as they were.
            with slim.arg_scope([bottleneck], regularizer_prob=0.1, decoder=True):
                #===================STAGE FOUR========================
                bottleneck_scope_name = "bottleneck" + str(i + 1)

                #The decoder section, so start to upsample.
                net = bottleneck(net, output_depth=64, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_2, output_shape=inputs_shape_2, scope=bottleneck_scope_name+'_0')

                #Perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_two, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_1')
                net = bottleneck(net, output_depth=64, filter_size=3, scope=bottleneck_scope_name+'_2')

                #===================STAGE FIVE========================
                bottleneck_scope_name = "bottleneck" + str(i + 2)

                net = bottleneck(net, output_depth=16, filter_size=3, upsampling=True,
                                 pooling_indices=pooling_indices_1, output_shape=inputs_shape_1, scope=bottleneck_scope_name+'_0')

                #perform skip connections here
                if skip_connections:
                    net = tf.add(net, net_one, name=bottleneck_scope_name+'_skip_connection')

                net = bottleneck(net, output_depth=16, filter_size=3, scope=bottleneck_scope_name+'_1')

            #=============FINAL CONVOLUTION=============
            #Stride-2 transposed convolution that maps the 16 decoder channels to num_classes channels.
            logits = slim.conv2d_transpose(net, num_classes, [2,2], stride=2, scope='fullconv')
            probabilities = tf.nn.softmax(logits, name='logits_to_softmax')

        return logits, probabilities
下面是代码的完整链接:https://github.com/kwotsin/TensorFlow-ENet/blob/master/enet.py
预测分段代码:
# Collect the test images (PNG only) in a stable, sorted order.
image_dir = './dataset/test/'
images_list = sorted([os.path.join(image_dir, filename)
                      for filename in os.listdir(image_dir)
                      if filename.endswith('.png')])

checkpoint_dir = "log/original/"
# Not used by the visible code; kept because listing the directory fails fast when it is missing.
listi = os.listdir(checkpoint_dir)
print(images_list)

# NOTE(review): the checkpoint is looked up under an absolute path while the outputs go to the
# relative checkpoint_dir -- confirm that both are meant to point at the same directory.
checkpoint = tf.train.latest_checkpoint("/home/nikhil_m/TensorFlow-ENet/log/original")
print(checkpoint, '-DDD--------------------------------------++++++++++++++++++++++++++++++++++++++++++++++++++++')
if checkpoint is None:
    # latest_checkpoint returns None when no checkpoint is found; stop here with a clear message
    # instead of failing later inside saver.restore.
    raise IOError("No checkpoint found in /home/nikhil_m/TensorFlow-ENet/log/original")

num_initial_blocks = 1
skip_connections = False
stage_two_repeat = 2

# The upstream script's colour table (from
# https://github.com/alexgkendall/SegNet-Tutorial/blob/c922cc4a4fcc7ce279dd998fb2d4a8703f34ebd7/Scripts/test_segmentation_camvid.py)
# listed these classes, with road_marking collapsed into road in the provided dataset:
#   Sky = [128,128,128]        Building = [128,0,0]       Pole = [192,192,128]
#   Road_marking = [255,69,0]  Road = [128,64,128]        Pavement = [60,40,222]
#   Tree = [128,128,0]         SignSymbol = [192,128,128] Fence = [64,64,128]
#   Car = [64,0,128]           Pedestrian = [64,64,0]     Bicyclist = [0,128,192]
#   Unlabelled = [0,0,0]
# Only two labels are mapped here.
# NOTE(review): presumably 0/1 are the road / non-road classes of the question -- confirm which is which.
label_to_colours = {0: [128, 128, 128],
                    1: [0, 0, 0]}

#Create the photo directory
photo_dir = os.path.join(checkpoint_dir, "test_images")
if not os.path.exists(photo_dir):
    os.mkdir(photo_dir)
#Create a function to convert each pixel label to colour.
def grayscale_to_colour(image):
    '''
    Convert a 2D map of class labels into a 3-channel colour image.

    INPUTS:
    - image(ndarray): per-pixel class labels of shape [height, width] or [height, width, 1].
        Any size is accepted (the previous version only worked for 256x256). Values are
        truncated to integers before the lookup.

    OUTPUTS:
    - coloured(ndarray): array of shape [height, width, 3] with the same dtype as the input,
        where every pixel holds label_to_colours[label].

    RAISES:
    - ValueError: if the input is not a 2D label map (or a 3D one with a single channel).
    - KeyError: if the map contains a label that has no entry in label_to_colours.
    '''
    print('Converting image...')
    image = np.asarray(image)
    if image.ndim == 3 and image.shape[-1] == 1:
        image = image[..., 0]
    if image.ndim != 2:
        raise ValueError('Expected a [height, width] label map, got shape %s' % (image.shape,))

    labels = image.astype(np.int64)

    #Keep the KeyError of the per-pixel dictionary lookup for labels without a colour.
    unknown = sorted(set(np.unique(labels).tolist()) - set(label_to_colours))
    if unknown:
        raise KeyError('No colour defined for label(s) %s' % unknown)

    #Build a [num_labels, 3] palette once and colour every pixel with a single indexing operation.
    #The palette uses the input dtype so the result has the same dtype as before.
    palette = np.zeros((max(label_to_colours) + 1, 3), dtype=image.dtype)
    for label, colour in label_to_colours.items():
        palette[label] = colour
    return palette[labels]
# Number of images per inference batch. The same value is given to tf.train.batch and to ENet(),
# which pins it as the static batch dimension of its inputs.
batch_size = 10

with tf.Graph().as_default() as graph:
    # Input pipeline: file names -> raw PNG bytes -> decoded RGB image -> preprocessed image -> batch.
    images_tensor = tf.train.string_input_producer(images_list, shuffle=False)
    reader = tf.WholeFileReader()
    key, image_tensor = reader.read(images_tensor)
    image = tf.image.decode_png(image_tensor, channels=3)
    print(image.shape, 'newwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww shapeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee')
    # image = tf.image.resize_image_with_crop_or_pad(image, 360, 480)
    # image = tf.cast(image, tf.float32)
    # NOTE(review): the explicit resize above is disabled, so the spatial size fed to the network is
    # whatever preprocess() (defined elsewhere) produces -- confirm it matches the intended input size.
    image = preprocess(image)
    images = tf.train.batch([image], batch_size=batch_size, allow_smaller_final_batch=True)

    #Create the model inference
    # NOTE(review): is_training=True is kept from the original script even though this is inference --
    # confirm this is intended.
    with slim.arg_scope(ENet_arg_scope()):
        logits, probabilities = ENet(images,
                                     num_classes=2,
                                     batch_size=batch_size,
                                     is_training=True,
                                     reuse=None,
                                     num_initial_blocks=num_initial_blocks,
                                     stage_two_repeat=stage_two_repeat,
                                     skip_connections=skip_connections)

    variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    def restore_fn(sess):
        # Called by the Supervisor once the session exists, to load the trained weights.
        return saver.restore(sess, checkpoint)

    # Per-pixel label = index of the most probable class.
    predictions = tf.argmax(probabilities, -1)
    predictions = tf.cast(predictions, tf.float32)
    print('HERE %s' % (predictions.get_shape(),))

    sv = tf.train.Supervisor(logdir=None, init_fn=restore_fn)

    with sv.managed_session() as sess:
        num_images = len(images_list)
        # Round up so the images after the last full batch are processed too; the previous floor
        # division skipped them (and skipped everything when there were fewer images than a batch).
        num_batches = (num_images + batch_size - 1) // batch_size
        for i in range(num_batches):
            segmentations = sess.run(predictions)
            # print segmentations.shape
            print(segmentations.shape, 'shape')
            # string_input_producer has no epoch limit here, so it cycles through the file list and
            # the last batch is filled up with images from the start of the list. Only the entries
            # that belong to not-yet-saved images are written.
            remaining = num_images - i * batch_size
            for j in range(min(segmentations.shape[0], remaining)):
                converted_image = grayscale_to_colour(segmentations[j])
                print('Saving image %s/%s' % (i * batch_size + j, num_images))
                #plt.axis('off')
                #plt.imshow(converted_image)
                imsave(photo_dir + "/image_%s.png" % (i * batch_size + j), converted_image)
                # plt.show()
下面是完整的代码链接:https://github.com/kwotsin/TensorFlow-ENet/blob/master/predict_segmentation.py
你可以试试这个模型。它是用 Keras(TensorFlow)编写的。
相关问题 更多 >
编程相关推荐