图像修改编码器/解码器开发

2024-04-19 05:32:03 发布

您现在位置:Python中文网/ 问答频道 /正文

在我目前从事的项目中,我的目标是训练一个神经网络,以模拟真实成像过程中的卷积/模糊的方式,将圆的图像转换成椭圆。你知道吗

剩下的就是构造一个神经网络,最好是一个CNN,它具有所需的结果——即获取一个带圆圈的图像作为输入,并返回一个带椭圆的图像。然而,我一直没能做到这一点。到目前为止,我使用的神经网络(包括CNN)最多只能返回模糊的圆图像。我不知道这是神经网络的错误还是我正在使用的预处理代码的错误。你知道吗

我是机器学习的初学者,我读到卷积神经网络是用于图像处理的最佳架构。因此,我试图为此开发一个CNN。有人建议使用编码器/解码器模型来解决这个问题,但我不知道如何做到这一点。你知道吗

#First, importing the necessary modules:

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, Reshape
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
import numpy as np
import pandas as pd
from collections import OrderedDict
import itertools
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import math
from math import sqrt
from keras.models import Model, load_model

#Next, creating and storing the input (circle) and output (ellipse) images:

def create_blank_image(size):
    data = np.ndarray(shape=(size, size))
    for i in range(0, size):
        for j in range(0, size):
            data[[i], [j]] = 0
    #print(data)

    return data

def circle_randomizer():
    number_of_circles = random.randint(4,10)
    intensity = np.ndarray(shape=(128, 128))
    #print(number_of_circles)
    radius_list = []


    for i in range(number_of_circles):
        radius_list.append(random.uniform(8, 10))
    #print(radius_list)

    center_coords = np.zeros((2,1))    
    center_coords[[0],[0]] = random.uniform(0,size)
    center_coords[[1],[0]] = random.uniform(0,size)

    for i in range(number_of_circles):
      #temp_array = np.ndarray(shape=(2,1))
      #temp_array[[0],[0]] = random.uniform(0,size)
      #temp_array[[1],[0]] = random.uniform(0,size)

      if i > 0:
          j = 0
          #print(i,j)
          while j in range(i):
              #print(i,j)
              #print(center_coords)
              temp_array = np.ndarray(shape=(2,1))
              temp_array[[0],[0]] = random.uniform(0,size)
              temp_array[[1],[0]] = random.uniform(0,size)
              #while sqrt((center_coords[[0],[i]] - center_coords[[0],[j]])**2 + (center_coords[[1],[i]] - center_coords[[1],[j]])**2) < radius_list[i] + radius_list[j]:
              while sqrt((temp_array[[0],[0]] - center_coords[[0],[j]])**2 + (temp_array[[1],[0]] - center_coords[[1],[j]])**2) < radius_list[i] + radius_list[j]:               
                  temp_array[[0],[0]] = random.uniform(0,size)
                  temp_array[[1],[0]] = random.uniform(0,size)
                  j = 0
              center_coords = np.concatenate((center_coords,temp_array), axis = 1)          
              j = j + 1
              #print('loop ran ' + str(j) + ' times')

    return radius_list, center_coords

def image_creator(centers, radii, img_data, size):
    x = np.arange(1, size, 1)
    y = np.arange(1, size, 1)

    for c in range(len(centers)):
        x0 = centers[[c],[0]]
        y0 = centers[[c],[1]]
        radius = radii[c]
        for i in range(0, size-1):
            for j in range(0, size-1):
                height2 = radius**2 - (x[i]-x0)**2 - (y[j]-y0)**2
                if height2 >= 0:
                    img_data[[i], [j]] = sqrt(radius**2 - (x[i]-x0)**2 - (y[j]-y0)**2)

    return img_data

def make_ellipses(size, radii, center_coords):
    # idea: use a random number generator to create a random rotation of the x,y axes for the ellipse

    # size is the length of a side of the square
    # length is the length of the ellipse
    # defined as equal to the radius of the circle later

    my_label = np.ndarray(shape=(size, size))
    x = np.arange(1, size, 1)
    y = np.arange(1, size, 1)

    # inefficiently zero the array
    for i in range(0, size):
        for j in range(0, size):
            my_label[[i], [j]] = 0
            # print(my_label)
    for c in range(len(center_coords)):
        x0 = center_coords[[c],[0]]
        y0 = center_coords[[c],[1]]
        #theta = random.uniform(0, 6.28318)
        theta = 0.775

        for i in range(0, size - 1):
            for j in range(0, size - 1):
                xprime = (x[i] - x0) * math.cos(theta) + (y[j] - y0) * math.sin(theta)
                yprime = -(x[i] - x0) * math.sin(theta) + (y[j] - y0) * math.cos(theta)
                height2 = (0.5 * radii[c]) ** 2 - 0.25 * xprime ** 2 - yprime ** 2
                if height2 >= 0:
                    my_label[[i], [j]] = sqrt((0.5 * radii[c]) ** 2 - 0.25 * xprime ** 2 - yprime ** 2)

    return my_label

size = 128
radii, centers = circle_randomizer()
#print(radii)
#print(centers)

#Make labels and samples consistent with rest of code
N = 100
circle_images = []
ellipse_images = []
coords = []
for sample in range(0, N):
    blank_image = create_blank_image(size)
    radii, centers = circle_randomizer()
    temp_image = image_creator(centers, radii, blank_image, size)
    circle_images.append(temp_image)
    temp_output = make_ellipses(size, radii, centers)
    ellipse_images.append(temp_output)
    coords.append(centers)
#print(labels)
#print(samples[0][40])

#Storing the images in files:

filenames = []
for i in range(0,N):
  np.save('ellipses_' + str(i) + '.npy', ellipse_images[i])
  filenames.append('ellipses_' + str(i) + '.npy')
  np.save('circles_' + str(i) + '.npy', circle_images[i])
circles_stack = np.stack(circle_images,axis=0)
ellipses_stack = np.stack(ellipse_images,axis=0)
np.save('ellipses_stack.npy', ellipses_stack)
np.save('circles_stack.npy', circles_stack)

#Loading the images:

# load training images and corresponding "labels"
# training samples
training_images_path = 'circles_stack.npy'
labels_path = 'ellipses_stack.npy'

X = np.load(training_images_path,'r')/20.
y = np.load(labels_path,'r')/20.

#Defining the image preprocessing functions:
#(I'm not sure why preprocessing_X and preprocessing_y are different; this is
#code I've partially adopted from a research paper.)

# Preprocessing for training images
def preprocessing_X(image_data, image_size):
    image_data = image_data.reshape(image_data.shape[0], image_size[0], image_size[1], 1)
    image_data = image_data.astype('float32')
    image_data = (image_data - np.amin(image_data))/(np.amax(image_data) - np.amin(image_data))
    return image_data


​
# preprocessing for "labels" (ground truth)
def preprocessing_Y(image_data, image_size):
    n_images = 0
    label = np.array([])
    for idx in range(image_data.shape[0]):
        img = image_data[idx,:,:]
        n, m = img.shape
        img = np.array(OneHotEncoder(n_values=nb_classes).fit_transform(img.reshape(-1,1)).todense())
        img = img.reshape(n, m, nb_classes)
        label = np.append(label, img)
        n_images += 1
    label_4D = label.reshape(n_images, image_size[0], image_size[1], nb_classes)    
    return label_4D
Preprocessing the images:

# Split into train/test and make the shapes of tensors compatible with tensorflow format
nb_classes = 10
target_size = (128, 128)

#Below line randomizes which images are picked for train/test sets. ~20% will go to test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train = preprocessing_X(X_train, target_size)
X_test = preprocessing_X(X_test, target_size)
y_train = preprocessing_Y(y_train, target_size)
y_test = preprocessing_Y(y_test, target_size)


#The encoder-decoder model that I'm using right now:
def model_shape(input_img, nb_classes = 2):
   x = Convolution2D(2, (3, 3), activation='relu', padding='same')(input_img)
   x = MaxPooling2D((2, 2), padding='same')(x)
   x = Convolution2D(4, (3, 3), activation='relu', padding='same')(x)
   x = MaxPooling2D((2, 2), padding='same')(x)

   x = Convolution2D(4, (3, 3), activation='relu', padding='same')(x)
   x = UpSampling2D((2, 2))(x)
   x = Convolution2D(2, (3, 3), activation='relu', padding='same')(x) 
   x = UpSampling2D((2, 2))(x)

   x = Convolution2D(nb_classes, (3, 3), activation = 'linear', padding='same')(x)
   x = Convolution2D(nb_classes, (1, 1), activation = 'linear', padding='same')(x)
   #x = Reshape((target_size[0] * target_size[1], nb_classes))(x)
   output = Activation('softmax')(x)

   return Model(input_img, output)

#Defining dice loss:
smooth = 1

def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)


def dice_coef_loss(y_true, y_pred):
    return -dice_coef(y_true, y_pred)

#Compiling the model:

nb_classes = 2
input_img = Input(shape=(target_size[0], target_size[1], 1)) 

model = model_shape(input_img)
model.compile(optimizer='adam', loss=dice_coef_loss, metrics = [dice_coef])
callback_tb = TensorBoard(log_dir='/tmp/Deconvoluter', histogram_freq=0,
                          write_graph=True, write_images=False)

model.fit(X_train, y_train, epochs=10, batch_size=32, 
          validation_data=(X_test, y_test))

model.save("/content/artificial_label_train.h5")
model.save_weights("/content/artificial_label_train_weights.h5")
print('Saved model and weights to disk.\n')

代码将编译到这一点,但在以下代码块之后返回错误:

# Loading models and obtaining softmax output (pixel-wise predictions)
def get_decoded_imgs(input_imgs, filepath, nb_channels = 2):
    model = load_model(filepath)
    decoded_imgs = model.predict(input_imgs)
    decoded_imgs = decoded_imgs.reshape(input_imgs.shape[0], target_size[0], target_size[1], nb_channels)
    print("FCN output obtained\n")
    return decoded_imgs

decoded_imgs = get_decoded_imgs(X_test, '/content/artificial_label_train.h5')

Outputs = {}
for i in range(X_test.shape[0]):
  decoded_img = decoded_imgs[i,:,:,1]
  #dictionary = OrderedDict()
  Outputs[i] = decoded_img

print('Plotting the results...\n')
plt.figure(figsize=(16, 8), dpi = 96)
for i in range(1, 5):
  FCN_output = Outputs[i-1]
  ax = plt.subplot(3, 5, i)
  plt.imshow(output,cmap='gray')
  print(FCN_output.shape)
  plt.title('output {0}'.format(i), fontsize = 10)
  ax.get_xaxis().set_visible(False)
  ax.get_yaxis().set_visible(False)
  plt.tight_layout()
  print('output' + str(i) + '\n' + str(output))
  print(np.max(y_test[i-1]),np.max(output))
plt.show() 

错误是“未知损失”功能:骰子损失系数“尽管骰子损失系数的定义很明确。你知道吗

输入是圆的图像,训练神经网络的基本真值是椭圆的图像。你知道吗

到目前为止,我使用的最新的编码器-解码器Keras模型如上面的代码所示;它返回~55%的val\u dice\u coeff,但这可能可以通过添加更多的epoch来改进。我曾经使用一个简单的CNN与mse损失,只返回低分辨率图像的输入。你知道吗

我现在遇到的问题是理解为什么我在尝试使用时会出错模型.预测()以骰子损失作为损失函数。你知道吗


Tags: theintestimageimportimgfordata