Plotting gradient descent curves with Keras

0 votes
1 answer
31 views
Asked 2025-04-14 17:02

I implemented the following code in Keras, using the California housing dataset, to plot the values of theta 1 and theta 2 and see how the choice of stochastic, batch, or mini-batch gradient descent affects the result:

import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

# Load California housing dataset
california_housing = fetch_california_housing()
X, y = california_housing.data, california_housing.target

# Normalize features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

def build_model():
    model = Sequential([
        Dense(64, activation="relu", input_shape=(8,)),
        Dense(64, activation="relu"),
        Dense(1),
    ])
    # model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
    return model


sgd_optimizer = SGD(learning_rate=0.001)  # Stochastic Gradient Descent
minibatch_sgd_optimizer = SGD(learning_rate=0.001)  # Mini-batch Gradient Descent
batch_sgd_optimizer = SGD(learning_rate=0.001)  # Batch Gradient Descent


# Compile the model
model = build_model()
model.compile(loss='mse', optimizer=sgd_optimizer)

theta1_sgd, theta2_sgd = [], []
theta1_minibatch_sgd, theta2_minibatch_sgd = [], []
theta1_batch_sgd, theta2_batch_sgd = [], []


# Function to perform gradient descent and store theta values
def perform_gradient_descent(optimizer, batch_size=None):
    theta1_list, theta2_list = [], []
    loss_history = []
    for _ in range(5):  # Number of epochs
        history = model.fit(X_normalized, y, epochs=1, batch_size=batch_size, verbose=0)
        loss_history.append(history.history['loss'][0])
        weights = model.layers[0].get_weights()[0].flatten()  # Get current theta values
        theta1_list.append(weights[0])
        theta2_list.append(weights[1])
    print(theta1_list, " ", theta2_list)
    return loss_history, theta1_list, theta2_list

# Perform gradient descent with different optimizers
loss_sgd, theta1_sgd, theta2_sgd = perform_gradient_descent(sgd_optimizer, batch_size=1)  # Stochastic Gradient Descent
loss_minibatch_sgd, theta1_minibatch_sgd, theta2_minibatch_sgd = perform_gradient_descent(minibatch_sgd_optimizer, batch_size=32)  # Mini-batch Gradient Descent
loss_batch_sgd, theta1_batch_sgd, theta2_batch_sgd = perform_gradient_descent(batch_sgd_optimizer, batch_size=len(X_normalized))  # Batch Gradient Descent

# Plotting the loss versus number of epochs
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(loss_sgd) + 1), loss_sgd, label='Stochastic Gradient Descent')
plt.plot(range(1, len(loss_minibatch_sgd) + 1), loss_minibatch_sgd, label='Mini-batch Gradient Descent')
plt.plot(range(1, len(loss_batch_sgd) + 1), loss_batch_sgd, label='Batch Gradient Descent')
plt.xlabel('Number of Epochs')
plt.ylabel('Loss')
plt.title('Loss vs. Number of Epochs')
plt.legend()
plt.grid(True)
plt.show()

# Plotting the gradient descent trajectories
plt.figure(figsize=(10, 6))
plt.plot(theta1_sgd, theta2_sgd, label='Stochastic Gradient Descent', marker='o')
plt.plot(theta1_minibatch_sgd, theta2_minibatch_sgd, label='Mini-batch Gradient Descent', marker='s')
plt.plot(theta1_batch_sgd, theta2_batch_sgd, label='Batch Gradient Descent', marker='x')
plt.xlabel('Theta 1')
plt.ylabel('Theta 2')
plt.title('Gradient Descent Trajectories')
#plt.xlim(-0.08, -0.05)  # Set limit for Theta 1
#plt.ylim(0.02, 0.03)  # Set limit for Theta 2
plt.legend()
plt.grid(True)
plt.show()

However, I ran into a problem: sometimes the lists holding the theta values contain NaN (not a number), and other times they contain normal values. I noticed this happens when the number of training epochs exceeds 10. What is the reason for this?

1 Answer

0

It looks like there is only one model here, and it is being updated by the different optimizers even though it was compiled with only one of them.

You could try creating three models, each registered with its own optimizer, along the lines of the following:

.
.
.

# Compile the models, one per optimiser
model_1 = build_model()
model_2 = build_model()
model_3 = build_model()

model_1.compile(loss='mse', optimizer=sgd_optimizer)
model_2.compile(loss='mse', optimizer=minibatch_sgd_optimizer)
model_3.compile(loss='mse', optimizer=batch_sgd_optimizer)

theta1_sgd, theta2_sgd = [], []
theta1_minibatch_sgd, theta2_minibatch_sgd = [], []
theta1_batch_sgd, theta2_batch_sgd = [], []


# Function to perform gradient descent and store theta values
# Supply the model and its optimiser
def perform_gradient_descent(model, optimizer, batch_size=None): #<- now accepts "model" as well
    ...  # body is exactly the same as before, now training the "model" that is passed in

# Perform gradient descent with models and their optimizers

# Stochastic Gradient Descent on model_1
loss_sgd, theta1_sgd, theta2_sgd = perform_gradient_descent(
    model_1, sgd_optimizer, batch_size=1
)

# Mini-batch Gradient Descent on model_2
loss_minibatch_sgd, theta1_minibatch_sgd, theta2_minibatch_sgd = perform_gradient_descent(
    model_2, minibatch_sgd_optimizer, batch_size=32
)

# Batch Gradient Descent on model_3
loss_batch_sgd, theta1_batch_sgd, theta2_batch_sgd = perform_gradient_descent(
    model_3, batch_sgd_optimizer, batch_size=len(X_normalized)
)

.
.
.
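For completeness, a minimal sketch of what the adapted function could look like. It reuses the body from the question unchanged, except that it trains and reads the weights of whichever model is passed in (the optimizer argument is kept only to mirror the original signature):

# Sketch of the adapted function: same body as in the question,
# but it trains and inspects the model that is passed in.
def perform_gradient_descent(model, optimizer, batch_size=None):
    theta1_list, theta2_list = [], []
    loss_history = []
    for _ in range(5):  # Number of epochs
        history = model.fit(X_normalized, y, epochs=1, batch_size=batch_size, verbose=0)
        loss_history.append(history.history['loss'][0])
        weights = model.layers[0].get_weights()[0].flatten()  # Get current theta values
        theta1_list.append(weights[0])
        theta2_list.append(weights[1])
    return loss_history, theta1_list, theta2_list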

Because of random initialization, the three models will start from different points. You can control this with a little extra code by setting a random seed before each model is created, as in the sketch below.
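A minimal sketch of that seeding step, assuming a TensorFlow backend; the seed value 42 is arbitrary and the helper name build_seeded_model is just for illustration:

import numpy as np
import tensorflow as tf

SEED = 42  # arbitrary fixed seed

def build_seeded_model():
    # Reset the NumPy and TensorFlow seeds before building, so that
    # every model is created with identical initial weights.
    np.random.seed(SEED)
    tf.random.set_seed(SEED)
    return build_model()

model_1 = build_seeded_model()
model_2 = build_seeded_model()
model_3 = build_seeded_model()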
