Performance drop when loading a pre-trained DQN model
Background: I am working on a reinforcement learning project that uses a Deep Q-Network (DQN). After training the model I saved it, and later loaded it back for further evaluation. However, the loaded model performs much worse than the model did right after training.
Problem: The issue seems to occur when loading the trained model: its performance drops dramatically, as if it had learned nothing during training. This is confusing, because the model was performing well before I saved it.
import random
from collections import deque

import gym
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


class DQLAgent():
    def __init__(self, env, model_path=None):
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.gamma = 0.95
        self.learning_rate = 0.001
        self.epsilon = 1
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.memory = deque(maxlen=2000)
        if model_path:
            self.model = load_model(model_path)  # Load model if path provided
        else:
            self.model = self.build_model()  # Build new model otherwise

    def build_model(self):
        model = Sequential()
        model.add(Dense(48, input_dim=self.state_size, activation='tanh'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # Epsilon-greedy action selection
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()
        else:
            act_values = self.model.predict(state, verbose=0)
            return np.argmax(act_values[0])

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward if done else reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            train_target = self.model.predict(state, verbose=0)
            train_target[0][action] = target
            self.model.fit(state, train_target, verbose=0)

    def adaptiveEGreedy(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
# Initialize gym environment and the agent
env = gym.make('CartPole-v1')
agent = DQLAgent(env)
episodes = 50
batch_size = 32
round_results = []

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, 4])
    total_reward = 0
    while True:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, 4])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        agent.replay(batch_size)
        agent.adaptiveEGreedy()
        total_reward += reward
        if done:
            print(f'Episode: {e+1}, Total reward: {total_reward}')
            round_results.append(total_reward)
            break

agent.model.save('dql_cartpole_model.keras')
Evaluation script:
model_path = 'dql_cartpole_model.keras'  # Update this path
env = gym.make('CartPole-v1')
agent = DQLAgent(env, model_path=model_path)
episodes = 100
round_results = []

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, 4])
    total_reward = 0
    while True:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, 4])
        state = next_state
        total_reward += reward
        if done:
            print(f'Episode: {e+1}, Total reward: {total_reward}')
            round_results.append(total_reward)
            break

# Plot the rewards
plt.plot(round_results)
plt.title('Rewards per Episode')
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.show()
Edit: I added methods to the DQLAgent class to save and load the agent's state, hoping to preserve epsilon and the replay memory, but the result is still the same.
# Added to the DQLAgent class (requires "import pickle" at the top of the script)
def save_agent_state(self, file_path):
    with open(file_path, 'wb') as file:
        pickle.dump({
            'epsilon': self.epsilon,
            'memory': list(self.memory)  # Convert deque to list before saving
        }, file)

def load_agent_state(self, file_path):
    with open(file_path, 'rb') as file:
        agent_state = pickle.load(file)
    self.epsilon = agent_state['epsilon']
    self.memory = deque(agent_state['memory'], maxlen=2000)
I also added this at the end of training:
agent.model.save('dql_cartpole_model.keras')
agent.save_agent_state('agent_state.pkl')
and added this to the evaluation script:
model_path = 'dql_cartpole_model.keras' # Update this path
agent.load_agent_state('agent_state.pkl')
1 Answer
When you load your model with agent = DQLAgent(env, model_path=model_path), self.epsilon is set back to 1. That means the agent almost always picks random actions, which is especially noticeable in the early stages, so the loaded model looks as if it never learned anything. To avoid this, save epsilon together with the model, or add an evaluation flag for the trained model so that it always picks the best (greedy) action instead of exploring.
In your current implementation the agent also loses its memory, so if you save and load the model intending to continue training, it effectively starts from scratch: epsilon is reset to 1 and the replay memory is empty.
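A minimal sketch of that suggestion, assuming you only need greedy behaviour at evaluation time; the epsilon override and the EvalDQLAgent subclass name are illustrative additions, not part of the original code:

# Option 1: force epsilon to 0 right after loading, so act() never samples random actions.
env = gym.make('CartPole-v1')
agent = DQLAgent(env, model_path='dql_cartpole_model.keras')
agent.epsilon = 0.0  # disable exploration for evaluation only

# Option 2: a hypothetical evaluation-only subclass that always acts greedily.
class EvalDQLAgent(DQLAgent):
    def act(self, state):
        # Always pick the action with the highest predicted Q-value.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

# agent = EvalDQLAgent(env, model_path='dql_cartpole_model.keras')

If you instead want to resume training, keep epsilon and the replay memory as in your edit, and restore them before calling replay() again.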