Expected object of device type cuda but got device type cpu for argument #2 'mat2' in call to _th_mm

Posted 2024-06-02 05:26:14


I recently got access to a GPU and tried to run my code on it, but I get an error. I have read that adding .cuda() to the layers might help, but I ran similar code without the "qml" part and it worked fine without changing the layers (that is, without using .cuda()). For brevity, I have only included the class for the neural network and the main training loop; the parts of the code that are missing are used for other things. I am happy to include everything if the rest of the code is needed.
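To clarify what I mean by "adding .cuda() to the layers", here is a minimal sketch (not taken from my project) of moving a layer's parameters onto the GPU with .to(device) / .cuda() and checking where they live:

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# a plain linear layer is created on the CPU by default
layer = nn.Linear(8, 4)
print(next(layer.parameters()).device)  # cpu

# .to(device) (or .cuda()) moves its weight and bias onto the GPU,
# so a GPU input tensor can be multiplied with them
layer = layer.to(device)
x = torch.randn(2, 8, device=device)
print(layer(x).device)  # cuda:0 when a GPU is available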

Code:

class DQN(nn.Module):

    def __init__(self, img_height, img_width):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=img_height * img_width * 3, out_features=12)
        self.fc2 = nn.Linear(in_features=12, out_features=8)
       # self.fc3 = nn.Linear(in_features=10, out_features=8)
        self.clayer_in = torch.nn.Linear(in_features=8, out_features=wires)
        self.clayer_out = torch.nn.Linear(wires, out_dim)

        dev = qml.device('strawberryfields.fock', wires=wires, cutoff_dim=3)
        self.layer_qnode = qml.QNode(layer, dev)

        weights = qml.init.cvqnn_layers_all(n_quantum_layers, wires)
        weight_shapes = {"w{}".format(i): w.shape for i, w in enumerate(weights)}
        
        self.qlayer = qml.qnn.TorchLayer(self.layer_qnode, weight_shapes)

    def forward(self, t):
        t = self.flatten(t)
        t = self.fc1(t)
        t = self.fc2(t)
       # t = self.fc3(t)
        t = self.clayer_in(t)
        t = self.qlayer(t)
        t = self.clayer_out(t)
        t = t.sigmoid()
        return t




device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#print(device)
em = CartPoleEnvManager(device)
strategy = EpsilonGreedyStrategy(eps_start, eps_end, eps_decay)
agent = Agent(strategy, em.num_actions_available(), device)
memory = ReplayMemory(memory_size)
#learning_rate = LearningRate(lr_start,lr_end,lr_decay)
#learn = lr(learning_rate)

policy_net = DQN(em.get_screen_height(), em.get_screen_width()).to(device)
target_net = DQN(em.get_screen_height(), em.get_screen_width()).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval() #tells pytorch that target_net is only used for inference, not training
optimizer = optim.Adam(params=policy_net.parameters(), lr=0.01)

i = 0
episode_durations = []
for episode in range(num_episodes): #iterate over each episode
    program_starts = time.time()
    em.reset()
    state = em.get_state()
    
    for timestep in count():
        action = agent.select_action(state, policy_net)
        reward = em.take_action(action)
        next_state = em.get_state()
        memory.push(Experience(state, action, next_state, reward))
        state = next_state
        #i+=1
        #print(i)
        if memory.can_provide_sample(batch_size):
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.9)
            experiences = memory.sample(batch_size)
            states, actions, rewards, next_states = extract_tensors(experiences)
            
            current_q_values = QValues.get_current(policy_net, states, actions)
            next_q_values = QValues.get_next(target_net, next_states) # gets the max Q-values of the next states from the target net
            target_q_values = (next_q_values * gamma) + rewards

            loss = F.mse_loss(current_q_values, target_q_values.unsqueeze(1))
            optimizer.zero_grad() # sets the gradients of all weights and biases in policy_net to zero
            loss.backward() #computes gradient of loss with respect to all weights n biases in the policy net
            optimizer.step() # updates the weights and biases using the gradients computed by loss.backward()
            scheduler.step()
        if em.done:
            episode_durations.append(timestep)
            plot(episode_durations, 100)
            break
    if episode % target_update == 0:
        target_net.load_state_dict(policy_net.state_dict()) 
    now = time.time()
    print("Episode hat {0} Sekunden gedauert".format(now - program_starts))     
        
em.close()

The error is:

Traceback (most recent call last):
  File "qdqn.py", line 328, in <module>
    loss.backward() #computes gradient of loss with respect to all weights n biases in the policy net
  File "/home/ubuntu/anaconda3/envs/gymm/lib/python3.8/site-packages/torch/tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/ubuntu/anaconda3/envs/gymm/lib/python3.8/site-packages/torch/autograd/__init__.py", line 98, in backward
    Variable._execution_engine.run_backward(
RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat2' in call to _th_mm
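
If it helps, my understanding of the message is that a matrix multiplication inside one of the layers receives its input on the GPU while the second operand ('mat2', i.e. a weight matrix) is still on the CPU. A small check like the following (just a sketch, using the variable names from the code above) shows which parameters end up on which device:

# sketch: list the device of every parameter of the policy net
# to see which sub-module still holds CPU tensors after .to(device)
for name, param in policy_net.named_parameters():
    print(name, param.device)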
