ValueError: non-broadcastable output operand with shape (2,1) doesn't match the broadcast shape (2,2)

Posted 2024-06-12 15:16:40


I've just started learning about MLPs. The MLP constructor I'm trying to write is fixed at one hidden layer and takes the input layer size, the number of hidden neurons, and the output size. I tried to model a network with 3 inputs, one hidden layer of 4 neurons, and 2 output neurons, but I keep getting this ValueError at del_b3 += delta3. I understand it is a (2,1) + (2,2), but shouldn't that add without an error? I'm not sure whether I've misunderstood something or made a mistake in the code; I'd appreciate it if anyone could help. Thanks in advance.

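The error itself is standard NumPy behavior: an out-of-place add happily broadcasts (2,1) with (2,2), but += must write the result back into the existing (2,1) array, which cannot hold a (2,2) result. A minimal sketch reproducing both the error and its root cause (plain NumPy, independent of the MLP code):

import numpy as np

col = np.zeros((2, 1))                # same shape as del_b3
sq = np.ones((2, 2))                  # same shape as the accidental delta3
print((col + sq).shape)               # (2, 2): the out-of-place add broadcasts fine
# col += sq                           # ValueError: output operand with shape (2,1)
#                                     # cannot hold the broadcast (2,2) result

# how delta3 ends up (2,2): a (2,1) column minus a (2,) row broadcasts
a3 = np.zeros((2, 1))
y = np.array([1, 0])                  # one row of train_outputs, shape (2,)
print((a3 - y).shape)                 # (2, 2), not the intended (2, 1)
print((a3 - y.reshape(-1, 1)).shape)  # (2, 1) once y is made a column vector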
The code:

import numpy as np
import matplotlib.pyplot as plt

def sigm(z):
    return 1.0/(1.0 + np.exp(-z))

def sigm_deriv(z):
    return sigm(z)*(1.0 - sigm(z))

class XOR_MLP:
    def __init__(self, input_layer_size, num_hidden_neurons, output_size):
        self.train_inputs = np.array([[1,1,0], [1,-1,-1], [-1,1,1], [-1,-1,1],
                                     [0,1,-1], [0,-1,-1], [1,1,1]])
        self.train_outputs = np.array([[1,0],
                                       [0,1],
                                       [1,1],
                                       [1,0],
                                       [1,0],
                                       [1,1],
                                       [1,1]
                                      ])
          
        np.random.seed(23)
        # hidden layer of num_hidden_neurons neurons:
        # each row corresponds to one neuron,
        # each column to a weight feeding that neuron
        self.w2 = np.random.randn(num_hidden_neurons,input_layer_size)
        self.b2 = np.random.randn(num_hidden_neurons,1)
        
        # output layer of output_size neurons
        self.w3 = np.random.randn(output_size,num_hidden_neurons)
        self.b3 = np.random.randn(output_size,1)
        

    def feedforward(self, xs):    
        # here xs is a matrix where each column is an input vector
        # w2.dot(xs) applies the weight matrix w2 to each input at once
        a2s = sigm(self.w2.dot(xs) + self.b2)
        a3s = sigm(self.w3.dot(a2s) + self.b3)            
        return a3s

    
    def backprop(self, xs, ys):
        del_w2 = np.zeros(self.w2.shape, dtype=float)
        del_b2 = np.zeros(self.b2.shape, dtype=float)
        
        del_w3 = np.zeros(self.w3.shape, dtype=float)
        del_b3 = np.zeros(self.b3.shape, dtype=float)
        cost = 0.0
        
        for x, y in zip(xs, ys):
            a1 = x.reshape(-1, 1)            # input as a (3,1) column vector
            y = y.reshape(-1, 1)             # target as a (2,1) column vector;
                                             # without this, y has shape (2,) and
                                             # (a3 - y) broadcasts to (2,2)
            z2 = self.w2.dot(a1) + self.b2   # shape (4,1)
            a2 = sigm(z2)                    # hidden activations, shape (4,1)

            z3 = self.w3.dot(a2) + self.b3   # shape (2,1)
            a3 = sigm(z3)                    # output activations, shape (2,1)

            delta3 = (a3 - y) * sigm_deriv(z3)                 # output error, shape (2,1)
            delta2 = sigm_deriv(z2) * (self.w3.T.dot(delta3))  # w3 is (2,4), so w3.T.dot(delta3)
                                                               # and delta2 have shape (4,1)
            del_b3 += delta3
            del_w3 += delta3.dot(a2.T)  # (2,1) dot (1,4) gives (2,4)

            del_b2 += delta2
            del_w2 += delta2.dot(a1.T)  # (4,1) dot (1,3) gives (4,3)
        
           
            cost += ((a3 - y)**2).sum() 
            
        n = len(ys)  # number of training vectors           
        # get the average change per training input  
        return del_b2/n, del_w2/n, del_b3/n, del_w3/n, cost/n
        
    def train(self, epochs, eta):
        xs = self.train_inputs
        ys = self.train_outputs
        cost = np.zeros((epochs,))
        
        for e in range(epochs):
            d_b2,d_w2,d_b3,d_w3, cost[e] = self.backprop(xs,ys)
                
            self.b2 -= eta * d_b2
            self.w2 -= eta * d_w2
            self.b3 -= eta * d_b3
            self.w3 -= eta * d_w3
            # print(cost[e])
        plt.plot(cost)
        plt.show()  # actually display the training curve when run as a script
        return cost

# MLP constructor arguments, respectively: input layer size, number of
# hidden neurons, output size (the network is fixed at one hidden layer)
xor = XOR_MLP(3,4,2)
xs = xor.train_inputs.T

print(xor.feedforward(xs))

epochs = 1500
c = xor.train(epochs, 3.0)

print(xor.feedforward(xs))
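
As a quick sanity check after training, the rounded network outputs can be compared with the training targets (just a sketch using the arrays defined above):

preds = xor.feedforward(xs)      # shape (2, 7): one output column per training input
print(np.round(preds))           # threshold the sigmoid outputs at 0.5
print(xor.train_outputs.T)       # targets, transposed into the same layout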
