我刚开始学习MLP。我尝试创建的MLP构造函数固定为1个隐藏层,并接受输入层大小、隐藏神经元数量和输出大小。我试着用4个神经元,2个输出神经元来模拟一个3输入1隐藏层。但是我一直在得到这个valueError del_b3 += delta3
。我知道它是一个(2,1)+(2,2),但这不应该加起来没有错误吗?不确定我是否误解了任何东西或在代码中犯了任何错误,如果有人能帮助我,我将不胜感激。提前谢谢
守则:
def sigm(z):
return 1.0/(1.0 + np.exp(-z))
def sigm_deriv(z):
return sigm(z)*(1.0 - sigm(z))
class XOR_MLP:
def __init__(self, input_layer_size, num_hidden_neurons, output_size):
self.train_inputs = np.array([[1,1,0], [1,-1,-1], [-1,1,1], [-1,-1,1],
[0,1,-1], [0,-1,-1], [1,1,1]])
self.train_outputs = np.array([[1,0],
[0,1],
[1,1],
[1,0],
[1,0],
[1,1],
[1,1]
])
np.random.seed(23)
# hidden layer of 2 neurons
# row number of neurons
# column number of weights connected to neuron
self.w2 = np.random.randn(num_hidden_neurons,input_layer_size)
self.b2 = np.random.randn(num_hidden_neurons,1)
# output layer has 1 neuron
self.w3 = np.random.randn(output_size,num_hidden_neurons)
self.b3 = np.random.randn(output_size,1)
def feedforward(self, xs):
# here xs is a matrix where each column is an input vector
# w2.dot(xs) applies the weight matrix w2 to each input at once
a2s = sigm(self.w2.dot(xs) + self.b2)
a3s = sigm(self.w3.dot(a2s) + self.b3)
return a3s
def backprop(self, xs, ys):
del_w2 = np.zeros(self.w2.shape, dtype=float)
del_b2 = np.zeros(self.b2.shape, dtype=float)
del_w3 = np.zeros(self.w3.shape, dtype=float)
del_b3 = np.zeros(self.b3.shape, dtype=float)
cost = 0.0
for x,y in zip(xs,ys):
a1 = x.reshape(3,1) # convert input vector x into (2,1) column vector
z2 = self.w2.dot(a1) + self.b2 # so will z2 and a2
a2 = sigm(z2) # column vector shape (2,1)
z3 = self.w3.dot(a2) + self.b3 # a simple number in a (1,1) column vector
a3 = sigm(z3) # so is a3
delta3 = (a3-y) * sigm_deriv(z3) # delta3.shape is (1,1)
delta2 = sigm_deriv(z2) * (self.w3.T.dot(delta3)) # w3 shape is (1,2), w3.T shape is (2,1)
print(del_b3) # delta2 is shape (2,1)
del_b3 += delta3
del_w3 += delta3.dot(a2.T) # shape (1,1) by (1,2) gives (1,2)
del_b2 += delta2
del_w2 += delta2.dot(a1.T) # shape (2,1) by (1,2) gives (2,2)
cost += ((a3 - y)**2).sum()
n = len(ys) # number of training vectors
# get the average change per training input
return del_b2/n, del_w2/n, del_b3/n, del_w3/n, cost/n
def train(self, epochs, eta):
xs = self.train_inputs
ys = self.train_outputs
cost = np.zeros((epochs,))
for e in range(epochs):
d_b2,d_w2,d_b3,d_w3, cost[e] = self.backprop(xs,ys)
self.b2 -= eta * d_b2
self.w2 -= eta * d_w2
self.b3 -= eta * d_b3
self.w3 -= eta * d_w3
# print(cost[e])
plt.plot(cost)
return cost
# MLP Constructor respectively, Input layer size, Number of hidden neurons,
# Output size
# Fixed hidden layer of 1
xor = XOR_MLP(3,4,2)
xs = xor.train_inputs.T
print(xor.feedforward(xs))
epochs = 1500
c = xor.train(epochs, 3.0)
print(xor.feedforward(xs))
目前没有回答
相关问题 更多 >
编程相关推荐