<p>我发现了问题:
sigmoid 函数不适合此网络。我把它改为 tanh,现在预测结果是正确的。</p>
<p>最终代码:</p>
<pre><code>import random
from math import exp,pow
class ANN:
    """Minimal feed-forward network: 2 inputs -> 3 hidden units -> 1 output.

    Every weight is stored as an individual scalar attribute:

    * ``w<k><i>_I``  -- weight from input ``i`` to hidden unit ``k``.
    * ``w1<k>_II``   -- weight from hidden unit ``k`` to the output unit.
    * ``b_I``        -- one bias value shared by all three hidden units
      (the output unit has no bias term).

    ``activation`` / ``activation_drv`` select the non-linearity used by
    both layers; they are set to tanh and its derivative in ``__init__``.
    """

    def random_weight(self):
        """Return an initial weight taken from ``random.random()``."""
        return random.random()

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + e^-x).

        The branch on the sign of ``x`` keeps the argument passed to
        ``exp`` non-positive, so large-magnitude inputs cannot raise
        ``OverflowError``.
        """
        if x >= 0:
            return 1.0 / (1.0 + exp(-x))
        e = exp(x)
        return e / (1.0 + e)

    def sigmoid_drv(self, x):
        """Return the derivative of the logistic function at ``x``."""
        s = self.sigmoid(x)
        return s * (1.0 - s)

    def tanh(self, x):
        """Return the hyperbolic tangent of ``x``.

        Written in terms of ``exp(-2|x|)`` so that ``exp`` is only ever
        called with a non-positive argument; the naive
        ``(e^x - e^-x) / (e^x + e^-x)`` form overflows for large ``|x|``.
        """
        if x >= 0:
            e = exp(-2.0 * x)
            return (1.0 - e) / (1.0 + e)
        e = exp(2.0 * x)
        return (e - 1.0) / (e + 1.0)

    def tanh_drv(self, x):
        """Return the derivative of tanh at ``x``: 1 - tanh(x)^2."""
        return 1 - pow(self.tanh(x), 2)

    def __init__(self):
        """Initialise all weights and the shared hidden bias randomly."""
        # Input -> hidden weights (three hidden units, two inputs each).
        self.w11_I = self.random_weight()
        self.w12_I = self.random_weight()
        self.w21_I = self.random_weight()
        self.w22_I = self.random_weight()
        self.w31_I = self.random_weight()
        self.w32_I = self.random_weight()
        # Hidden -> output weights.
        self.w11_II = self.random_weight()
        self.w12_II = self.random_weight()
        self.w13_II = self.random_weight()
        # Single bias shared by every hidden unit.
        self.b_I = self.random_weight()
        # Activation used by both layers, together with its derivative.
        self.activation = self.tanh
        self.activation_drv = self.tanh_drv

    def predict(self, x1, x2):
        """Run a forward pass for the input pair ``(x1, x2)``.

        Returns:
            An 8-tuple ``(a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II)``
            where each ``a`` is a pre-activation sum and each ``z`` the
            corresponding activated value. The last element, ``z1_II``,
            is the network output.
        """
        a1_I = self.w11_I * x1 + self.w12_I * x2 + self.b_I
        z1_I = self.activation(a1_I)
        a2_I = self.w21_I * x1 + self.w22_I * x2 + self.b_I
        z2_I = self.activation(a2_I)
        a3_I = self.w31_I * x1 + self.w32_I * x2 + self.b_I
        z3_I = self.activation(a3_I)
        a1_II = self.w11_II * z1_I + self.w12_II * z2_I + self.w13_II * z3_I
        z1_II = self.activation(a1_II)
        return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

    def train(self, x1, x2, y, alpha):
        """Apply one gradient-descent update for a single sample.

        Args:
            x1, x2: The two input values.
            y: Target output for this sample.
            alpha: Learning rate that scales every weight update.

        Returns:
            The squared-error loss ``0.5 * (output - y)^2`` measured
            before the weights are updated.
        """
        a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = self.predict(x1, x2)
        error = 0.5 * pow(z1_II - y, 2)

        # Error signal at the output unit's pre-activation.
        delta = (z1_II - y) * self.activation_drv(a1_II)

        # Error signals at each hidden unit's pre-activation. These are
        # computed from the current (pre-update) output weights.
        g1 = delta * self.w11_II * self.activation_drv(a1_I)
        g2 = delta * self.w12_II * self.activation_drv(a2_I)
        g3 = delta * self.w13_II * self.activation_drv(a3_I)

        # All steps are computed first, then applied, so no update
        # influences another gradient within the same call.
        d_w11_II = delta * z1_I * alpha
        d_w12_II = delta * z2_I * alpha
        d_w13_II = delta * z3_I * alpha
        d_w11_I = g1 * x1 * alpha
        d_w12_I = g1 * x2 * alpha
        d_w21_I = g2 * x1 * alpha
        d_w22_I = g2 * x2 * alpha
        d_w31_I = g3 * x1 * alpha
        d_w32_I = g3 * x2 * alpha
        # The bias is shared, so its gradient is the sum over hidden units.
        d_b_I = (g1 + g2 + g3) * alpha

        self.w11_II -= d_w11_II
        self.w12_II -= d_w12_II
        self.w13_II -= d_w13_II
        self.w11_I -= d_w11_I
        self.w12_I -= d_w12_I
        self.w21_I -= d_w21_I
        self.w22_I -= d_w22_I
        self.w31_I -= d_w31_I
        self.w32_I -= d_w32_I
        self.b_I -= d_b_I
        return error
# Build a network, train it on the samples in `data`, then print what it
# predicts for every sample.
model = ANN()

# Each row is [x1, x2, target]; the targets are the logical AND of the inputs.
data = [
    [0, 0, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 1, 1],
]

for _ in range(200):
    epoch_error = 0
    # Shuffle a copy so the order of `data` itself is left untouched.
    samples = list(data)
    random.shuffle(samples)
    for x1, x2, target in samples:
        epoch_error += model.train(x1, x2, target, 0.1)
    # Summed loss over the four samples of this pass.
    print(epoch_error)

print("-" * 30)

for x1, x2, _target in data:
    # Only the last element of the forward-pass tuple is the network output.
    *_, res = model.predict(x1, x2)
    print(x1, ",", x2, "=", res)
</code></pre>
<p>代码结果:</p>
<pre><code>...
0.1978539306282795
0.19794670251861882
0.19745074826953185
0.19529942727878868
0.19779970636626873
0.19661596298810918
0 , 0 = -0.24217968147818447
0 , 1 = 0.236033934015224
1 , 0 = 0.24457439328909888
1 , 1 = 0.5919949310028919
</code></pre>