反向传播输出趋向于相同的值

self.weights = [] for i in xrange(1, len(layers) - 1): self.weights.append( np.random.uniform(low=-0.2, high=0.2, size=(layers[i-1] + 1, layers[i] + 1))) # Output weights self.weights.append( np.random.uniform(low=-0.2, high=0.2, size=(layers[-2] + 1, layers[-1])))

def back_prop_learning(self, X, y): # add biases to inputs with value of 1 biases = np.atleast_2d(np.ones(X.shape[0])) X = np.concatenate((biases.T, X), axis=1) # Iterate over training set for epoch in xrange(self.epochs): # for each weight w[i][j] in network assign random tiny values # handled in __init__ ''' PROPAGATE THE INPUTS FORWARD TO COMPUTE THE OUTPUTS ''' for example in zip(X, y): # for each node i in the input layer # set input layer outputs equal to input vector outputs activations = [example[0]] # for layer = 1 (first hidden) to output layer for layer in xrange(len(self.weights)): # for each node j in layer weighted_sum = np.dot(activations[layer], self.weights[layer]) # assert number of outputs == number of weights in each layer assert(len(activations[layer]) == len(self.weights[layer])) # compute activation of weighted sum of node j activation = self.logistic(weighted_sum) # append vector of activations activations.append(activation) ''' PROPAGATE DELTAS BACKWARDS FROM OUTPUT LAYER TO INPUT LAYER ''' # for each node j in the output layer # compute error of target - output errors = example[1] - activations[-1] # multiply by derivative deltas = [errors * self.derivative(activations[-1])] # for layer = last hidden layer down to first hidden layer for layer in xrange(len(activations)-2, 0, -1): deltas.append(deltas[-1].dot(self.weights[layer].T) * self.derivative(activations[layer])) ''' UPDATE EVERY WEIGHT IN NETWORK USING DELTAS ''' deltas.reverse() # for each weight w[i][j] in network for i in xrange(len(self.weights)): layer = np.atleast_2d(activations[i]) delta = np.atleast_2d(deltas[i]) self.weights[i] += self.alpha * layer.T.dot(delta)

[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 9.0 [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 4.0 [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 6.0 [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 6.0 [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] 7.0

1条回答

网友

1楼 · 发布于 2024-04-24 19:21:53

我想我已经回答了我的问题。你知道吗

我相信问题在于我是如何计算输出层中的错误的。我将其计算为errors = example[1] - activations[-1]，这创建了一个错误数组，其结果是从目标值中减去我的输出层激活。你知道吗

我改变了这个，使我的目标值是一个0-9的向量，所以我的目标值的索引是1.0。你知道吗

y = int(example[1])
errors_v = np.zeros(shape=(10,), dtype=float)
errors_v[y] = 1.0
errors = errors_v - activations[-1]

我还把我的激活函数改为tanh函数。你知道吗

这大大增加了我的输出层中激活的差异，到目前为止，在有限的测试中，我已经能够达到50%-75%的准确率。希望这能帮助其他人。你知道吗

相关问题更多 >

编程相关推荐

热门问题

热门文章