Computing a deep neural network's partial derivatives with respect to its inputs



I'm trying to compute the derivative of a neural network with 2 or more hidden layers with respect to its inputs. So this is not "standard backpropagation", because I'm not interested in how the output changes as the weights change. I don't want to use it to train my network (if that means the backpropagation tag should be removed, let me know, but I suspect what I need is not too different).

The reason I'm interested in the derivative is that I have a test set which sometimes gives me matching [x1, x2] : [y] pairs and sometimes [x1, x2] : [d(y)/dx1] or [x1, x2] : [d(y)/dx2] pairs. I then train my network with a particle swarm algorithm.

I like diagrams, so to save a few words, here is my network:

[figure: diagram of my network]

I would like the compute_derivative method to return a numpy array of the following form:

[figure: desired layout of the derivative array]

Here is my attempt so far, but at the end I can't seem to get an array that matches the number of inputs. I can't figure out what I'm doing wrong.

def compute_derivative(self):
    """Computes the network derivative and returns an array with the change in output with respect to each input"""
    self.compute_layer_derivative(0)
    for l in np.arange(1,self.size):
        dl = self.compute_layer_derivative(l)
        dprev = self.layers[l-1].derivatives
        self.output_derivatives = dl.T.dot(dprev)

    return self.output_derivatives

def compute_layer_derivative(self, l_id):
    wL = self.layers[l_id].w        # layer weight matrix
    zL = self.layers[l_id].output   # layer output
    daL = self.layers[l_id].f(zL, div=1)  # derivative of the activation function at zL
    daLM = np.repeat(daL,wL.shape[0], axis=0)  # one copy per input neuron

    self.layers[l_id].derivatives = np.multiply(daLM,wL)

    return self.layers[l_id].derivatives

If you want to run the whole code, I've made a cut-down, commented version that works with copy-paste. Thanks for your help!


Edited the answer based on Sirgue's reply:

# Here we assume that the layer has sigmoid activation, so sigmoid_d
# is the sigmoid derivative s'(z) = s(z) * (1 - s(z))
def Jacobian(x = np.array([[1,1]]), w = np.array([[1,1],[1,1]]), b = np.array([[1,1]])):
    return sigmoid_d(x.dot(w) + b) * w # J(S, x)

For a network with two sigmoid-activated hidden layers and a sigmoid-activated output layer, we have:

J_L1 =  Jacobian(x = np.array([[1,1]])) # [1,1] are the inputs to the network (the values of the input-layer neurons)
# each layer's Jacobian is evaluated at that layer's *input*, i.e. the previous
# layer's activations: sigmoid([3,3]) ~ [0.95257413, 0.95257413],
# where [3,3] are the layer-1 pre-activations
J_L2 =  Jacobian(x = np.array([[0.95257413, 0.95257413]]))
# in the output layer the weights and biases are adjusted as there is 1 neuron rather than 2;
# its input is sigmoid([2.90514825, 2.90514825]) ~ [0.94810035, 0.94810035],
# where [2.90514825, 2.90514825] are the layer-2 pre-activations
J_Lout = Jacobian(x = np.array([[0.94810035, 0.94810035]]), w = np.array([[1],[1]]), b = np.array([[1]]))
J_out_to_in = J_Lout.T.dot(J_L2).dot(J_L1)
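For completeness, here is a self-contained sketch of the same computation, assuming sigmoid_d is the standard sigmoid derivative (the helper definitions and the finite-difference check are additions, not part of the original snippet); it recomputes the activations in a forward pass instead of hardcoding them:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_d(z):
    # derivative of the sigmoid: s'(z) = s(z) * (1 - s(z))
    s = sigmoid(z)
    return s * (1.0 - s)

def Jacobian(x, w, b):
    return sigmoid_d(x.dot(w) + b) * w  # J(S, x), as above

x = np.array([[1.0, 1.0]])                     # network inputs
w, b = np.ones((2, 2)), np.ones((1, 2))        # hidden-layer weights/biases
wout, bout = np.ones((2, 1)), np.ones((1, 1))  # output-layer weights/bias

a1 = sigmoid(x.dot(w) + b)    # layer-1 activations, sigmoid([3, 3])
a2 = sigmoid(a1.dot(w) + b)   # layer-2 activations, sigmoid([2.905..., 2.905...])

J_L1 = Jacobian(x, w, b)
J_L2 = Jacobian(a1, w, b)
J_Lout = Jacobian(a2, wout, bout)
J_out_to_in = J_Lout.T.dot(J_L2).dot(J_L1)

# finite-difference sanity check of the chained Jacobian
eps = 1e-6
out = sigmoid(a2.dot(wout) + bout)
for i in range(2):
    xp = x.copy(); xp[0, i] += eps
    a2p = sigmoid(sigmoid(xp.dot(w) + b).dot(w) + b)
    outp = sigmoid(a2p.dot(wout) + bout)
    print(J_out_to_in[0, i], (outp - out)[0, 0] / eps)  # should agree closely

Note that Jacobian here follows a row-vector convention (x.dot(w)); the transposed chain J_Lout.T.dot(J_L2).dot(J_L1) coincides with the column-convention Jacobian in this example only because the all-ones weight matrices are symmetric.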

1 Answer

Here's how I derive what your example should give:

# i'th component of vector-valued function S(x) (sigmoid-weighted layer)
S_i(x) = 1 / (1 + exp(-(w_i . x + b_i))) # . for matrix multiplication here

# i'th component of vector-valued function L(x) (linear-weighted layer)
L_i(x) = w_i . x # different weights than S.
# as it happens our L(x) outputs 1 value, so it is in fact a scalar function

F(x) = L(S(x)) # final output value

# derivative of F, denoted as J(F, x) to mean the Jacobian of the function F, evaluated at x.
J(F, x) = J(L(S(x)), x) = J(L, S(x)) . J(S, x) # chain rule for multivariable, vector-valued functions

# First, what's the derivative of L? Its own weight matrix:
J(L, S(x)) = W_L # the weights of L, constant with respect to S(x)

This is often a surprising result, but you can verify it by computing the partial derivatives of M . x for some random matrix M: if you compute all the derivatives and put them into the Jacobian matrix, you get M back.
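This can be confirmed numerically in a few lines (a quick sketch using finite differences on a random matrix, not part of the original answer):

import numpy as np

M = np.random.rand(3, 4)   # a random linear map L(x) = M . x
x = np.random.rand(4)
eps = 1e-6

# build the Jacobian column by column from partial derivatives
J = np.empty((3, 4))
for j in range(4):
    xp = x.copy(); xp[j] += eps
    J[:, j] = (M.dot(xp) - M.dot(x)) / eps

print(np.allclose(J, M, atol=1e-4))  # True: the Jacobian of M . x is M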

# Second, what's the derivative of S? Differentiating S_i with respect to x gives
J(S_i, x) = exp(-(w_i . x + b_i)) / (1 + exp(-(w_i . x + b_i)))^2 * w_i

Now let's take a debugging example with 1s everywhere.

w_i = b = x = [1, 1]

#define a to make this less cluttered
a = exp(-(w_i . x + b)) = exp(-3)

J(S_i, x) = a / (1 + a)^2 * [1, 1]
J(S, x) = a / (1 + a)^2 * [[1, 1], [1, 1]]
J(L, S(x)) = [1, 1] #Doesn't depend on S(x)

J(F, x) = J(L, S(x)) . J(S, x) = (a / (1 + a)**2) * [1, 1] . [[1, 1], [1, 1]]
J(F, x) = (a / (1 + a)**2) * [2, 2] = (2 * a / (1 + a)**2) * [1, 1]
J(F, x) = [0.0903533, 0.0903533]
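This number is easy to reproduce with numpy (a two-line check, not part of the original answer):

import numpy as np

a = np.exp(-3.0)             # a = exp(-(w_i . x + b)) with all values 1
print(2 * a / (1 + a) ** 2)  # ~0.0903533, matching J(F, x) above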

Hopefully this helps you reorganize your code. You can't compute the derivatives using only the value of w_i . x; you need w_i and x separately to compute everything correctly.

Edit

Since I found this interesting, here is my Python script that computes the values and first derivatives of a neural network:

import numpy as np

class Layer:
    def __init__(self, weights_matrix, bias_vector, sigmoid_activation = True):
        self.weights_matrix = weights_matrix
        self.bias_vector = bias_vector
        self.sigmoid_activation = sigmoid_activation

    def compute_value(self, x_vector):
        result = np.add(np.dot(self.weights_matrix, x_vector), self.bias_vector)
        if self.sigmoid_activation:
            result = np.exp(-result)
            result = 1 / (1 + result)

        return result

    def compute_value_and_derivative(self, x_vector):
        if not self.sigmoid_activation:
            return (self.compute_value(x_vector), self.weights_matrix)
        temp = np.add(np.dot(self.weights_matrix, x_vector), self.bias_vector)
        temp = np.exp(-temp)
        value = 1.0 / (1 + temp)
        temp = temp / (1 + temp)**2
        #pre-multiplying by a diagonal matrix multiplies each row by
        #the corresponding diagonal element
        #(1st row with 1st value, 2nd row with 2nd value, etc...)
        jacobian = np.dot(np.diag(temp), self.weights_matrix)
        return (value, jacobian)

class Network:
    def __init__(self, layers):
        self.layers = layers

    def compute_value(self, x_vector):
        for l in self.layers:
            x_vector = l.compute_value(x_vector)

        return x_vector

    def compute_value_and_derivative(self, x_vector):
        x_vector, jacobian = self.layers[0].compute_value_and_derivative(x_vector)
        for l in self.layers[1:]:
            x_vector, j = l.compute_value_and_derivative(x_vector)
            jacobian = np.dot(j, jacobian)

        return x_vector, jacobian

# layer 1 weights and biases
l1w = np.array([[1,1],[1,1]])
l1b = np.array([1,1])

# layer 2 weights and biases
l2w = np.array([[1,1],[1,1]])
l2b = np.array([1,1])

# output layer weights and bias (linear, no sigmoid)
l3w = np.array([1, 1])
l3b = np.array([0])

nn = Network([Layer(l1w, l1b),
              Layer(l2w, l2b),
              Layer(l3w, l3b, False)])

r = nn.compute_value_and_derivative(np.array([1,1]))
print(r)
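Running this prints the network value together with its Jacobian. As a sanity check (an addition, not part of the original answer), the analytic Jacobian can be compared against finite differences:

# finite-difference cross-check of the analytic Jacobian
eps = 1e-6
value, jacobian = nn.compute_value_and_derivative(np.array([1.0, 1.0]))
for i in range(2):
    x = np.array([1.0, 1.0])
    x[i] += eps
    numeric = (nn.compute_value(x) - value) / eps
    print(jacobian[i], numeric[0])  # the two values should agree closely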
