Getting a wrong logistic regression error curve (the code contains a bug)

Published 2024-05-12 22:26:11


I am starting out with machine learning and wrote this code, but for some reason I get a zigzag error curve instead of a decreasing, log-shaped one. For now, `form_binary_classes` just takes the start and end indices of two similar datasets with different labels. The `error` function returns the error at each iteration (this is most likely where the bug is) and `get_acc` returns the accuracy. `gradient_descent` is basically used to return the trained weights and the bias term. I am only looking for the bug, not for a more efficient approach.

import numpy as np
import matplotlib.pyplot as plt

def hypothesis(x, theta, b):
    h = np.dot(x, theta) + b
    return sigmoid(h)

def sigmoid(z):
    return 1.0/(1.0+np.exp(-1.0*z))
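
A side note on this helper: for large negative `z`, `np.exp(-z)` overflows, so NumPy emits a RuntimeWarning before saturating the result to 0.0. A numerically stable variant (a sketch, not part of the original question) evaluates `exp` only on non-positive arguments:

```python
import numpy as np

def sigmoid_stable(z):
    # Split on the sign of z so np.exp is only ever called with a
    # non-positive argument and therefore can never overflow.
    z = np.asarray(z, dtype=float)
    out = np.empty_like(z)
    pos = z >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-z[pos]))
    ez = np.exp(z[~pos])          # z < 0 here, so ez lies in (0, 1)
    out[~pos] = ez / (1.0 + ez)
    return out
```

For moderate `z` it agrees with the plain formula; for `z = -1000` it quietly returns 0.0 instead of warning.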

def error(y_true, x, w, b):
    m = x.shape[0]
    err = 0.0
    for i in range(m):
        hx = hypothesis(x[i], w, b)
        if(hx==0):
            err += (1-y_true[i])*np.log2(1-hx)
        elif(hx==1):
            err += y_true[i]*np.log2(hx)
        else:
            err += y_true[i]*np.log2(hx) + (1-y_true[i])*np.log2(1-hx)
    return -err/m

def get_gradient(y_true, x, w, b):
    grad_w = np.zeros(w.shape)
    grad_b = 0.0
    m = x.shape[0]
    for i in range(m):
        hx = hypothesis(x[i], w, b)
        grad_w += (y_true[i] - hx)*x[i]
        grad_b += (y_true[i] - hx)

    grad_w /= m
    grad_b /= m
    return [grad_w, grad_b]
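
Note that `get_gradient` returns the gradient of the log-likelihood, not of the loss, which is why `gradient_descent` below adds it rather than subtracting it. A quick finite-difference check (a standalone sketch with made-up toy data) confirms the sign convention:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def nll(y, x, w, b):
    # Negative log-likelihood in nats (the question's error uses log2,
    # which only rescales everything by a constant factor 1/ln 2).
    hx = sigmoid(x @ w + b)
    return -np.mean(y * np.log(hx) + (1 - y) * np.log(1 - hx))

def grad_loglik_w(y, x, w, b):
    # Same formula as get_gradient: gradient of the LOG-LIKELIHOOD,
    # i.e. minus the gradient of the loss above.
    hx = sigmoid(x @ w + b)
    return (y - hx) @ x / len(y)

rng = np.random.default_rng(0)
x = rng.normal(size=(8, 3))
y = (rng.random(8) > 0.5).astype(float)
w, b, eps = rng.normal(size=3), 0.1, 1e-6

# Central finite differences of the loss along each coordinate.
num = np.array([(nll(y, x, w + eps * e, b) - nll(y, x, w - eps * e, b)) / (2 * eps)
                for e in np.eye(3)])
ana = grad_loglik_w(y, x, w, b)
print(np.allclose(num, -ana, atol=1e-5))  # True: the '+' update descends the loss
```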

def gradient_descent(y_true, x, w, b, learning_rate=0.1):
    err = error(y_true, x, w, b)
    grad_w, grad_b = get_gradient(y_true, x, w, b)
    w = w + learning_rate*grad_w
    b = b + learning_rate*grad_b
    return err, w, b

def predict(x,w,b):   
    confidence = hypothesis(x,w,b)
    if confidence<0.5:
        return 0
    else:
        return 1

def get_acc(x_tst,y_tst,w,b):

    y_pred = []

    for i in range(y_tst.shape[0]):
        p = predict(x_tst[i],w,b)
        y_pred.append(p)

    y_pred = np.array(y_pred)

    return  float((y_pred==y_tst).sum())/y_tst.shape[0]

def form_binary_classes(a_start, a_end, b_start, b_end):
    x = np.vstack((X[a_start:a_end], X[b_start:b_end]))
    y = np.hstack((Y[a_start:a_end], Y[b_start:b_end]))
    print("{} {}".format(x.shape,y.shape[0]))
    loss = []
    acc = []
    w = 2*np.random.random((x.shape[1],))
    b = 5*np.random.random()
    for i in range(100):
        l, w, b = gradient_descent(y, x, w, b, learning_rate=0.5)       
        acc.append(get_acc(X_test,Y_test,w,b))
        loss.append(l)
    plt.plot(loss)
    plt.ylabel("Negative of Log Likelihood")
    plt.xlabel("Time")
    plt.show()

What the error plot looks like:

[image: jagged, zigzag loss curve]

What it should look like:

[image: smoothly decreasing loss curve]
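
For reference, since the images did not survive: a healthy run produces a smooth, monotonically decreasing curve. This standalone sketch (same update rule as above, a made-up 1-D separable dataset, and predictions clipped away from 0 and 1 before taking the log) shows that behaviour:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def loss(y, x, w, b, eps=1e-12):
    # Mean cross-entropy in bits; clipping keeps log2 away from 0.
    hx = np.clip(sigmoid(x @ w + b), eps, 1 - eps)
    return -np.mean(y * np.log2(hx) + (1 - y) * np.log2(1 - hx))

def step(y, x, w, b, lr=0.5):
    # Same rule as gradient_descent: add the log-likelihood gradient.
    hx = sigmoid(x @ w + b)
    return w + lr * (y - hx) @ x / len(y), b + lr * np.mean(y - hx)

# Made-up separable data: negatives near -1, positives near +1.
x = np.array([[-1.5], [-1.0], [1.0], [1.5]])
y = np.array([0.0, 0.0, 1.0, 1.0])
w, b = np.zeros(1), 0.0
losses = [loss(y, x, w, b)]
for _ in range(100):
    w, b = step(y, x, w, b)
    losses.append(loss(y, x, w, b))
# Starts at exactly 1 bit (hx == 0.5 everywhere) and decreases
# monotonically toward 0 -- no zigzag.
```

Plotting `losses` with `plt.plot(losses)` gives the expected shape; if the curve oscillates instead, either the learning rate is too large or the error computation is wrong.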


1 Answer

You have a problem in the way you compute the error, and it is most likely what keeps your model from converging.

In your code, when you handle the corner cases, you compute an error of zero whenever hx == 0 or hx == 1, even when the prediction is wrong, e.g. hx == 0 while y_true == 1.

In that case we fall into the first if, and the error contribution is (1-1)*log2(1) = 0, which is not correct.

You can fix this by modifying the first two ifs as follows:

def error(y_true, x, w, b):
    m = x.shape[0]
    err = 0.0
    for i in range(m):
        hx = hypothesis(x[i], w, b)
        if(hx==y_true[i]): # corner cases where the error is exactly zero
            err += 0
        elif((hx==1 and y_true[i]==0) or (hx==0 and y_true[i]==1)): # corner cases that would take log2 of zero
            err += np.iinfo(np.int32).min # an approximation of log2(0): penalize the model with the largest possible error
        else:
            err += y_true[i]*np.log2(hx) + (1-y_true[i])*np.log2(1-hx)
    return -err/m

In this part of the code, I assumed your labels are binary (0 or 1).
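
To see the difference concretely, the standalone sketch below drives the sigmoid to exactly 0.0 (the input magnitude is made up; anything below roughly -745 underflows in float64) while the true label is 1. The original `error` reports zero loss for this confidently wrong prediction, while the patched version applies a huge penalty:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def error_original(y_true, x, w, b):
    # Per-sample loop as posted in the question, corner cases included.
    m = x.shape[0]
    err = 0.0
    for i in range(m):
        hx = sigmoid(np.dot(x[i], w) + b)
        if hx == 0:
            err += (1 - y_true[i]) * np.log2(1 - hx)
        elif hx == 1:
            err += y_true[i] * np.log2(hx)
        else:
            err += y_true[i] * np.log2(hx) + (1 - y_true[i]) * np.log2(1 - hx)
    return -err / m

def error_patched(y_true, x, w, b):
    m = x.shape[0]
    err = 0.0
    for i in range(m):
        hx = sigmoid(np.dot(x[i], w) + b)
        if hx == y_true[i]:
            err += 0
        elif (hx == 1 and y_true[i] == 0) or (hx == 0 and y_true[i] == 1):
            err += np.iinfo(np.int32).min  # stand-in for log2(0)
        else:
            err += y_true[i] * np.log2(hx) + (1 - y_true[i]) * np.log2(1 - hx)
    return -err / m

# One confidently wrong prediction: z = -800 underflows sigmoid to 0.0,
# yet the true label is 1.
x = np.array([[800.0]])
y = np.array([1.0])
w = np.array([-1.0])
with np.errstate(over='ignore'):
    print(error_original(y, x, w, 0.0))  # bug: zero loss for a maximally wrong prediction
    print(error_patched(y, x, w, 0.0))   # huge penalty (2**31 here)
```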
