Logistic regression implementation - loss does not converge and the model performs poorly
I am trying to implement a logistic regression model, i.e. a binary classifier.
I need to use stochastic gradient descent together with the closed form of the gradient of the binary cross-entropy loss.
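Concretely, for a single training example (x, y) with prediction a = \sigma(w^\top x), the closed form I am referring to is:

\ell(w) = -\big[\, y \log a + (1 - y) \log(1 - a) \,\big], \qquad \frac{\partial \ell}{\partial w} = (a - y)\, x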
After trying to train the model on data, it does not seem to work properly:
the loss does not decrease or converge as the number of iterations grows. A learning rate of eta = 0.002 helps a little, but beyond that the model only seems to converge if I first set the learning rate much larger (i.e. eta >> 0.002) and then reset it back to 0.002.
When I then evaluate the model, the results are very poor, roughly on par with a naive model that predicts the same label for every test example:
Confusion Matrix:
[[344. 240.]
[294. 322.]]
True Negatives (TN): 322.0
False Positives (FP): 294.0
False Negatives (FN): 240.0
True Positives (TP): 344.0
Sensitivity (Se): 0.589041095890411
Specificity (Sp): 0.5227272727272727
Positive Predictive Value (PPV): 0.5391849529780565
Negative Predictive Value (NPV): 0.5729537366548043
Accuracy (Acc): 0.555
F1 Score: 0.563011456628478
Area Under the ROC Curve (AUC): 0.555
What is wrong with this implementation?
import numpy as np
import matplotlib.pyplot as plt


def sigmoid(z):
    sig = 1 / (1 + np.exp(-z))
    return sig


class ManualLogisticRegression:
    def __init__(self, random_state=1):
        np.random.seed(random_state)
        self.w = np.random.randn(5)

    def fit(self, X, Y, eta=0.005, plot=False):
        if plot:
            loss_vec = np.zeros(len(X))
        for idx, (x, y) in enumerate(zip(X, Y)):
            z = np.dot(x, self.w)
            a = sigmoid(z)
            grad = np.dot(x.T, (a - y))
            self.w -= eta * grad
            if plot:
                loss_vec[idx] = self.log_loss(X, Y)
        if plot:
            plt.plot(loss_vec)
            plt.xlabel('# of iterations')
            plt.ylabel('Loss')

    def log_loss(self, x, y):
        z = np.dot(x, self.w)
        p = sigmoid(z)
        epsilon = 1e-5
        p = np.clip(p, epsilon, 1 - epsilon)
        log_loss = (-1 / len(x)) * np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
        return log_loss

    def predict_proba(self, x):
        """
        This function computes the probability that every example in x belongs to class "1", using the trained model.
        :param x: Feature matrix (can also be a single vector).
        :return: Vector with one entry per example in x, where every element is the probability of belonging to class "1".
        """
        z = np.dot(x, self.w)
        y_pred_proba = sigmoid(z)
        return y_pred_proba

    def predict(self, x, thresh=0.5):
        """
        This function labels every example according to the calculated probability, using a threshold.
        :param x: Feature matrix (can also be a single vector).
        :param thresh: Decision threshold.
        :return: Vector with one entry per example in x, where every element is the estimated label (0 or 1).
        """
        z = np.dot(x, self.w)
        probabilities = sigmoid(z)
        y_pred = np.where(probabilities >= thresh, 1, 0)
        return y_pred

    def score(self, x, y):
        """
        This function computes the accuracy of the trained model's estimations.
        :param x: Feature matrix (can also be a single vector).
        :param y: Corresponding true labels (either 1 or 0).
        :return: Estimator's accuracy.
        """
        return np.sum(self.predict(x) == y) / len(y)

    def conf_matrix(self, x, y):
        """
        This function computes the confusion matrix for the predictions of the trained model. The first value of the
        matrix was given as a hint.
        :param x: Feature matrix (can also be a single vector).
        :param y: Corresponding true labels (either 1 or 0).
        :return: Confusion matrix.
        """
        conf_mat = np.zeros((2, 2))
        y_pred = self.predict(x)
        conf = (y_pred == y)
        conf_mat[0, 0] += np.sum(1 * (conf[y_pred == 0] == 1))
        # The provided hint counts the examples predicted as 0 whose prediction matches the label,
        # i.e. the true negatives (TN): if y == 0 we get True, if y == 1 we get False.
        conf_mat[1, 0] += np.sum(1 * (conf[y_pred == 0] == 0))  # FN
        conf_mat[0, 1] += np.sum(1 * (conf[y_pred == 1] == 0))  # FP
        conf_mat[1, 1] += np.sum(1 * (conf[y_pred == 1] == 1))  # TP
        # --------------------------------------------------------------------------------------
        return conf_mat
Here is the content of the notebook:
%load_ext autoreload
%autoreload 2
from manual_log_reg import ManualLogisticRegression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
X = pd.read_csv('X_data.csv')
X.drop(columns=X.columns[0], axis=1, inplace=True)
X.head()
y = pd.read_csv('y_data.csv') # read and convert to numpy
y.drop(columns=y.columns[0], axis=1, inplace=True)
y.head()
X = X.values # convert to numpy
y = y.values.astype(int).flatten() # convert to numpy integers and flatten
X = np.concatenate((np.ones((len(y), 1)), X), axis=1) # add bias term
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
log_reg = ManualLogisticRegression()
log_reg.fit(X_train, y_train, eta=0.003, plot=True)
sorted_weights = np.sort(np.abs(log_reg.w[:-1])) # Exclude bias term and sort by absolute value
most_important_feature_index = np.argmax(np.abs(log_reg.w[:-1])) # Find the index of the most important feature
most_important_feature_weight = log_reg.w[most_important_feature_index] # Get the weight of the most important feature
print(f"The most important feature is feature {most_important_feature_index + 1} with weight {most_important_feature_weight}.")
conf_matrix = log_reg.conf_matrix(X_test, y_test)
print("Confusion Matrix:")
print(conf_matrix)
# Calculate additional performance metrics
TN = conf_matrix[1, 1]
FP = conf_matrix[1, 0]
FN = conf_matrix[0, 1]
TP = conf_matrix[0, 0]
Se = TP / (TP + FN)
Sp = TN / (TN + FP)
PPV = TP / (TP + FP)
NPV = TN / (TN + FN)
Acc = (TP + TN) / (TP + TN + FP + FN)
F1 = 2 * (PPV * Se) / (PPV + Se)
# Calculate AUC using the score method of ManualLogisticRegression
AUC = log_reg.score(X_test, y_test)
# Report the performance metrics
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")
print(f"True Positives (TP): {TP}")
print(f"Sensitivity (Se): {Se}")
print(f"Specificity (Sp): {Sp}")
print(f"Positive Predictive Value (PPV): {PPV}")
print(f"Negative Predictive Value (NPV): {NPV}")
print(f"Accuracy (Acc): {Acc}")
print(f"F1 Score: {F1}")
print(f"Area Under the ROC Curve (AUC): {AUC}")
conf_mat = log_reg.conf_matrix(X_test, y_test)
import seaborn as sns
import matplotlib.pyplot as plt
# Plot confusion matrix
sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()
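For what it is worth, I was also planning to sanity-check these numbers against sklearn with something along these lines (just a sketch; roc_auc_score is already imported above and expects the predicted probabilities):

from sklearn.metrics import confusion_matrix, accuracy_score

y_pred = log_reg.predict(X_test)
y_proba = log_reg.predict_proba(X_test)
# sklearn lays the confusion matrix out as [[TN, FP], [FN, TP]] for labels {0, 1}
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_proba))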
1 Answer
Probably the biggest problem in your implementation is the derivative. The derivative with respect to the vector w should itself be a vector, not a scalar. The correct way to compute the gradient is:
grad = (a - y) * x
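Component-wise this is \partial \ell / \partial w_j = (a - y)\, x_j, so the gradient has the same dimension as w.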
Secondly, training the model for only a single epoch does not seem reasonable. You can try running it for multiple epochs by changing fit to:
def fit(self, X, Y, epochs=10, eta=0.005, plot=False):
    if plot:
        loss_vec = np.zeros(epochs)
    for epoch in range(epochs):
        for (x, y) in zip(X, Y):
            z = np.dot(x, self.w)
            a = sigmoid(z)
            grad = (a - y) * x
            self.w -= eta * grad
        if plot:
            loss_vec[epoch] = self.log_loss(X, Y)
    if plot:
        plt.plot(loss_vec)
        plt.xlabel('# of epochs')
        plt.ylabel('Loss')
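In the notebook, the training call would then look something like this (the epoch count is only an illustration; pick it by watching the loss curve):

log_reg.fit(X_train, y_train, epochs=50, eta=0.003, plot=True)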