Log-likelihood decreases while training my maximum likelihood estimation program

Posted 2024-04-25 22:37:32


I implemented a maximum likelihood estimation program. The log-likelihood should normally increase after every full-batch epoch over the dataset, but mine keeps decreasing.

I believe there is a logic error somewhere, but I can't find it. Could someone more experienced take a look? I'm also open to suggestions for improvement.

import scipy.io
import math
import numpy as np

dataset = scipy.io.loadmat('dataset.mat')

data = dataset['hog_features_train'] # Size is [2000, 324]
labels = dataset['superclass_labels_train'] # Size is [2000, 1]
NUMBER_OF_FEATURES = len(dataset['hog_features_train'][0]) # Is 324

# Initialize weights with last weight as bias
w = np.random.normal(0, 0.01, NUMBER_OF_FEATURES + 1)

# linear(𝐱) = 𝑏₀ + 𝑏₁𝑥₁ + ⋯ + 𝑏ₙ𝑥ₙ
def linear(w, observation):
    return np.dot(w[:NUMBER_OF_FEATURES], observation) + w[NUMBER_OF_FEATURES]

# sigmoid(𝐱) = 1 / (1 + exp(−𝐱))
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

# prob(𝐱) = 1 / (1 + exp(−linear(𝐱)))
def prob(w, observation):
    return sigmoid(linear(w, observation))

# LLF = Σᵢ(𝑦ᵢ log(prob(𝐱ᵢ)) + (1 − 𝑦ᵢ) log(1 − prob(𝐱ᵢ)))
def log_likelyhood(w, data, labels):
    total = 0  # avoid shadowing the built-in sum()
    for i in range(len(data)):
        total += labels[i] * np.log(prob(w, data[i])) + (1 - labels[i]) * np.log(1 - prob(w, data[i]))
    return total

# NOTE: d/dx(log(1/(1 + e^(a·x + b)))) = -(a·e^(a·x + b))/(e^(a·x + b) + 1)
def gradient(w, data, labels):
    # Initialize gradient vector
    gradient = np.zeros(len(w))

    # For input coefficients
    for i in range(len(w) - 1):
        for j in range(len(data)):
            power = math.exp(linear(w, data[j]))
            gradient[i] += - labels[j] * data[j][i] * power / (1 + power) + (1 - labels[j]) * (1 + data[j][i] * power / (1 + power))

    # Gradient term for bias
    for j in range(len(data)):
        power = math.exp(linear(w, data[j]))
        gradient[len(w) - 1] += - labels[j] * power / (1 + power) + (1 - labels[j]) * (1 + power / (1 + power))

    return gradient

LEARNING_RATE = 0.0001
EPOCH = 1000

# Calculate the LLF
likelyhood = log_likelyhood(w, data, labels)
print('likelyhood at the beginning: ', likelyhood)

# Gradient ascent algorithm
for i in range(EPOCH):
    gradient1 = gradient(w, data, labels)
    w += gradient1 * LEARNING_RATE

    likelyhood = log_likelyhood(w, data, labels)
    print('likelyhood after epoch', i + 1, ': ', likelyhood)

Here is the dataset, if you want to reproduce my results:

https://drive.google.com/open?id=1tCHXDnxql-_mEvEjFh4OVmev9N_Nreu5


2 Answers

Your code is very slow; you should learn to use vectorization. Also, the derivative you are computing is wrong. I haven't checked the code line by line, but here is a vectorized version of your code with the correct gradient. See how much faster it runs this way:

import scipy.io
import math
import numpy as np

dataset = scipy.io.loadmat('dataset.mat')

data = dataset['hog_features_train'].astype('float64') # Size is [2000, 324]
bias_term = np.ones(shape=(2000,1))
data = np.concatenate(( bias_term , data), axis=1) # add bias term as an extra 1 in data features

labels = dataset['superclass_labels_train'].astype('float16') # Size is [2000, 1]
NUMBER_OF_FEATURES = data.shape[1] # Is 325

# Initialize weights with last weight as bias
w = np.random.normal(0, 0.01, NUMBER_OF_FEATURES)

# linear(𝐱) = 𝑏₀ + 𝑏₁𝑥₁ + ⋯ + 𝑏ₙ𝑥ₙ (the bias 𝑏₀ enters via the leading 1-column)
def linear(w, observation):
    return np.matmul(observation,w)

# sigmoid(𝐱) = 1 / (1 + exp(−𝐱))
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# prob(𝐱) = 1 / (1 + exp(−linear(𝐱)))
def prob(w, observation):
    return sigmoid(linear(w, observation))

# LLF = Σᵢ(𝑦ᵢ log(prob(𝐱ᵢ)) + (1 − 𝑦ᵢ) log(1 − prob(𝐱ᵢ)))
def log_likelyhood(w, data, labels):
    p = prob(w, data)   # shape (2000,)
    y = labels[:, 0]    # flatten (2000, 1) to (2000,) so the products stay 1-D
    return np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

# NOTE: ∂LLF/∂𝐰 = Σᵢ 𝐱ᵢ (𝑦ᵢ − prob(𝐱ᵢ)); the bias gradient lands in component 0
# because of the leading 1-column in data
def gradient(w, data, labels):
    residual = labels[:, 0] - prob(w, data)  # shape (2000,)
    return data.T @ residual                 # shape (325,)

LEARNING_RATE = 0.0001
EPOCH = 1000

# Calculate the LLF
likelyhood = log_likelyhood(w, data, labels)
print('likelyhood at the beginning: ', likelyhood)

# Gradient ascent algorithm
for i in range(EPOCH):
    gradient1 = gradient(w, data, labels)
    w += gradient1 * LEARNING_RATE

    likelyhood = log_likelyhood(w, data, labels)
    print('likelyhood after epoch', i + 1, ': ', likelyhood)
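
As a quick sanity check (my addition, not part of the original answer), the analytic gradient above can be compared against a central finite-difference approximation of log_likelyhood on a small synthetic problem; the two should agree to several decimal places. The data X, y and weights w_test below are made up for the check:

import numpy as np

rng = np.random.default_rng(0)

# Hypothetical small problem: 50 observations, 5 features plus the leading bias column.
X = np.concatenate((np.ones((50, 1)), rng.normal(size=(50, 5))), axis=1)
y = rng.integers(0, 2, size=(50, 1)).astype('float64')
w_test = rng.normal(0, 0.01, X.shape[1])

analytic = gradient(w_test, X, y)

# Central finite differences: perturb one weight at a time.
eps = 1e-6
numeric = np.zeros_like(w_test)
for k in range(len(w_test)):
    d = np.zeros_like(w_test)
    d[k] = eps
    numeric[k] = (log_likelyhood(w_test + d, X, y) - log_likelyhood(w_test - d, X, y)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # should be on the order of 1e-6 or smaller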

The problem seems to be that not all parameters are updated simultaneously in your gradient() function; the corrected version below accumulates the full gradient vector in one pass over the data.
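
For reference (this derivation is my addition, not part of the original answer), the gradient formula used in the code below follows from the chain rule. Writing x̃ᵢ = [𝐱ᵢ, 1] and pᵢ = sigmoid(𝐰 · x̃ᵢ), and using sigmoid′(z) = sigmoid(z)(1 − sigmoid(z)):

∂LLF/∂𝐰 = Σᵢ (𝑦ᵢ/pᵢ − (1 − 𝑦ᵢ)/(1 − pᵢ)) · pᵢ(1 − pᵢ) · x̃ᵢ = Σᵢ x̃ᵢ (𝑦ᵢ − pᵢ)

which is exactly the update implemented in gradient() below.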

import scipy.io
import math
import numpy as np
import matplotlib.pyplot as plt

# Load the data
dataset = scipy.io.loadmat('dataset.mat')

# Extract the feature matrix
data = dataset['hog_features_train'] # Size is [2000, 324]

# Extract the labels
labels = dataset['superclass_labels_train'] # Size is [2000, 1]

# Extract the number of features
NUMBER_OF_FEATURES = data.shape[1] # Is 324

# Initialize weights with last weight as bias
w = np.random.normal(0, 0.01, NUMBER_OF_FEATURES + 1)

# linear(𝐱) = 𝑏₀ + 𝑏₁𝑥₁ + ⋯ + 𝑏ₙ𝑥ₙ
def linear(w, observation):
    return np.dot(w, np.hstack([observation, 1]))

# sigmoid(𝐱) = 1 / (1 + exp(−𝐱))
def sigmoid(x):
    return 1 / (1 + math.exp(-x))

# prob(𝐱) = 1 / (1 + exp(−linear(𝐱)))
def prob(w, observation):
    return sigmoid(linear(w, observation))

# LLF = Σᵢ(𝑦ᵢ log(prob(𝐱ᵢ)) + (1 − 𝑦ᵢ) log(1 − prob(𝐱ᵢ)))
def log_likelihood(w, data, labels):
    total = 0  # avoid shadowing the built-in sum()
    for i in range(len(data)):
        total += labels[i] * np.log(prob(w, data[i, :])) + (1 - labels[i]) * np.log(1 - prob(w, data[i, :]))
    return total

# gradient = Σᵢ 𝐱ᵢ (𝑦ᵢ - prob(𝐱ᵢ))
def gradient(w, data, labels):

    # Initialize gradient vector
    gradient = np.zeros(len(w))

    # Update gradient vector
    for i in range(len(data)):

        gradient += np.hstack([data[i, :], 1]) * (labels[i] - prob(w, data[i, :]))

    return gradient

LEARNING_RATE = 0.0001
EPOCH = 1000

# Calculate the LLF
loglikelihood = [log_likelihood(w, data, labels)[0]]
print('loglikelihood at the beginning: ', loglikelihood[0])

# Run the gradient ascent algorithm
for i in range(EPOCH):

    gradient1 = gradient(w, data, labels)

    w += gradient1 * LEARNING_RATE

    LLF = log_likelihood(w, data, labels)[0]

    loglikelihood.append(LLF)

    print('likelihood after epoch', i + 1, ': ', LLF)

# Plot the loglikelihood
plt.plot(np.arange(1 + EPOCH), loglikelihood)
plt.xlabel('Epoch')
plt.ylabel('Loglikelihood')
plt.show()

[Figure: plot of the log-likelihood over training epochs]
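
One caveat worth adding (my addition, not from the answer): math.exp(-x) can overflow for large |x|, and np.log(prob(...)) returns -inf once a probability rounds to exactly 0 or 1. If that ever happens, a numerically safer log-likelihood can be written with np.logaddexp, using the identities log sigmoid(z) = −logaddexp(0, −z) and log(1 − sigmoid(z)) = −logaddexp(0, z). The helper name log_likelihood_stable is mine; it assumes the same bias-last weight layout as the code above:

import numpy as np

def log_likelihood_stable(w, data, labels):
    # Append the constant-1 bias column and compute every linear term in one shot.
    z = np.hstack([data, np.ones((len(data), 1))]) @ w
    y = labels[:, 0]
    # log sigmoid(z) = -logaddexp(0, -z); log(1 - sigmoid(z)) = -logaddexp(0, z)
    return np.sum(-y * np.logaddexp(0, -z) - (1 - y) * np.logaddexp(0, z))

It returns the same value as log_likelihood above whenever nothing over- or underflows.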
