我实现了一个最大似然估计程序。我的可能性通常会在数据集上的每个完整批处理纪元之后增加
我相信有逻辑错误,但我找不到。能找个更有经验的人帮我吗。我也愿意改进
import scipy.io
import math
import numpy as np
dataset = scipy.io.loadmat('dataset.mat')
data = dataset['hog_features_train'] # Size is [2000, 324]
labels = dataset['superclass_labels_train'] # Size is [2000, 1]
NUMBER_OF_FEATURES = len(dataset['hog_features_train'][0]) # Is 324
# Initialize weights with last weight as bias
w = np.random.normal(0, 0.01, NUMBER_OF_FEATURES + 1)
# linear(𝐱) = 𝑏₀ + 𝑏₁𝑥₁ + ⋯ +
def linear(w, observation):
return np.dot(w[:NUMBER_OF_FEATURES], observation) + w[NUMBER_OF_FEATURES]
# sigmoid(𝐱) = 1 / (1 + exp(−𝐱)
def sigmoid(x):
return 1 / (1 + math.exp(-x))
# prob(𝐱) = 1 / (1 + exp(−linear(𝐱))
def prob(w, observation):
return sigmoid(linear(w, observation))
# LLF = Σᵢ(𝑦ᵢ log(prob(𝐱ᵢ)) + (1 − 𝑦ᵢ) log(1 − prob(𝐱ᵢ)))
def log_likelyhood(w, data, labels):
sum = 0
for i in range(len(data)):
sum += labels[i] * np.log(prob(w, data[i])) + (1 - labels[i]) * np.log(1 - prob(w, data[i]))
return sum
# NOTE: d/dx(log(1/(1 + e^(a * x + b)))) = -(a * e^(a * x + b))/(e^(a x + b) + 1)
def gradient(w, data, labels):
#Initialze gradient vector
gradient = np.zeros(len(w))
# For input coefficients
for i in range(len(w) - 1):
for j in range(len(data)):
power = math.exp(linear(w, data[j]))
gradient[i] += - labels[j] * data[j][i] * power / (1 + power) + (1 - labels[j]) * (1 + data[j][i] * power / (1 + power))
# Gradient term for bias
for j in range(len(data)):
power = math.exp(linear(w, data[j]))
gradient[len(w) - 1] += - labels[j] * power / (1 + power) + (1 - labels[j]) * (1 + power / (1 + power))
return gradient
LEARNING_RATE = 0.0001
EPOCH = 1000
# Calculate the LLF
likelyhood = log_likelyhood(w, data, labels)
print('likelyhood at the beginning: ', likelyhood)
# Gradient ascent algorithm
for i in range(EPOCH):
gradient1 = gradient(w, data, labels)
w += gradient1 * LEARNING_RATE
likelyhood = log_likelyhood(w, data, labels)
print('likelyhood after epoch', i + 1, ': ', likelyhood)
数据集如果要复制我的结果:
https://drive.google.com/open?id=1tCHXDnxql-_mEvEjFh4OVmev9N_Nreu5
您的代码非常慢,请学习使用矢量化,而您计算的导数是错误的,我没有仔细检查代码,但这是您的代码的矢量化版本,具有正确的渐变,请查看它以这种方式运行的速度:
问题似乎在于
gradient()
函数中并非所有参数都同时更新相关问题 更多 >
编程相关推荐