简单神经网络在MNIST上的误分类率卡在17%

2024-04-25 04:09:36 发布

您现在位置:Python中文网/ 问答频道 /正文

你好,希望大家能帮我检查一下代码。几个星期以来,我一直在用Python实现自己的神经网络,但错误率始终无法低于17%(有时是16%)。我尝试过不同的学习率和不同数量的隐藏神经元,但都没有明显改善。我很清楚我的实现只是最基本的传统神经网络,但根据我在网上看到的其他实现,我原本期待更好的结果。如果你们对此感兴趣,希望能指出我的代码中可能存在的问题;或者,如果你们认为这已经是传统实现所能达到的最好结果、我应该加入一些新的东西,也请告诉我,那就太好了。

无论如何,下面是我的代码。希望它的可读性足够好——我已尽量把它写得简单,因为这是我理解神经网络工作原理的方式。

编辑:也许我的问题表述得不够清楚。基本上,我希望(如果你们感兴趣的话)大家能帮我找出当前实现中可以改进的细节,使误分类率降到17%以下,因为这显然已经是我目前的实现所能达到的最好结果。我对这个话题非常感兴趣,但我还是个初学者;如果有什么巧妙的想法能帮助我改进实现,那就太好了,我将非常感谢任何建议或想法。

文件:mnist_dataset.py - 提取MNIST数据

import numpy as np
from struct import unpack

# Open the four MNIST IDX files in binary mode at import time.  The handles
# are read and then closed by getMNISTData().  Every path is relative to the
# current working directory.
train_input_file = open("dataset/train-images-idx3-ubyte", "rb")
# This path used to start with "/" (filesystem root), unlike its three
# siblings; it now points into the same relative "dataset" directory.
train_output_file = open("dataset/train-labels-idx1-ubyte", "rb")
test_input_file = open("dataset/t10k-images-idx3-ubyte", "rb")
test_output_file = open("dataset/t10k-labels-idx1-ubyte", "rb")


def _read_be_uint32(f):
    """Read one big-endian unsigned 32-bit integer from the open file *f*."""
    field = np.fromfile(f, dtype='>u4', count=1)
    if field.size != 1:
        raise ValueError("unexpected end of file while reading IDX header")
    return int(field[0])


def readData(f, labels=False, scale=1):
    """Read an IDX-formatted MNIST file into a 2-D float array.

    Args:
        f: File object opened in binary mode and positioned at the start of
            the IDX header.
        labels: When true, the file is treated as a label file (header is
            magic + item count, one byte per item).  Otherwise it is treated
            as an image file (header additionally carries row and column
            counts, one byte per pixel).
        scale: Divisor applied to the item count; only the first
            ``int(num / scale)`` items are read.

    Returns:
        A float array of shape ``(int(num / scale), row * col)`` holding the
        raw byte values (0-255); label files yield a single column.

    Raises:
        ValueError: If the file ends before the header or the requested
            number of items has been read.
    """
    _read_be_uint32(f)  # magic number: consumed to advance the file, not checked
    num = _read_be_uint32(f)
    row = col = 1
    if not labels:
        row = _read_be_uint32(f)
        col = _read_be_uint32(f)

    count = int(num / scale)
    width = col * row
    total = count * width
    if total == 0:
        return np.zeros((count, width))

    # One bulk read instead of one np.fromfile call per item.
    raw = np.fromfile(f, dtype=np.ubyte, count=total)
    if raw.size != total:
        raise ValueError(
            "expected %d data bytes, file only contained %d" % (total, raw.size)
        )
    return raw.reshape(count, width).astype(float)


def getMNISTData():
    """Load the MNIST train/test splits from the module-level file handles.

    Image arrays are divided by 255.0; label arrays are returned as read by
    readData().  The four module-level file handles are closed before
    returning, so this function can only be called once per import.

    Returns:
        A tuple ``(train_input, train_out, test_input, test_out)``.
    """
    try:
        train_input = readData(train_input_file, scale=1) / 255.0
        train_out = readData(train_output_file, True, scale=1)
        test_input = readData(test_input_file) / 255.0
        test_out = readData(test_output_file, True)
    finally:
        # Release the handles even when one of the reads fails.
        train_input_file.close()
        train_output_file.close()
        test_input_file.close()
        test_output_file.close()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Train input: " + str(train_input.shape))
    print("Train output: " + str(train_out.shape))
    print("Test input: " + str(test_input.shape))
    print("Test output: " + str(test_out.shape))

    return (train_input, train_out, test_input, test_out)

文件:NN.py - 神经网络实现

import mnist_dataset
import numpy as np
import random
import matplotlib.pyplot as plt


def encode_data_10(v):
    """Return the one-hot encoding of digit *v* as a nested list.

    The result is a list containing a single row of ten floats: 1.0 at
    position ``int(v)`` and 0.0 everywhere else.
    """
    one_hot = np.zeros((1, 10), dtype=float)
    one_hot[0, int(v)] = 1.0
    return one_hot.tolist()

def encode_data_1(v):
    """Map a label *v* linearly to ``0.2 * v - 1.0`` (0 -> -1.0, 10 -> 1.0)."""
    scaled = 0.2 * v
    return scaled - 1.0

# Load the four MNIST arrays (train/test inputs and labels).
x_train, y_train, x_test, y_test = mnist_dataset.getMNISTData()

# Hyper-parameters of the training run.
learning_rate = 1.0
iter = 3000  # NOTE: shadows the builtin iter(); name kept because later code reads it
sample_size = 30
num_hidden_neurons = 500
num_output_neurons = 10

# Encode the labels as network targets.  List comprehensions replace map()
# because map() is lazy on Python 3 and np.array() would wrap the iterator in
# a 0-d object array instead of building the target table.
if num_output_neurons > 1:
    # encode_data_10 returns a 1x10 nested list per label; flatten the stack
    # to one 10-wide row per sample explicitly before wrapping it in a matrix.
    y_train = np.matrix(
        np.array([encode_data_10(label) for label in y_train]).reshape(-1, 10)
    )
    y_test = np.matrix(
        np.array([encode_data_10(label) for label in y_test]).reshape(-1, 10)
    )
else:
    y_train = np.matrix([encode_data_1(label) for label in y_train])
    y_test = np.matrix([encode_data_1(label) for label in y_test])


def getSample(sample_size, x, y):
    """Draw a random mini-batch of aligned rows from *x* and *y*.

    Row indices are drawn without replacement from ``0 .. len(y) - 1``.
    (The previous range started at 1, so row 0 could never be selected.)

    Args:
        sample_size: Number of rows to draw.
        x: 2-D array-like of inputs, one row per sample.
        y: 2-D array-like of targets, row-aligned with *x*.

    Returns:
        A tuple ``(x_r, y_r)`` of plain float ndarrays holding the selected
        rows of *x* and *y* in the same (random) order.

    Raises:
        ValueError: If ``sample_size`` exceeds ``len(y)`` (raised by
            ``random.sample``).
    """
    picks = random.sample(range(len(y)), sample_size)
    # Index both arrays with the same picks so inputs and targets stay aligned;
    # np.asarray also converts np.matrix inputs to plain ndarrays.
    x_r = np.asarray(x, dtype=float)[picks]
    y_r = np.asarray(y, dtype=float)[picks]
    return (x_r, y_r)

# First mini-batch used by the training loop.
inputVector, targetVector = getSample(sample_size, x_train, y_train)

# Weight matrices, one row per neuron, drawn uniformly from [0, 1).
# np.asmatrix replaces np.mat, which was removed in NumPy 2.0; both wrap the
# array as an np.matrix so that "*" below means matrix multiplication.
# NOTE(review): every initial weight is non-negative, which probably drives
# the sigmoid units into saturation at the start of training -- consider a
# zero-centred, smaller-scale initialisation.
hiddenWeights = np.asmatrix(np.random.random((num_hidden_neurons, x_train.shape[1])))
print("W0 shape: " + str(hiddenWeights.shape))
outputWeights = np.asmatrix(np.random.random((num_output_neurons, num_hidden_neurons)))
print("W1 shape: " + str(outputWeights.shape))


def act_func_l1(a):
    """Logistic sigmoid ``1 / (1 + exp(-a))``, applied element-wise by NumPy."""
    denominator = 1 + np.exp(-a)
    return 1.0 / denominator

def der_act_func_l1(a):
    """Derivative of act_func_l1 with respect to its input *a*.

    Computed as ``s * (1 - s)`` where ``s = act_func_l1(a)``.
    """
    s = act_func_l1(a)
    return s * (1.0 - s)


def feedforward(l0):
    """Run one forward pass through both layers for the input rows *l0*.

    Reads the module-level ``hiddenWeights`` and ``outputWeights``; ``*`` is
    matrix multiplication on the assumption that both are np.matrix objects,
    as created at module level.

    Returns:
        A tuple ``(hidden_activation, output_activation)``: the hidden
        activations wrapped in np.matrix, and the output activations as
        returned by act_func_l1 on a plain array.
    """
    hidden_pre = l0 * hiddenWeights.T
    hidden_activation = np.matrix(act_func_l1(np.asarray(hidden_pre)))

    output_pre = hidden_activation * outputWeights.T
    output_activation = act_func_l1(np.asarray(output_pre))

    return (hidden_activation, output_activation)

def miss(x, y):
    """Count rows of *x* whose thresholded network output differs from *y*.

    Every output unit is rounded to 1.0 when it is strictly greater than 0.5
    and to 0.0 otherwise; a row counts as a miss when the rounded output row
    is not identical to the matching target row.

    Vectorised with NumPy instead of the previous per-element ``map``/``cmp``
    loops (``cmp`` and list-returning ``map`` exist only on Python 2).

    Args:
        x: Input rows, as accepted by feedforward().
        y: Target rows aligned with *x* (np.matrix or 2-D array).

    Returns:
        The number of mismatching rows as a NumPy integer (a count, not a
        percentage).
    """
    _, outputs = feedforward(x)
    predicted = (np.asarray(outputs) > 0.5).astype(float)
    expected = np.asarray(y, dtype=float)
    # NOTE(review): this demands an exact match on all output units; an
    # argmax comparison would be the usual classification-error measure.
    row_differs = np.any(predicted != expected, axis=1)
    return np.sum(row_differs)

# Mini-batch gradient-descent training loop.  miss_x records, per iteration,
# the number of test rows reported as misses by miss().
miss_x = np.zeros((iter, 1))
for j in range(iter):

    # Forward pass on the current mini-batch; the pre-activations are kept
    # because the backward pass needs them.
    hiddenActualInput = inputVector * hiddenWeights.T
    hiddenOutputVector = np.matrix(act_func_l1(np.asarray(hiddenActualInput)))

    outputActualInput = hiddenOutputVector * outputWeights.T
    outputVector = act_func_l1(np.asarray(outputActualInput))

    # Mean squared error of the batch (squares are already non-negative).
    layer2_error2 = np.square(outputVector - targetVector)
    print("Error: " + str(np.mean(layer2_error2)))

    # Evaluate on the full test set; m is a count of missed rows even though
    # the log text and plot label say "%".
    m = miss(x_test, y_test)
    miss_x[j] = m
    print(str(j) + " - Misses (%): " + str(m))
    if m <= 2000:
        # Switch to a smaller step once the miss count is low enough.
        learning_rate = 0.05

    # Backward pass.  der_act_func_l1 expects the PRE-activation input; it
    # was previously fed the activations, which applied the sigmoid twice
    # and produced a wrong gradient.
    outputDelta = np.asmatrix(
        der_act_func_l1(np.asarray(outputActualInput))
        * np.asarray(outputVector - targetVector)
    )
    hiddenDelta = np.asmatrix(
        der_act_func_l1(np.asarray(hiddenActualInput))
        * np.asarray(outputDelta * outputWeights)
    )

    # Gradient step.  The gradients are summed (not averaged) over the batch.
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    hiddenWeights = np.asmatrix(
        hiddenWeights.T - (learning_rate * np.asarray(inputVector.T * hiddenDelta))
    ).T
    outputWeights = np.asmatrix(
        outputWeights.T - (learning_rate * np.asarray(hiddenOutputVector.T * outputDelta))
    ).T

    # Draw the mini-batch for the next iteration.
    inputVector, targetVector = getSample(sample_size, x_train, y_train)

plt.plot(range(iter), miss_x, label='Miss rate(%)')
plt.legend(loc='upper right')
plt.show()

Tags: test, l1, input, output, data, return, def, np