IndexError: Pandas Dataframe操作中索引过多

0 投票
1 回答
705 浏览
提问于 2025-04-18 08:44

好的,这是我用一对多的逻辑回归做多分类任务的代码,还加了一些正则化。我这两天一直在琢磨这个,搞不懂为什么它不管用。

import pandas as pd
import numpy as np
import scipy.optimize as sp

Data = pd.read_csv(Location, 
            sep=';',
            dtype = np.float64,
            header = None)
X = Data.ix[:,0:1]
y = Data.ix[:,2:]
y.columns = [0]

def sigmoid(z) :
    g = 1.0/(1.0+np.exp(-z))
    return g

def lrCostFunction(theta, X, y, lambd):
    m , n = X.shape
    J=-(y.T.dot(np.log(sigmoid(X.dot(theta))))+(1-y).T.dot(np.log(1-sigmoid(X.dot(theta)))))/m 
    J = J + (theta.T.dot(theta)- np.power(theta[0,0],2))*(lambd)/(2*m); 
    return J.ix[0,0]

def Gradient(theta, X, y, lambd):
    m , n  = X.shape
    grad = X.T.dot(sigmoid(X.dot(theta))-y)/m
    grad.ix[1:(n-1),:] = grad.ix[1:(n-1),:] + lambd*theta.ix[1:(n-1),:]/m;
    return grad.values.flatten().tolist()    

def oneVsAll(X, y, num_labels, lambd):
    m , n = X.shape
    all_theta = pd.DataFrame(data = [[0 for col in range(n+1)] for row in range(num_labels)])
    ones = pd.DataFrame(data = [1 for i in range(X.shape[0])])
    X = pd.concat([ones,X], axis = 1)
    for c in range(0,num_labels-1) : 
        initial_theta = pd.DataFrame(data = [0 for i in range(n+1)])
        theta = sp.minimize(fun = lrCostFunction, 
                                    x0 = initial_theta, 
                                    args = (X,y,lambd),
                                    method = 'TNC',
                                    jac = Gradient)
        all_theta.ix[c,:] = theta
    return all_theta
oneVsAll(X, y, 4, 0.1)

然后它显示:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-27-b18648b06674> in <module>()
      1 theta = pd.DataFrame(data = [0 for i in range(X.shape[1])])
----> 2 oneVsAll(X, y, 4, 0.1)

<ipython-input-26-ba0f7093d1f6> in oneVsAll(X, y, num_labels, lambd)
     10                                     args = (X,y,lambd),
     11                                     method = 'TNC',
---> 12                                     jac = Gradient)
     13         all_theta.ix[c,:] = theta
     14     return all_theta

/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/_minimize.pyc in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    381     elif meth == 'tnc':
    382         return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
--> 383                              **options)
    384     elif meth == 'cobyla':
    385         return _minimize_cobyla(fun, x0, args, constraints, **options)

/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in _minimize_tnc(fun, x0, args, jac, bounds, eps, scale, offset, mesg_num, maxCGit, maxiter, eta, stepmx, accuracy, minfev, ftol, xtol, gtol, rescale, disp, callback, **unknown_options)
    396                                         offset, messages, maxCGit, maxfun,
    397                                         eta, stepmx, accuracy, fmin, ftol,
--> 398                                         xtol, pgtol, rescale, callback)
    399 
    400     funv, jacv = func_and_grad(x)

/Users/jean-marcmarty/anaconda/lib/python2.7/site-packages/scipy/optimize/tnc.pyc in func_and_grad(x)
    358     else:
    359         def func_and_grad(x):
--> 360             f = fun(x, *args)
    361             g = jac(x, *args)
    362             return f, g

<ipython-input-24-5f31e87e00da> in lrCostFunction(theta, X, y, lambd)
      2     m , n = X.shape
      3     J=-(y.T.dot(np.log(sigmoid(X.dot(theta))))+(1-y).T.dot(np.log(1-sigmoid(X.dot(theta)))))/m
----> 4     J = J + (theta.T.dot(theta)- np.power(theta[0,0],2))*(lambd)/(2*m);
      5     return J.ix[0,0]

IndexError: too many indices

1 个回答

0

我对数学不太了解,但这个错误是出现在这段代码里:

theta[0,0]

这里的Theta是一个一维数组,所以你需要用theta[0]来访问它,除非你有特别的理由认为它应该是二维的?

撰写回答