Usecol错误,即使列存在

2024-05-23 17:44:34 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在练习从音频文件中提取的一些特征的回归

下面提到的代码适用于2个CSV文件,在我尝试读取2个不同的CSV文件后,它返回了意外错误

我曾试图找出问题,但由于某种原因,我弄不明白

import..

FEATURES = 'C:\\OS\\opensmile-2.3.0\\data'
ANNOTATIONS = 'D:\\Study\\PMEmo2019\\PMEmo2019\\annotations'
DATASET_DIR_2 = 'D:\\Study\\DEAM\\'

def load_static_features():
    features_csv = os.path.join(FEATURES, 'DEAM_New.csv')
    static_features= pd.read_csv(features_csv, index_col=0)
    return static_features

def load_static_features_and_valence():
    static_features = load_static_features()
    valence_csv = os.path.join(ANNOTATIONS, 'DEAM.csv')
    valence = pd.read_csv(valence_csv, index_col=0, usecols=['musicId','V'])
    return static_features.join(valence).dropna()

def load_static_features_and_arousal():
    static_features = load_static_features()
    arousal_csv = os.path.join(ANNOTATIONS, 'DEAM.csv')
    arousal = pd.read_csv(arousal_csv, index_col=0, usecols=['musicId','A'])
    return static_features.join(arousal).dropna()

def load_audio_dataset(data):
    features = data[data.columns[:-1]].values
    labels = data[data.columns[-1]].values
#     scaler = StandardScaler(copy=False)
#     scaler.fit_transform(features)
    return features, labels

def rmse(y, y_pred):
    return sqrt(mean_squared_error(y, y_pred))


regressors = {
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Ridge': Ridge(),
    'kNN': KNeighborsRegressor(),
    'SVRrbf': SVR(kernel='rbf', gamma='scale'),
    'SVRpoly': SVR(kernel='poly', gamma='scale'),
    'SVRlinear': SVR(kernel='linear', gamma='scale'),
    'DT': DecisionTreeRegressor(max_depth=5),
    'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1),
    'MLP': MLPRegressor(hidden_layer_sizes=(200,50), max_iter=2000),
    'AdaBoost': AdaBoostRegressor(n_estimators=10),
}

from tqdm import notebook
import IPython.display as ipd


def cross_val_regression(regressors, features, labels, preprocessfunc):
    columns = list(regressors.keys())
    scores = pd.DataFrame(columns=columns, index=['RMSE'])

    for reg_name, reg in notebook.tqdm(regressors.items(), desc='regressors'):
        scorer = {'rmse': make_scorer(rmse)}
        reg = make_pipeline(*preprocessfunc, reg)
        reg_score = cross_validate(reg, features, labels, scoring=scorer, cv=10, return_train_score=False)
        scores.loc['RMSE', reg_name] = reg_score['test_rmse'].mean()
    #         scores.loc['R', reg_name] = reg_score['test_r'].mean()

    mean_rmse = scores.mean(axis=1)
    std_rmse = scores.std(axis=1)

    scores['Mean'] = mean_rmse
    scores['std'] = std_rmse
    return scores


def format_scores(scores):
    def highlight(s):
        is_min = s == min(s)
        #         is_max = s == max(s)
        #         is_max_or_min = (is_min | is_max)
        return ['background-color: yellow' if v else '' for v in is_min]

    scores = scores.style.apply(highlight, axis=1, subset=pd.IndexSlice[:, :scores.columns[-2]])
    return scores.format('{:.3f}')


prefunc = [StandardScaler()]

print('In Arousal dimension...')
data_a = load_static_features_and_arousal()
features_a, labels_a = load_audio_dataset(data_a)

scores_a_a = cross_val_regression(regressors, features_a, labels_a, prefunc)
ipd.display(format_scores(scores_a_a))


print('In Valence dimension...')
data_v = load_static_features_and_valence()
features_v, labels_v = load_audio_dataset(data_v)

scores_a_v = cross_val_regression(regressors, features_v, labels_v, prefunc)
ipd.display(format_scores(scores_a_v))

回溯是-

In Arousal dimension...
Traceback (most recent call last):
  File "C:/Users/asus/PycharmProjects/untitled/Useless_Random_forest.py", line 106, in <module>
    data_a = load_static_features_and_arousal()
  File "C:/Users/asus/PycharmProjects/untitled/Useless_Random_forest.py", line 41, in load_static_features_and_arousal
    arousal = pd.read_csv(arousal_csv, index_col=0, usecols=['musicId','A'])
  File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 676, in parser_f
    return _read(filepath_or_buffer, kwds)
  File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 448, in _read
    parser = TextFileReader(fp_or_buf, **kwds)
  File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 880, in __init__
    self._make_engine(self.engine)
  File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 1114, in _make_engine
    self._engine = CParserWrapper(self.f, **self.options)
  File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 1937, in __init__
    _validate_usecols_names(usecols, self.orig_names)
  File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 1233, in _validate_usecols_names
    "Usecols do not match columns, "
ValueError: Usecols do not match columns, columns expected but not found: ['A']

Process finished with exit code 1

根据错误很明显,我的CSV文件缺少A列,但这里是我的CSV文件的摘录。(用于读取CSV文件)

     musicId     A     V
0          2     3   3.1
1          3   3.3   3.5
2          4   5.5   5.7
3          5   5.3   4.4
4          7   6.4   5.8

快照:

Snap of CSV

提供的路径也是正确的,我已经仔细检查了它们

V列会弹出相同的错误


Tags: columnscsvindatalabelsreturndefload