我正在练习从音频文件中提取的一些特征的回归
下面提到的代码适用于2个CSV文件,在我尝试读取2个不同的CSV文件后,它返回了意外错误
我曾试图找出问题,但由于某种原因,我弄不明白
import..
FEATURES = 'C:\\OS\\opensmile-2.3.0\\data'
ANNOTATIONS = 'D:\\Study\\PMEmo2019\\PMEmo2019\\annotations'
DATASET_DIR_2 = 'D:\\Study\\DEAM\\'
def load_static_features():
features_csv = os.path.join(FEATURES, 'DEAM_New.csv')
static_features= pd.read_csv(features_csv, index_col=0)
return static_features
def load_static_features_and_valence():
static_features = load_static_features()
valence_csv = os.path.join(ANNOTATIONS, 'DEAM.csv')
valence = pd.read_csv(valence_csv, index_col=0, usecols=['musicId','V'])
return static_features.join(valence).dropna()
def load_static_features_and_arousal():
static_features = load_static_features()
arousal_csv = os.path.join(ANNOTATIONS, 'DEAM.csv')
arousal = pd.read_csv(arousal_csv, index_col=0, usecols=['musicId','A'])
return static_features.join(arousal).dropna()
def load_audio_dataset(data):
features = data[data.columns[:-1]].values
labels = data[data.columns[-1]].values
# scaler = StandardScaler(copy=False)
# scaler.fit_transform(features)
return features, labels
def rmse(y, y_pred):
return sqrt(mean_squared_error(y, y_pred))
regressors = {
'Lasso': Lasso(),
'ElasticNet': ElasticNet(),
'Ridge': Ridge(),
'kNN': KNeighborsRegressor(),
'SVRrbf': SVR(kernel='rbf', gamma='scale'),
'SVRpoly': SVR(kernel='poly', gamma='scale'),
'SVRlinear': SVR(kernel='linear', gamma='scale'),
'DT': DecisionTreeRegressor(max_depth=5),
'RF': RandomForestRegressor(max_depth=5, n_estimators=10, max_features=1),
'MLP': MLPRegressor(hidden_layer_sizes=(200,50), max_iter=2000),
'AdaBoost': AdaBoostRegressor(n_estimators=10),
}
from tqdm import notebook
import IPython.display as ipd
def cross_val_regression(regressors, features, labels, preprocessfunc):
columns = list(regressors.keys())
scores = pd.DataFrame(columns=columns, index=['RMSE'])
for reg_name, reg in notebook.tqdm(regressors.items(), desc='regressors'):
scorer = {'rmse': make_scorer(rmse)}
reg = make_pipeline(*preprocessfunc, reg)
reg_score = cross_validate(reg, features, labels, scoring=scorer, cv=10, return_train_score=False)
scores.loc['RMSE', reg_name] = reg_score['test_rmse'].mean()
# scores.loc['R', reg_name] = reg_score['test_r'].mean()
mean_rmse = scores.mean(axis=1)
std_rmse = scores.std(axis=1)
scores['Mean'] = mean_rmse
scores['std'] = std_rmse
return scores
def format_scores(scores):
def highlight(s):
is_min = s == min(s)
# is_max = s == max(s)
# is_max_or_min = (is_min | is_max)
return ['background-color: yellow' if v else '' for v in is_min]
scores = scores.style.apply(highlight, axis=1, subset=pd.IndexSlice[:, :scores.columns[-2]])
return scores.format('{:.3f}')
prefunc = [StandardScaler()]
print('In Arousal dimension...')
data_a = load_static_features_and_arousal()
features_a, labels_a = load_audio_dataset(data_a)
scores_a_a = cross_val_regression(regressors, features_a, labels_a, prefunc)
ipd.display(format_scores(scores_a_a))
print('In Valence dimension...')
data_v = load_static_features_and_valence()
features_v, labels_v = load_audio_dataset(data_v)
scores_a_v = cross_val_regression(regressors, features_v, labels_v, prefunc)
ipd.display(format_scores(scores_a_v))
回溯是-
In Arousal dimension...
Traceback (most recent call last):
File "C:/Users/asus/PycharmProjects/untitled/Useless_Random_forest.py", line 106, in <module>
data_a = load_static_features_and_arousal()
File "C:/Users/asus/PycharmProjects/untitled/Useless_Random_forest.py", line 41, in load_static_features_and_arousal
arousal = pd.read_csv(arousal_csv, index_col=0, usecols=['musicId','A'])
File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 676, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 448, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 880, in __init__
self._make_engine(self.engine)
File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 1114, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 1937, in __init__
_validate_usecols_names(usecols, self.orig_names)
File "C:\Users\asus\anaconda3\envs\untitled\lib\site-packages\pandas\io\parsers.py", line 1233, in _validate_usecols_names
"Usecols do not match columns, "
ValueError: Usecols do not match columns, columns expected but not found: ['A']
Process finished with exit code 1
根据错误很明显,我的CSV文件缺少A
列,但这里是我的CSV文件的摘录。(用于读取CSV文件)
musicId A V
0 2 3 3.1
1 3 3.3 3.5
2 4 5.5 5.7
3 5 5.3 4.4
4 7 6.4 5.8
快照:
提供的路径也是正确的,我已经仔细检查了它们
V列会弹出相同的错误
目前没有回答
相关问题 更多 >
编程相关推荐