我使用 Python 2.7 进行特征选择，数据集可以正常加载，但运行下面的代码时会报错：
def generic_combined_scorer(x1, o1, ii_1, x2, o2, ii_2, y, h):
    """Score two inputs against ``y`` with ``h`` and store both results.

    Evaluates ``h(x1, y)`` and ``h(x2, y)`` and writes the two scores into
    the caller-supplied output buffers. Nothing is returned; the buffers are
    modified in place.

    The function lives at module level, outside the ``__main__`` guard, so
    that it can be looked up by name when the module is imported (worker
    processes started by ``Parallel`` do not execute the guarded section).

    Args:
        x1: First input handed to ``h``.
        o1: Indexable buffer that receives ``h(x1, y)`` at position ``ii_1``.
        ii_1: Index into ``o1``.
        x2: Second input handed to ``h``.
        o2: Indexable buffer that receives ``h(x2, y)`` at position ``ii_2``.
        ii_2: Index into ``o2``.
        y: Second argument for both ``h`` calls.
        h: Callable invoked as ``h(x, y)`` that returns a score.
    """
    s1 = h(x1, y)
    s2 = h(x2, y)
    o1[ii_1] = s1
    o2[ii_2] = s2


if __name__ == '__main__':
    NUM_CV = 3
    RANDOM_SEED = 123
    MAX_ITER = 1000

    # NOTE(review): n_jobs, estimator, num_dim, num_selected_features,
    # K_MAX_internal, tmp_folder, verbose and X_subset are used below but are
    # not defined anywhere in this snippet; they must be assigned before this
    # point or the script stops with a NameError.

    # leuk = fetch_mldata('iris', transpose_data=True)
    X = pd.read_csv(r'C:\Users\pc\Desktop\dataset\leukemia.csv')
    # NOTE(review): y is read from the same file as X, so the target below is
    # simply column 1 of the feature table -- confirm which column actually
    # holds the class labels.
    y = pd.read_csv(r'C:\Users\pc\Desktop\dataset\leukemia.csv')

    # Score every feature column against the target. The column index must
    # follow the loop variable; a fixed index would yield num_dim copies of
    # the same score and make the ranking below meaningless.
    initial_scores = Parallel(n_jobs=n_jobs)(
        delayed(estimator)(X.iloc[:, ii], y.iloc[:, 1])
        for ii in range(num_dim))

    # rank the scores in descending order
    sorted_scores_idxs = np.flipud(np.argsort(initial_scores))

    # NOTE(review): selected_feature_idxs[0] is never assigned and therefore
    # stays 0; this presumably relies on X_subset having its columns ordered
    # by sorted_scores_idxs -- confirm where X_subset is built.
    selected_feature_idxs = np.zeros(num_selected_features, dtype=int)
    # A real list is required because entries are removed with `del` below
    # (a Python 3 range object does not support item deletion).
    remaining_candidate_idxs = list(range(1, K_MAX_internal))

    # File-backed buffers: generic_combined_scorer writes its scores into
    # these arrays from inside the Parallel calls.
    relevance_vec_fname = os.path.join(tmp_folder, 'relevance_vec')
    feature_redundance_vec_fname = os.path.join(tmp_folder, 'feature_redundance_vec')
    mi_matrix_fname = os.path.join(tmp_folder, 'mi_matrix')
    relevance_vec = np.memmap(relevance_vec_fname, dtype=float,
                              shape=(K_MAX_internal,), mode='w+')
    feature_redundance_vec = np.memmap(feature_redundance_vec_fname, dtype=float,
                                       shape=(K_MAX_internal,), mode='w+')
    mi_matrix = np.memmap(mi_matrix_fname, dtype=float,
                          shape=(K_MAX_internal, num_selected_features - 1), mode='w+')
    # NaN marks "not computed yet" so that nanmean below ignores those cells.
    mi_matrix[:] = np.nan

    # TODO: investigate whether its worth it to parallelize the nested for-loop?
    with tqdm(total=num_selected_features, desc='Selecting Features ...', disable=(not verbose)) as pbar:
        pbar.update(1)
        for k in range(1, num_selected_features):
            last_selected_feature = k - 1

            # For every remaining candidate column, store its score against y
            # in relevance_vec and its score against the most recently
            # selected column in feature_redundance_vec. Positional .iloc is
            # used for both column lookups: plain X_subset[:, idx] is
            # label-based indexing and fails on a DataFrame.
            Parallel(n_jobs=n_jobs)(
                delayed(generic_combined_scorer)(
                    y, relevance_vec, ii,
                    X_subset.iloc[:, selected_feature_idxs[last_selected_feature]],
                    feature_redundance_vec, ii,
                    X_subset.iloc[:, ii],
                    estimator)
                for ii in remaining_candidate_idxs)

            # copy the redundance into the mi_matrix, which accumulates our redundance as we compute
            mi_matrix[remaining_candidate_idxs, last_selected_feature] = \
                feature_redundance_vec[remaining_candidate_idxs]
            redundance_vec = np.nanmean(mi_matrix[remaining_candidate_idxs, :], axis=1)

            # Pick the candidate with the largest (relevance - mean redundance)
            # and take it out of the candidate pool.
            tmp_idx = np.argmax(relevance_vec[remaining_candidate_idxs] - redundance_vec)
            selected_feature_idxs[k] = remaining_candidate_idxs[tmp_idx]
            del remaining_candidate_idxs[tmp_idx]

            pbar.update(1)
^{pr2}$
目前没有回答
相关问题 更多 >
编程相关推荐