数组维数为3时出现混淆矩阵错误

OrderedDict([('Accuracy', 1.0), ('95% CI', (0.29240177382128668, nan)), ('No Information Rate', 'ToDo'), ('P-Value [Acc > NIR]', 0.29629629629629622), ('Kappa', 1.0), ("Mcnemar's Test P-Value", 'ToDo')]) ValueErrorTraceback (most recent call last) <ipython-input-30-d8c5dc2bea73> in <module>() 3 y_pred = [1,2,3] 4 cm = ConfusionMatrix(y_actu, y_pred) ----> 5 cm.print_stats() /opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in print_stats(self, lst_stats) 446 Prints statistics 447 """ --> 448 print(self._str_stats(lst_stats)) 449 450 def get(self, actual=None, predicted=None): /opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in _str_stats(self, lst_stats) 427 } 428 --> 429 stats = self.stats(lst_stats) 430 431 d_stats_str = collections.OrderedDict([ /opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in stats(self, lst_stats) 390 d_stats = collections.OrderedDict() 391 d_stats['cm'] = self --> 392 d_stats['overall'] = self.stats_overall 393 d_stats['class'] = self.stats_class 394 return(d_stats) /opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/cm.py in __getattr__(self, attr) 33 Returns (weighted) average statistics 34 """ ---> 35 return(self._avg_stat(attr)) /opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in _avg_stat(self, stat) 509 v = getattr(binary_cm, stat) 510 print(v) --> 511 s_values[cls] = v 512 value = (s_values * self.true).sum() / self.population 513 return(value) /opt/conda/lib/python3.5/site-packages/pandas/core/series.py in __setitem__(self, key, value) 771 # do the setitem 772 cacher_needs_updating = self._check_is_chained_assignment_possible() --> 773 setitem(key, value) 774 if cacher_needs_updating: 775 self._maybe_update_cacher() /opt/conda/lib/python3.5/site-packages/pandas/core/series.py in setitem(key, value) 767 pass 768 --> 769 self._set_with(key, value) 770 771 # do the setitem /opt/conda/lib/python3.5/site-packages/pandas/core/series.py in _set_with(self, key, value) 809 if key_type == 'integer': 810 if self.index.inferred_type == 'integer': --> 811 self._set_labels(key, value) 812 else: 813 return self._set_values(key, value) /opt/conda/lib/python3.5/site-packages/pandas/core/series.py in _set_labels(self, key, value) 826 if mask.any(): 827 raise ValueError('%s not contained in the index' % str(key[mask])) --> 828 self._set_values(indexer, value) 829 830 def _set_values(self, key, value): /opt/conda/lib/python3.5/site-packages/pandas/core/series.py in _set_values(self, key, value) 831 if isinstance(key, Series): 832 key = key._values --> 833 self._data = self._data.setitem(indexer=key, value=value) 834 self._maybe_update_cacher() 835 /opt/conda/lib/python3.5/site-packages/pandas/core/internals.py in setitem(self, **kwargs) 3166 3167 def setitem(self, **kwargs): -> 3168 return self.apply('setitem', **kwargs) 3169 3170 def putmask(self, **kwargs): /opt/conda/lib/python3.5/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs) 3054 3055 kwargs['mgr'] = self -> 3056 applied = getattr(b, f)(**kwargs) 3057 result_blocks = _extend_blocks(applied, result_blocks) 3058 /opt/conda/lib/python3.5/site-packages/pandas/core/internals.py in setitem(self, indexer, value, mgr) 685 indexer.dtype == np.bool_ and 686 len(indexer[indexer]) == len(value)): --> 687 raise ValueError("cannot set using a list-like indexer " 688 "with a different length than the value") 689 ValueError: cannot set using a list-like indexer with a different length than the value

3条回答

网友

1楼 · 编辑于 2024-04-24 22:11:36

似乎错误不是因为数组维数：

from pandas_ml import ConfusionMatrix
y_actu = [1,2,2]
y_pred = [1,1,2]
cm = ConfusionMatrix(y_actu, y_pred)
cm.print_stats()

这个（二进制分类问题）工作得很好。在

可能是多类分类问题的混淆矩阵被破坏了。在

更新： 我只需做以下步骤：

^{pr2}$

得到熊猫0.20.1 然后呢

pip install -U pandas_ml

现在一切都很好，与乳状混乱矩阵：

from pandas_ml import ConfusionMatrix
y_actu = [1,2,3]
y_pred = [1,2,3]
cm = ConfusionMatrix(y_actu, y_pred)
cm.print_stats()

我得到了输出：

Class Statistics:

Classes                                       1         2         3
Population                                    3         3         3
P: Condition positive                         1         1         1
N: Condition negative                         2         2         2
Test outcome positive                         1         1         1
Test outcome negative                         2         2         2
TP: True Positive                             1         1         1
TN: True Negative                             2         2         2
FP: False Positive                            0         0         0
FN: False Negative                            0         0         0
TPR: (Sensitivity, hit rate, recall)          1         1         1
TNR=SPC: (Specificity)                        1         1         1
PPV: Pos Pred Value (Precision)               1         1         1
NPV: Neg Pred Value                           1         1         1
FPR: False-out                                0         0         0
FDR: False Discovery Rate                     0         0         0
FNR: Miss Rate                                0         0         0
ACC: Accuracy                                 1         1         1
F1 score                                      1         1         1
MCC: Matthews correlation coefficient         1         1         1
Informedness                                  1         1         1
Markedness                                    1         1         1
Prevalence                             0.333333  0.333333  0.333333
LR+: Positive likelihood ratio              inf       inf       inf
LR-: Negative likelihood ratio                0         0         0
DOR: Diagnostic odds ratio                  inf       inf       inf
FOR: False omission rate                      0         0         0

网友

2楼 · 编辑于 2024-04-24 22:11:36

我建议使用来自scikit-learn的confusion_matrix。您提到的其他指标，如精确度、召回率、F1分数也可以从sklearn.metrics获得。在

>>> from sklearn.metrics import confusion_matrix
>>> y_actu = [1,2,3]
>>> y_pred = [1,2,3]
>>> confusion_matrix(y_actu, y_pred)
array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])

网友

3楼 · 编辑于 2024-04-24 22:11:36

我还使用并推荐sklearnconfusion_matrix函数。就个人而言，我还保留了一个"pretty-print confusion matrix"函数，并提供了一些额外的便利：

沿着混淆矩阵轴打印的类标签
混淆矩阵统计标准化，使所有单元格总和为1
根据单元格值缩放的混淆矩阵单元格颜色
其他指标，如F分数等打印在混淆矩阵下方。在

像这样：

以下是绘图函数，主要基于this example from the Scikit-Learn documentation

import matplotlib.pyplot as plt
import itertools
from sklearn.metrics import classification_report

def pretty_print_conf_matrix(y_true, y_pred, 
                             classes,
                             normalize=False,
                             title='Confusion matrix',
                             cmap=plt.cm.Blues):
    """
    Mostly stolen from: http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py

    Normalization changed, classification_report stats added below plot
    """

    cm = confusion_matrix(y_true, y_pred)

    # Configure Confusion Matrix Plot Aesthetics (no text yet) 
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=14)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    plt.ylabel('True label', fontsize=12)
    plt.xlabel('Predicted label', fontsize=12)

    # Calculate normalized values (so all cells sum to 1) if desired
    if normalize:
        cm = np.round(cm.astype('float') / cm.sum(),2) #(axis=1)[:, np.newaxis]

    # Place Numbers as Text on Confusion Matrix Plot
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black",
                 fontsize=12)


    # Add Precision, Recall, F-1 Score as Captions Below Plot
    rpt = classification_report(y_true, y_pred)
    rpt = rpt.replace('avg / total', '      avg')
    rpt = rpt.replace('support', 'N Obs')

    plt.annotate(rpt, 
                 xy = (0,0), 
                 xytext = (-50, -140), 
                 xycoords='axes fraction', textcoords='offset points',
                 fontsize=12, ha='left')    

    # Plot
    plt.tight_layout()

下面是一个使用虹膜数据生成绘图图像的示例：

^{pr2}$

相关问题更多 >

编程相关推荐

热门问题

热门文章