尝试分配分数时,值的长度与索引的长度不匹配

2024-05-14 23:45:14 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试在下面的数据集中分配RFM分数。当我创建数据子集(例如df_not1数据帧)时,我无法运行评分代码,抛出错误“ValueError:值的长度与索引的长度不匹配”

import pandas as pd
import numpy as np
  
# intialise data of lists.
data = {'cust':["a1",   "a2",   "a3",   "a4",   "a5",   "a6",   "a7",   "a8",   "a9",   "a10",  "a11",  "a12",  "a13",  "a14",  "a15",  "a16",  "a17",  "a18",  "a19",  "a20",  "a21",  "a22",  "a23",  "a24",  "a25",  "a26",  "a27",  "a28",  "a29",  "a30",  "a31",  "a32",  "a33",  "a34",  "a35",  "a36",  "a37",  "a38",  "a39",  "a40",  "a41",  "a42",  "a43",  "a44",  "a45",  "a46",  "a47",  "a48",  "a49",  "a50",  "a51"],
        'recency':[3,   7,  9,  9,  6,  8,  3,  9,  6,  5,  8,  6,  2,  8,  3,  3,  2,  7,  3,  1,  7,  6,  10, 6,  2,  8,  6,  10, 2,  7,  9,  1,  1,  3,  6,  4,  6,  4,  6,  6,  7,  3,  7,  9,  6,  4,  7,  3,  1,  9,  3],
        'frequency':[15,    9,  13, 9,  19, 1,  11, 20, 20, 15, 15, 18, 1,  9,  20, 14, 11, 11, 4,  15, 1,  8,  17, 19, 13, 20, 1,  11, 3,  8,  2,  4,  15, 5,  12, 15, 20, 6,  19, 2,  6,  12, 6,  6,  4,  7,  2,  3,  20, 13, 11],
       'monetary':[8854,    5614,   2687,   3553,   1801,   1076,   9724,   7778,   8382,   4391,   6766,   9905,   3181,   4170,   7544,   2997,   3025,   9358,   6015,   9919,   5132,   3598,   8779,   4420,   8931,   1492,   5491,   4186,   4720,   2568,   2530,   4618,   4109,   9384,   3000,   9766,   9524,   1027,   6315,   9806,   3442,   7256,   2432,   2429,   7696,   4527,   1802,   6606,   3018,   6295,   2985]}

  
# Create DataFrame
df = pd.DataFrame(data)

df_1=df[df['frequency']==1]
df_not1=df[df['frequency']!=1]

rfm_df=df_not1[['recency','frequency','monetary']]

s1=np.full((1,rfm_df.shape[0]-int(0.8 * rfm_df.shape[0])),1)
s2=np.full((1,int(0.2*rfm_df.shape[0])),2)
s3=np.full((1,int(0.2*rfm_df.shape[0])),3)
s4=np.full((1,int(0.2*rfm_df.shape[0])),4)
s5=np.full((1,int(0.2*rfm_df.shape[0])),5)
score=np.hstack((s1,s2,s3,s4,s5)).flatten()

rfm_df=rfm_df.sort_values(by='recency',ascending=False)
rfm_df['r_score']=score

for i,j in zip(('frequency','monetary'),('f_score','m_score')):
    rfm_df=rfm_df.sort_values(by=i)
    rfm_df[j]=score

但是,它不允许运行代码,因为我得到了这个错误

ValueError: Length of values (46) does not match length of index (47)

Tags: of数据dfdatanpfullintscore

热门问题