Pandas 中 apply 函数的向量化

2024-04-26 23:22:11 发布

您现在位置:Python中文网/ 问答频道 /正文

我需要将 temp_func 向量化。难点在于它的逻辑比较复杂,而且还涉及对列表做组合展开(explode)。

示例数据:

# Sample rows for temp_func, written as one record per row label.  The two
# list-valued columns hold the candidate tenures and candidate per-period
# interest rates that temp_func pairs up for each row.
df_noncc2 = pd.DataFrame.from_dict(
    {
        69345: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 13,
            'Amount': 24300.0,
            'Out_standing_Balance': 0.0,
        },
        71660: {
            '__loan_tenure': [24, 36, 48, 60, 90],
            '__int_rate': [0.0075,
                           0.008333333333333333,
                           0.009166666666666667,
                           0.01],
            'EMI_tenure_months': 9,
            'Amount': 382473.0,
            'Out_standing_Balance': 0.0,
        },
        74959: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 10,
            'Amount': 13159.0,
            'Out_standing_Balance': 0.0,
        },
        54873: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 13,
            'Amount': 22990.0,
            'Out_standing_Balance': 0.0,
        },
        67544: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 11,
            'Amount': 23501.0,
            'Out_standing_Balance': 0.0,
        },
        46328: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 13,
            'Amount': 12000.0,
            'Out_standing_Balance': 0.0,
        },
        24202: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 6,
            'Amount': 7089.0,
            'Out_standing_Balance': 0.0,
        },
        34787: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 9,
            'Amount': 14500.0,
            'Out_standing_Balance': 0.0,
        },
        52016: {
            '__loan_tenure': [6, 9, 12, 15, 18],
            '__int_rate': [0.0,
                           0.008333333333333333,
                           0.012499999999999999,
                           0.020833333333333332,
                           0.024999999999999998],
            'EMI_tenure_months': 17,
            'Amount': 25200.0,
            'Out_standing_Balance': 0.0,
        },
        4271: {
            '__loan_tenure': [12, 24, 36, 12, 48],
            '__int_rate': [0.0075,
                           0.008333333333333333,
                           0.009166666666666667,
                           0.01],
            'EMI_tenure_months': 12,
            'Amount': 31098.0,
            'Out_standing_Balance': 0.0,
        },
    },
    orient='index',
)

以下是我目前使用的代码:

def temp_func(x):
    """Return the annualised candidate rate that best explains the balance.

    Every (tenure, rate) pair from ``x['__loan_tenure']`` crossed with
    ``x['__int_rate']`` is priced with the standard EMI formula.  Pairs whose
    tenure is shorter than ``x['EMI_tenure_months']`` are discarded.  For the
    rest, the balance remaining after ``EMI_tenure_months`` instalments is
    compared with ``x['Out_standing_Balance']`` and the pair with the
    smallest absolute difference is selected.  Ties go to the first pair in
    tenure-major order (all rates for the first tenure, then the second, ...).

    Zero-rate candidates are priced with the interest-free limit of the
    formula (EMI = principal / tenure) instead of evaluating 0/0, so they
    take part in the comparison rather than being dropped as NaN.

    Parameters
    ----------
    x : mapping-like
        A DataFrame row (or a plain dict) providing the keys
        ``'__loan_tenure'`` and ``'__int_rate'`` (sequences of numbers) and
        ``'EMI_tenure_months'``, ``'Amount'``, ``'Out_standing_Balance'``
        (scalars).

    Returns
    -------
    float
        The selected per-period rate multiplied by 12, or NaN when no
        candidate pair yields a usable (non-NaN) error.
    """
    tenure_options = np.asarray(x['__loan_tenure'], dtype=float)
    rate_options = np.asarray(x['__int_rate'], dtype=float)
    paid_periods = x['EMI_tenure_months']
    principal = x['Amount']
    actual_balance = x['Out_standing_Balance']

    # Cartesian product, laid out exactly like the nested loop
    # ``for t in tenures for r in rates`` so tie-breaking order is stable.
    tenure = np.repeat(tenure_options, rate_options.size)
    rate = np.tile(rate_options, tenure_options.size)

    # A tenure shorter than the number of instalments already paid cannot
    # describe this loan.
    pending = tenure - paid_periods
    feasible = pending >= 0
    if not feasible.any():
        return np.nan
    tenure, rate, pending = tenure[feasible], rate[feasible], pending[feasible]

    interest_free = rate == 0
    # The general formula is still evaluated on the interest-free entries
    # (np.where computes both branches); silence the resulting 0/0 noise.
    with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
        growth = (1.0 + rate) ** tenure
        emi = np.where(
            interest_free,
            principal / tenure,
            principal * rate * growth / (growth - 1.0),
        )
        discount = 1.0 / (1.0 + rate) ** pending
        calculated_balance = np.where(
            interest_free,
            emi * pending,
            emi * (1.0 - discount) / rate,
        )
    error = np.abs(actual_balance - calculated_balance)

    # nanargmin raises on an all-NaN input; report "no answer" instead so a
    # single bad row does not abort a whole DataFrame.apply.
    if np.isnan(error).all():
        return np.nan
    best = np.nanargmin(error)
    return rate[best] * 12

# Run temp_func once per row of the sample frame.
df_noncc2.apply(temp_func, axis="columns")